In [None]:
# import list 
import pandas as pd 
import numpy as np 
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree 
from sklearn.linear_model import LogisticRegression #Import LogisticRegression 
from sklearn import svm #Import SVM 
from sklearn.model_selection import train_test_split # Import train_test_split function 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import accuracy_score 
from sklearn.model_selection import KFold 
 
# Load dataset
# The file is read without a header row; the eleven column names are supplied here.
column_names = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]
Data = pd.read_csv("breast-cancer-wisconsin.data", names=column_names)

# Force every column to numeric: entries that cannot be parsed become NaN
# (errors='coerce') and are then replaced by 0.
Data = Data.apply(pd.to_numeric, errors='coerce').fillna(0)

# Recode the class label: 2 -> 0 and 4 -> 1.
Data["Class"] = Data["Class"].map({2: 0, 4: 1})

print(Data)
 
 
def _cross_validate(model, feature, label, kfold, report, scale=False):
    """Fit and score ``model`` on every fold of ``kfold``; return the fold accuracies.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refitted on each fold.
        feature: 2-D array of feature rows.
        label: 1-D array of labels aligned with ``feature``.
        kfold: Splitter whose ``split(feature)`` yields (train, test) index arrays.
        report: Format string printed per fold; receives the fold number,
            accuracy, training size and validation size as ``{0}``..``{3}``.
        scale: When True, min-max scale the features before fitting.

    Returns:
        list: One accuracy per fold, rounded to 4 decimal places.
    """
    scores = []
    for fold, (train_index, test_index) in enumerate(kfold.split(feature), start=1):
        x_train, x_test = feature[train_index], feature[test_index]
        y_train, y_test = label[train_index], label[test_index]

        if scale:
            # Fit the scaler on the training rows only, so the validation
            # fold's min/max never influence the transformation (no leakage).
            scaler = MinMaxScaler().fit(x_train)
            x_train = scaler.transform(x_train)
            x_test = scaler.transform(x_test)

        model.fit(x_train, y_train)
        pred = model.predict(x_test)

        accuracy = np.round(accuracy_score(y_test, pred), 4)  # round to 4 decimal places
        print(report.format(fold, accuracy, x_train.shape[0], x_test.shape[0]))
        scores.append(accuracy)

    return scores


def classification_Compare(data, model, K):
    """Compare K-fold accuracy of ``model`` on raw versus min-max scaled features.

    The nine feature columns and the ``Class`` column are taken from ``data``
    and cast to ``np.uint32``. The model is evaluated with ``KFold(n_splits=K)``
    twice: first on the raw features, then on min-max scaled features. One
    line is printed per fold, and fold numbers start at 1 in both passes.

    Args:
        data: DataFrame containing the nine feature columns listed below and a
            ``Class`` column.
        model: Estimator exposing ``fit`` and ``predict``.
        K: Number of folds passed to ``KFold``.

    Returns:
        list: ``2 * K`` accuracies rounded to 4 decimals; the first ``K`` are
        from the raw features, the last ``K`` from the scaled features.
    """
    feature_columns = [
        'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape',
        'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei',
        'Bland Chromatin', 'Normal Nucleoli', 'Mitoses',
    ]
    feature = data[feature_columns].to_numpy().astype(np.uint32)
    label = data.Class.to_numpy().astype(np.uint32)

    kfold = KFold(n_splits=K)

    # Pass 1: the features exactly as stored.
    cv_accuracy = _cross_validate(
        model, feature, label, kfold,
        '\n#{0} Cross-validation accuracy : {1},  Training data size : {2},  Validation data size : {3}',
    )
    # Pass 2: the same folds, with per-fold min-max scaling.
    cv_accuracy += _cross_validate(
        model, feature, label, kfold,
        '\n#{0} Min_Max_Scaler Cross-validation accuracy : {1},  Training data size : {2},  Validation data size : {3}',
        scale=True,
    )

    return cv_accuracy
 
 
def make_model(K, data, case, **kwargs):
    """Build the classifier selected by ``case`` and cross-validate it.

    Cases:
        1 -- ``DecisionTreeClassifier`` with ``criterion="gini"``
        2 -- ``DecisionTreeClassifier`` with ``criterion="entropy"``
        3 -- ``LogisticRegression``
        4 -- ``svm.SVC``

    Only keyword arguments that the selected estimator is listed as accepting
    below are forwarded; any other keyword is silently ignored. Options the
    caller does not supply are left to the estimator's own defaults. In
    particular ``multi_class`` is passed to ``LogisticRegression`` only when
    the caller asks for it, because recent scikit-learn releases deprecate it.

    Args:
        K: Number of cross-validation folds.
        data: DataFrame handed to ``classification_Compare``.
        case: Integer 1-4 selecting the model.
        **kwargs: Hyper-parameters for the selected estimator.

    Returns:
        The value returned by ``classification_Compare(data, model, K)``, or
        ``None`` (after printing an error message) when ``case`` is not 1-4.
    """
    tree_options = (
        'splitter', 'max_depth', 'min_samples_split', 'min_samples_leaf',
        'min_weight_fraction_leaf', 'max_features', 'random_state',
        'max_leaf_nodes', 'min_impurity_decrease', 'class_weight', 'ccp_alpha',
    )
    logistic_options = (
        'penalty', 'dual', 'tol', 'C', 'fit_intercept', 'intercept_scaling',
        'class_weight', 'random_state', 'solver', 'max_iter', 'multi_class',
        'verbose', 'warm_start', 'n_jobs', 'l1_ratio',
    )
    svc_options = (
        'C', 'kernel', 'degree', 'gamma', 'coef0', 'shrinking', 'probability',
        'tol', 'cache_size', 'class_weight', 'verbose', 'max_iter',
        'decision_function_shape', 'break_ties', 'random_state',
    )

    if case == 1:
        # Case 1 is the Gini tree; it previously used "entropy", which made
        # it indistinguishable from case 2.
        estimator, fixed, allowed = DecisionTreeClassifier, {'criterion': 'gini'}, tree_options
    elif case == 2:
        estimator, fixed, allowed = DecisionTreeClassifier, {'criterion': 'entropy'}, tree_options
    elif case == 3:
        estimator, fixed, allowed = LogisticRegression, {}, logistic_options
    elif case == 4:
        estimator, fixed, allowed = svm.SVC, {}, svc_options
    else:
        print('case error! (1~4)')
        return None

    # Forward only the recognised options the caller actually provided.
    options = {name: kwargs[name] for name in allowed if name in kwargs}
    model = estimator(**fixed, **options)
    return classification_Compare(data, model, K)
 
 
acc_list = []


def acc_test(acc_list):
    """Run the four model configurations and append each result to ``acc_list``.

    Every configuration is evaluated through ``make_model`` with 3 folds on the
    module-level ``Data`` frame. A banner naming the configuration is printed
    before each run. ``acc_list`` is extended in place; nothing is returned.
    """
    # (banner, make_model case, hyper-parameters)
    runs = (
        ('\n<DecisionTreeClassifier_gini>', 1,
         {'max_depth': 10, 'min_samples_split': 3, 'max_features': 'sqrt'}),
        ('\n\n<DecisionTreeClassifier_entropy>', 2,
         {'max_depth': 7, 'min_samples_split': 2, 'max_features': 'sqrt'}),
        ('\n\n<LogisticRegression>', 3,
         {'random_state': 30, 'max_iter': 100}),
        ('\n\n<SVM>', 4,
         {'random_state': 20, 'max_iter': 10}),
    )
    for banner, case, options in runs:
        print(banner)
        acc_list.append(make_model(3, Data, case, **options))


acc_test(acc_list)

# Print all accuracies, one inner list per model in the order of the header below.
print('\n<Accuracy output for all models> \n\n[DecisionTreeGini] [DecisionTreeEntropy] [LogisticRegression] [SVM]\n')
print(acc_list)