In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [2]:
# x_train, x_test, y_train, y_test
def getData(par_feature):
    """Load the breast-cancer dataset and split the chosen features.

    Parameters
    ----------
    par_feature : list of str
        Column names (from the dataset's ``feature_names``) to keep as
        model inputs.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` from a stratified 70/30
        split with a fixed ``random_state`` of 42. A list (not a tuple) is
        returned so callers can replace individual entries in place.
    """
    dataset = load_breast_cancer()
    feature = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    target = pd.Series(dataset.target, name='target')
    # Stratify on the target so both splits keep the same class balance.
    myset = list(train_test_split(
        feature[par_feature], target, test_size=0.3,
        random_state=42, stratify=target
    ))
    return myset

In [3]:
def Std(myset):
    """Standardize the train and test features of ``myset``.

    The scaler is fitted on ``myset[0]`` (training features) only and then
    applied to ``myset[1]`` (test features).

    Returns
    -------
    tuple
        ``(scaled_train, scaled_test)``.
    """
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(myset[0])
    scaled_test = scaler.transform(myset[1])
    return scaled_train, scaled_test

In [4]:
def predict(myset, k):
    """Fit a k-nearest-neighbours classifier and predict the test split.

    Parameters
    ----------
    myset : sequence
        Indexed as ``[x_train, x_test, y_train, ...]``.
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.

    Returns
    -------
    Predicted labels for ``myset[1]``.
    """
    train_x = myset[0]
    train_y = myset[2]
    classifier = KNeighborsClassifier(n_neighbors=k)
    classifier.fit(train_x, train_y)
    test_x = myset[1]
    return classifier.predict(test_x)

In [5]:
def result(test, pred):
    """Score predictions against the true labels.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``, each rounded to 2 decimals.
    """
    # Order matters: callers index the returned tuple positionally.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(round(scorer(test, pred), 2) for scorer in scorers)

In [6]:
def run(f, std, k):
    """Train and evaluate a KNN model on the given features.

    Parameters
    ----------
    f : list of str
        Feature column names passed to ``getData``.
    std : bool
        When truthy, the train/test features are standardized with ``Std``
        before fitting.
    k : int
        Number of neighbours for the classifier.

    Returns
    -------
    tuple
        The ``(accuracy, precision, recall, f1)`` tuple produced by
        ``result`` for the test split.
    """
    myset = getData(f)
    # Truthiness check instead of `== True`, so any truthy flag enables scaling.
    if std:
        myset[0], myset[1] = Std(myset)
    y_pred = predict(myset, k)
    return result(myset[3], y_pred)

In [7]:
def show(metric):
    """Print the four metric values, one per line, prefixed by their names.

    ``metric`` is indexed positionally in the order accuracy, precision,
    recall, F1 score.
    """
    labels = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
    for position, label in enumerate(labels):
        print(label, metric[position])

In [8]:
# Display the feature names available in the breast-cancer dataset.
load_breast_cancer().feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [9]:
# Size-related and texture-related features to combine pairwise.
areas = ['mean radius', 'mean perimeter', 'mean area']
textures = ['mean texture', 'mean smoothness']

# One [area, texture] feature pair per combination, area-major order.
flist = [[area, texture] for area in areas for texture in textures]

# Column labels: each feature pair appears twice, 'std' first and then
# 'no std', in the same order as flist.
pairs = [
    f'{area} + {texture} + {std}'
    for area, texture in flist
    for std in ('std', 'no std')
]

In [10]:
def showDF(k):
    """Evaluate every feature pair in ``flist`` with and without scaling.

    Prints the value of ``k``, then calls ``run`` for each feature pair,
    first with standardization and then without.

    Returns
    -------
    list of list
        Four lists (accuracy, precision, recall, F1 in that order), each
        holding one value per (feature pair, scaling) combination.
    """
    print('k =', k)
    # One metric tuple per combination, standardized run first for each pair.
    outcomes = [
        run(features, use_std, k)
        for features in flist
        for use_std in (True, False)
    ]
    # Transpose by index so each inner list holds a single metric.
    return [[outcome[idx] for outcome in outcomes] for idx in range(4)]

In [11]:
# Metrics for k = 1. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(1), columns=pairs)

k = 1


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.84,0.82,0.87,0.86,0.85,0.87,0.85,0.84,0.82,0.85,0.88,0.84
1,0.9,0.88,0.94,0.88,0.9,0.91,0.92,0.88,0.87,0.87,0.93,0.85
2,0.84,0.83,0.84,0.9,0.85,0.87,0.83,0.86,0.84,0.91,0.87,0.9
3,0.87,0.86,0.89,0.89,0.88,0.89,0.87,0.87,0.85,0.89,0.9,0.87


In [12]:
# Metrics for k = 2. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(2), columns=pairs)

k = 2


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.82,0.81,0.82,0.82,0.84,0.87,0.82,0.82,0.81,0.86,0.84,0.82
1,0.94,0.94,0.98,0.89,0.95,0.96,0.98,0.9,0.94,0.93,0.98,0.9
2,0.76,0.75,0.73,0.81,0.79,0.83,0.73,0.79,0.74,0.84,0.76,0.81
3,0.84,0.83,0.83,0.85,0.86,0.89,0.83,0.85,0.83,0.88,0.85,0.85


In [13]:
# Metrics for k = 3. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(3), columns=pairs)

k = 3


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.86,0.87,0.87,0.85,0.9,0.89,0.88,0.87,0.84,0.87,0.84,0.88
1,0.89,0.9,0.94,0.87,0.93,0.93,0.96,0.88,0.86,0.88,0.93,0.87
2,0.89,0.88,0.84,0.91,0.91,0.9,0.84,0.92,0.89,0.92,0.81,0.94
3,0.89,0.89,0.89,0.89,0.92,0.91,0.9,0.9,0.88,0.9,0.87,0.91


In [14]:
# Metrics for k = 4. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(4), columns=pairs)

k = 4


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.88,0.87,0.86,0.83,0.89,0.9,0.88,0.85,0.87,0.88,0.85,0.86
1,0.93,0.92,0.97,0.88,0.95,0.96,0.97,0.89,0.91,0.91,0.97,0.88
2,0.88,0.87,0.8,0.85,0.87,0.88,0.83,0.87,0.87,0.9,0.79,0.9
3,0.9,0.89,0.88,0.86,0.91,0.92,0.89,0.88,0.89,0.9,0.87,0.89


In [15]:
# Metrics for k = 5. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(5), columns=pairs)

k = 5


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.87,0.86,0.87,0.87,0.88,0.88,0.87,0.87,0.87,0.88,0.87,0.87
1,0.89,0.88,0.95,0.86,0.91,0.92,0.93,0.87,0.89,0.88,0.94,0.87
2,0.9,0.9,0.83,0.93,0.91,0.89,0.85,0.93,0.91,0.93,0.84,0.93
3,0.89,0.89,0.89,0.9,0.91,0.9,0.89,0.9,0.9,0.9,0.89,0.9


In [16]:
# Metrics for k = 6. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(6), columns=pairs)

k = 6


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.89,0.89,0.85,0.85,0.89,0.89,0.87,0.86,0.88,0.87,0.86,0.87
1,0.93,0.93,0.95,0.86,0.93,0.94,0.96,0.87,0.91,0.89,0.95,0.88
2,0.89,0.89,0.81,0.9,0.89,0.88,0.83,0.91,0.9,0.91,0.82,0.92
3,0.91,0.91,0.87,0.88,0.91,0.91,0.89,0.89,0.91,0.9,0.88,0.89


In [17]:
# Metrics for k = 7. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(7), columns=pairs)

k = 7


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.89,0.89,0.88,0.87,0.92,0.92,0.89,0.86,0.91,0.88,0.88,0.87
1,0.92,0.9,0.94,0.86,0.93,0.94,0.93,0.86,0.91,0.86,0.93,0.86
2,0.92,0.93,0.87,0.93,0.93,0.93,0.9,0.93,0.94,0.95,0.87,0.95
3,0.92,0.91,0.9,0.9,0.93,0.93,0.91,0.89,0.93,0.91,0.9,0.9


In [18]:
# Metrics for k = 8. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(8), columns=pairs)

k = 8


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.88,0.88,0.88,0.86,0.91,0.89,0.89,0.87,0.89,0.87,0.89,0.86
1,0.93,0.91,0.95,0.86,0.95,0.94,0.94,0.87,0.92,0.87,0.95,0.86
2,0.88,0.9,0.85,0.93,0.91,0.89,0.88,0.93,0.92,0.93,0.87,0.93
3,0.9,0.9,0.9,0.89,0.93,0.91,0.91,0.9,0.92,0.9,0.91,0.89


In [19]:
# Metrics for k = 9. Rows 0-3 are accuracy, precision, recall and F1 (the
# order returned by result()); one column per feature pair and scaling choice.
pd.DataFrame(showDF(9), columns=pairs)

k = 9


Unnamed: 0,mean radius + mean texture + std,mean radius + mean texture + no std,mean radius + mean smoothness + std,mean radius + mean smoothness + no std,mean perimeter + mean texture + std,mean perimeter + mean texture + no std,mean perimeter + mean smoothness + std,mean perimeter + mean smoothness + no std,mean area + mean texture + std,mean area + mean texture + no std,mean area + mean smoothness + std,mean area + mean smoothness + no std
0,0.88,0.89,0.88,0.86,0.93,0.89,0.89,0.86,0.91,0.88,0.89,0.87
1,0.9,0.89,0.91,0.85,0.94,0.92,0.93,0.85,0.9,0.86,0.92,0.86
2,0.92,0.93,0.9,0.94,0.95,0.91,0.89,0.93,0.96,0.95,0.9,0.95
3,0.91,0.91,0.91,0.89,0.94,0.91,0.91,0.89,0.93,0.91,0.91,0.9
