In [35]:
import numpy as np
import pandas as pd

In [36]:
df = pd.read_csv('../data/migraine.csv')
pd.set_option('display.float_format', '{:0.2f}'.format)

In [37]:
df.shape

(400, 24)

In [38]:
print(df.groupby('Type').size())

Type
Basilar-type aura                 18
Familial hemiplegic migraine      24
Migraine without aura             60
Other                             17
Sporadic hemiplegic migraine      14
Typical aura with migraine       247
Typical aura without migraine     20
dtype: int64


In [39]:
from sklearn.preprocessing import MinMaxScaler

X = df.iloc[:, :-1]
y = df['Type']

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [40]:
y.unique().shape

(7,)

### KNN Classifier

In [41]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import cross_val_score

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
score = cross_val_score(knn, X_train, y_train, cv=10, scoring='accuracy')

y_pred = knn.predict(X_test)
print("KNN Precision:\t ", format(knn.score(X_train, y_train)))
print("KNN Accuracy:\t ", metrics.accuracy_score(y_test, y_pred))
print("Cross_val Accuracies:\t", score)
print("Avg Cross_val:\t",  score.mean())

KNN Precision:	  0.903125
KNN Accuracy:	  0.7625
Cross_val Accuracies:	 [0.75    0.84375 0.78125 0.8125  0.875   0.75    0.78125 0.8125  0.78125
 0.84375]
Avg Cross_val:	 0.803125


In [42]:
from sklearn.metrics import classification_report

y_pred = knn.predict(X_test)

print(classification_report(y_true = y_test, y_pred = y_pred))

                               precision    recall  f1-score   support

            Basilar-type aura       0.67      0.29      0.40         7
 Familial hemiplegic migraine       0.43      0.60      0.50         5
        Migraine without aura       0.71      0.62      0.67         8
                        Other       1.00      0.33      0.50         3
 Sporadic hemiplegic migraine       0.00      0.00      0.00         4
   Typical aura with migraine       0.81      0.96      0.88        49
Typical aura without migraine       1.00      0.75      0.86         4

                     accuracy                           0.76        80
                    macro avg       0.66      0.51      0.54        80
                 weighted avg       0.74      0.76      0.73        80



In [43]:
set(y_test) - set(y_pred)

set()

In [44]:
predictions = knn.predict(X)

In [45]:
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

print('współczynnik zarysu:\t\t', silhouette_score(X, predictions))
print('indeks Calińskiego-Harabasza:\t', calinski_harabasz_score(X, predictions))
print('indeks Daviesa-Bouldina:\t', davies_bouldin_score(X, predictions))

współczynnik zarysu:		 0.0960433853570669
indeks Calińskiego-Harabasza:	 17.10195409324369
indeks Daviesa-Bouldina:	 2.4088455062927845
