In [15]:
import numpy as np
import pandas as pd

In [16]:
df = pd.read_csv('../data/migraine.csv')
pd.set_option('display.float_format', '{:0.2f}'.format)

In [17]:
df.shape

(400, 24)

In [18]:
print(df.groupby('Type').size())

Type
Basilar-type aura                 18
Familial hemiplegic migraine      24
Migraine without aura             60
Other                             17
Sporadic hemiplegic migraine      14
Typical aura with migraine       247
Typical aura without migraine     20
dtype: int64


In [19]:
from sklearn.preprocessing import MinMaxScaler

X = df.iloc[:, :-1]
y = df['Type']

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

In [20]:
y.unique().shape

(7,)

### KNN Classifier

In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print("KNN Precision:\t ", format(knn.score(X_train, y_train)))
print("KNN Accuracy:\t ", metrics.accuracy_score(y_test, y_pred))

KNN Precision:	  0.9
KNN Accuracy:	  0.75


In [22]:
from sklearn.metrics import classification_report

y_pred = knn.predict(X_test)

print(classification_report(y_true = y_test, y_pred = y_pred))

                               precision    recall  f1-score   support

            Basilar-type aura       1.00      0.25      0.40         8
 Familial hemiplegic migraine       0.43      0.60      0.50         5
        Migraine without aura       0.67      0.55      0.60        11
                        Other       0.00      0.00      0.00         4
 Sporadic hemiplegic migraine       0.00      0.00      0.00         5
   Typical aura with migraine       0.77      0.95      0.85        62
Typical aura without migraine       1.00      1.00      1.00         5

                     accuracy                           0.75       100
                    macro avg       0.55      0.48      0.48       100
                 weighted avg       0.70      0.75      0.70       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
set(y_test) - set(y_pred)

{'Other', 'Sporadic hemiplegic migraine'}

In [24]:
predictions = knn.predict(X)

In [25]:
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

print('współczynnik zarysu:\t\t', silhouette_score(X, predictions))
print('indeks Calińskiego-Harabasza:\t', calinski_harabasz_score(X, predictions))
print('indeks Daviesa-Bouldina:\t', davies_bouldin_score(X, predictions))

współczynnik zarysu:		 0.09979366026040562
indeks Calińskiego-Harabasza:	 17.54815507289581
indeks Daviesa-Bouldina:	 2.2466269255092906


In [26]:
from sklearn.naive_bayes import CategoricalNB

gnb = CategoricalNB()

y_pred = gnb.fit(X_train, y_train).predict(X_test)

In [27]:
from sklearn.metrics import classification_report

y_pred = gnb.predict(X_test)

print(classification_report(y_true = y_test, y_pred = y_pred))

                               precision    recall  f1-score   support

            Basilar-type aura       1.00      0.88      0.93         8
 Familial hemiplegic migraine       0.75      0.60      0.67         5
        Migraine without aura       0.29      0.18      0.22        11
                        Other       1.00      1.00      1.00         4
 Sporadic hemiplegic migraine       0.00      0.00      0.00         5
   Typical aura with migraine       0.76      0.92      0.83        62
Typical aura without migraine       0.67      0.40      0.50         5

                     accuracy                           0.75       100
                    macro avg       0.64      0.57      0.59       100
                 weighted avg       0.69      0.75      0.71       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
