In [1]:
import numpy as np
import pandas as pd

df = pd.read_csv('../data/migraine.csv')
pd.set_option('display.float_format', '{:0.2f}'.format)

from sklearn.preprocessing import MinMaxScaler

X = df.iloc[:, :-1]
y = df['Type']

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

In [2]:
from sklearn.naive_bayes import CategoricalNB

gnb = CategoricalNB()

y_pred = gnb.fit(X_train, y_train).predict(X_test)

In [3]:
from sklearn.metrics import classification_report

y_pred = gnb.predict(X_test)

print(classification_report(y_true = y_test, y_pred = y_pred))

                               precision    recall  f1-score   support

            Basilar-type aura       1.00      0.88      0.93         8
 Familial hemiplegic migraine       0.75      0.60      0.67         5
        Migraine without aura       0.29      0.18      0.22        11
                        Other       1.00      1.00      1.00         4
 Sporadic hemiplegic migraine       0.00      0.00      0.00         5
   Typical aura with migraine       0.76      0.92      0.83        62
Typical aura without migraine       0.67      0.40      0.50         5

                     accuracy                           0.75       100
                    macro avg       0.64      0.57      0.59       100
                 weighted avg       0.69      0.75      0.71       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
set(y_test) - set(y_pred)

{'Sporadic hemiplegic migraine'}

In [5]:

class_labels = gnb.classes_
class_probabilities = dict(zip(class_labels, gnb.predict_proba(X_test)[0]))
print(class_probabilities)

{'Basilar-type aura': 0.5219828219977022, 'Familial hemiplegic migraine': 0.3652847990707491, 'Migraine without aura': 0.03257288105908088, 'Other': 0.013512887101155632, 'Sporadic hemiplegic migraine': 0.0009872882623901493, 'Typical aura with migraine': 0.041516076816080265, 'Typical aura without migraine': 0.024143245692840914}


In [6]:
probs = gnb.predict_proba(X_test)
class_labels = gnb.classes_
class_probabilities = []
for i in range(len(probs)):
    class_probabilities.append(dict(zip(class_labels, probs[i])))
print(class_probabilities[0])
print(class_probabilities[1])
print(" ")


for i in range(len(class_probabilities)):
    print(y_pred[i])
    for klucz, wartosc in class_probabilities[i].items():
        print(f"{wartosc}: \t{klucz}")
    print(" ")


print(len(class_probabilities))

{'Basilar-type aura': 0.5219828219977022, 'Familial hemiplegic migraine': 0.3652847990707491, 'Migraine without aura': 0.03257288105908088, 'Other': 0.013512887101155632, 'Sporadic hemiplegic migraine': 0.0009872882623901493, 'Typical aura with migraine': 0.041516076816080265, 'Typical aura without migraine': 0.024143245692840914}
{'Basilar-type aura': 5.403821314529687e-05, 'Familial hemiplegic migraine': 0.001178682635343969, 'Migraine without aura': 0.0031613473696747267, 'Other': 3.147570276007165e-05, 'Sporadic hemiplegic migraine': 0.0023654061267660525, 'Typical aura with migraine': 0.9866277021151934, 'Typical aura without migraine': 0.0065813478371166585}
 
Basilar-type aura
0.5219828219977022: 	Basilar-type aura
0.3652847990707491: 	Familial hemiplegic migraine
0.03257288105908088: 	Migraine without aura
0.013512887101155632: 	Other
0.0009872882623901493: 	Sporadic hemiplegic migraine
0.041516076816080265: 	Typical aura with migraine
0.024143245692840914: 	Typical aura withou

In [7]:
from tabulate import tabulate

for i in class_probabilities:
    print(tabulate([i],  tablefmt="plain"))


0.521983  0.365285  0.0325729  0.0135129  0.000987288  0.0415161  0.0241432
5.40382e-05  0.00117868  0.00316135  3.14757e-05  0.00236541  0.986628  0.00658135
0.131471  0.368016  0.082041  0.00952974  0.000497335  0.388986  0.019459
0.0804823  0.263322  0.000941676  0.00568226  0.223679  0.153616  0.272277
0.000315062  0.000846649  0.516091  0.00107907  0.00386153  0.47721  0.000596892
0.00292167  0.0154958  0.00105289  0.004765  0.098475  0.454465  0.422824
0.000315062  0.000846649  0.516091  0.00107907  0.00386153  0.47721  0.000596892
0.000315062  0.000846649  0.516091  0.00107907  0.00386153  0.47721  0.000596892
0.000255264  0.00122492  0.149335  0.000416315  0.00186227  0.846518  0.00038861
5.40382e-05  0.00117868  0.00316135  3.14757e-05  0.00236541  0.986628  0.00658135
0.00180621  0.0323581  0.450845  0.00235662  0.00073792  0.509158  0.00273752
0.285476  0.24173  0.00862214  0.417305  0.00130669  0.00295415  0.0426053
5.40382e-05  0.00117868  0.00316135  3.14757e-05  0.002365