In [45]:
from PIL import Image
import glob
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [46]:
labels = pd.read_csv("Xray_TeethLabels_Simple.csv",index_col=0)
Ylabels = labels[5:]
metaY = labels[:3]
X = pd.DataFrame()
Y = []
for picFilename in glob.glob("processed2/processed/*"):
    #print(picFilename)
    patNumber = picFilename.split('/')[2].split('_')[0]
    toothNumber = picFilename.split('/')[2].split('_')[1].split('.')[0]
    im = Image.open(picFilename)
    X[patNumber + '_' + toothNumber] = np.array(im).flatten()
    Y.append(Ylabels.loc[toothNumber,patNumber])

X_train, X_test, y_train, y_test = train_test_split(X.T.to_numpy(), np.array(Y), test_size=0.20, random_state=100)

In [47]:
np.random.seed(100)
def svc_param_selection(X, y, nfolds):
    C = [0.1, 1, 10, 100, 1000] #try different values for C
    kernel = ['linear', 'rbf']
    param_grid = {'C': C, 'kernel': kernel}
    grid_search = GridSearchCV(SVC(), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_params_
best_c = svc_param_selection(X.T.to_numpy(), np.array(Y), 2)
print ("The best value for C is:",best_c)

The best value for C is: {'C': 1, 'kernel': 'rbf'}


In [48]:
model = SVC(C=best_c['C'], kernel=best_c['kernel'])
model.fit(X_train, y_train)
print(model)
# make predictions
expected = y_test
predicted = model.predict(X_test)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

SVC(C=1)
              precision    recall  f1-score   support

          No       0.59      0.87      0.70        23
         Yes       0.73      0.36      0.48        22

    accuracy                           0.62        45
   macro avg       0.66      0.62      0.59        45
weighted avg       0.66      0.62      0.60        45

[[20  3]
 [14  8]]


In [49]:
tn, fp, fn, tp = metrics.confusion_matrix(y_test, predicted).ravel()
print(tn)
print(tp)
print(fp)
print(fn)
print(f"sensitivity: {tp/(tp+fn)}")
print(f"Specificity: {tn/(tn+fp)}")
print(f"FPR: {fp/(fp+tn)}")
print(f" FNR: {fn/(fn+tp)}")
print(f"Accuracy: {(tp+tn)/(tp+tn+fp+fn)}")
precision = tp/(tp+fp)
recall = tp/(tp+fn)
print(f"precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 score: {(2*precision*recall)/(precision+recall)}")

20
8
3
14
sensitivity: 0.36363636363636365
Specificity: 0.8695652173913043
FPR: 0.13043478260869565
 FNR: 0.6363636363636364
Accuracy: 0.6222222222222222
precision: 0.7272727272727273
Recall: 0.36363636363636365
F1 score: 0.4848484848484849
