In [1]:
'''
Classification of "radio" samples using DecisionTree
Script loads data from /radio folder and classifies all samples as:
L - layman
R - resident
S - radiology specialist  

@author: pawel@kasprowski.pl
'''
import loader

from sklearn.metrics import classification_report,confusion_matrix,accuracy_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.tree import DecisionTreeClassifier

In [3]:
samples,labels,_ = loader.load_files("radio",700)
print("shape = {}".format(samples.shape))
    
#flatten
samples = samples.reshape(-1,samples.shape[1]*samples.shape[2])

#one-hot encoding
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
classesNum = labels.shape[1]
print ("Classes: {}".format(classesNum))
 
#split to training and test
(trainSamples, testSamples, trainLabels, testLabels) = train_test_split(samples, labels, test_size=0.25, random_state=42)
   
model = DecisionTreeClassifier()
model.fit(trainSamples, trainLabels)    
treeResults = model.predict(testSamples)
print(confusion_matrix(testLabels.argmax(axis=1), treeResults.argmax(axis=1)))
print(classification_report(testLabels.argmax(axis=1), treeResults.argmax(axis=1)))
treeAcc = accuracy_score(testLabels.argmax(axis=1), treeResults.argmax(axis=1)) 
print("Accuracy Tree: {:.2f}".format(treeAcc))
print("Cohen's Kappa {:.2f}".format(cohen_kappa_score(testLabels.argmax(axis=1), treeResults.argmax(axis=1))))

shape = (611, 700, 5)
Classes: 3
[[12  5 19]
 [ 5 23 23]
 [ 8 25 33]]
              precision    recall  f1-score   support

           0       0.48      0.33      0.39        36
           1       0.43      0.45      0.44        51
           2       0.44      0.50      0.47        66

    accuracy                           0.44       153
   macro avg       0.45      0.43      0.43       153
weighted avg       0.45      0.44      0.44       153

Accuracy Tree: 0.44
Cohen's Kappa 0.12
