# Check on Proj-C Predictions Format

This notebook performs a quick check of the format of the prediction files: it verifies that the files are named correctly and that the predictions are in the expected format. It does not check the accuracy of the predictions.

In [10]:
import glob 
import os
import numpy as np

# Folder that holds the prediction files to be checked
folderPred = "./output/final"

# Collect every per-subject prediction file, ordered by file name so that
# the positions in L line up with the subject order used further down
L = sorted(glob.glob("{}/subject_*__y.csv".format(folderPred)))

In [11]:
# The folder must contain exactly these four prediction files.
# Comparing the whole list at once checks both the count and every name.
expectedNames = [
    'subject_009_01__y.csv',
    'subject_010_01__y.csv',
    'subject_011_01__y.csv',
    'subject_012_01__y.csv',
]
assert [os.path.basename(p) for p in L] == expectedNames

In [12]:
# Expected number of predictions per subject file, in the same (sorted) order as L
predLen = [9498, 12270, 12940, 11330]

# The only values a prediction is allowed to take
validLabels = (0, 1, 2, 3)

# Guard against a count mismatch so that zip() below cannot silently skip a file
assert len(L)==len(predLen), \
    "Expected %d prediction files, found %d" % (len(predLen), len(L))

for path, expectedLen in zip(L, predLen):
    # Loading the predictions; genfromtxt turns blank or non-numeric entries into NaN
    pred = np.genfromtxt(path, delimiter=',')

    # Checking that this is a single column (a lone value loads as 0-d, several columns as 2-d)
    assert pred.ndim==1, \
        "%s: expected a single column, got an array of shape %s" % (path, pred.shape)

    # Report actual vs. expected number of data points
    print(pred.shape[0], expectedLen)

    # Making sure you have the correct number of data points
    assert pred.shape[0]==expectedLen, \
        "%s: expected %d predictions, found %d" % (path, expectedLen, pred.shape[0])

    # Prediction should be exactly 0, 1, 2, or 3. A min/max range check would also
    # accept values such as 1.5 and can miss NaN; a membership test rejects both.
    assert np.isin(pred, validLabels).all(), \
        "%s: predictions must all be one of %s" % (path, validLabels)


9498 9498
12270 12270
12940 12940
11330 11330


In [27]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

# Print one confusion matrix per subject (rows: ground truth, columns: prediction),
# comparing ./subject_XXX_01__y.csv with ./subject_XXX_01__y_prediction.csv.
for subjectId in ("009", "010", "011", "012"):
    yTrue = np.genfromtxt("./subject_{}_01__y.csv".format(subjectId), delimiter=',')
    yPred = np.genfromtxt("./subject_{}_01__y_prediction.csv".format(subjectId), delimiter=',')

    # The final ground-truth label is dropped before comparing.
    # NOTE(review): presumably the prediction files are one sample shorter than
    # the label files - confirm.
    #
    # The predictions are passed through unchanged. The earlier version added
    # yPred[-1] to every prediction of subject 009, which shifts all labels
    # whenever the last prediction is not 0.
    print(confusion_matrix(yTrue[:-1], yPred))

[[5285  382   80 1258]
 [ 419   85    1   53]
 [  12    0   84    0]
 [1336   12    4  486]]
[[5942  901 1308  665]
 [ 305   44   17    5]
 [ 218   47   95   15]
 [1909  331  346  121]]
[[8613  173  191  947]
 [  26  380    1   13]
 [  20    0  574   13]
 [ 300    5   17 1666]]
[[8402  138  251  760]
 [  37  466    1   10]
 [  59    1  663    1]
 [  56   22    6  456]]


In [34]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

# Print one confusion matrix per subject (rows: ground truth, columns: prediction),
# comparing ./subject_XXX_01__y.csv with the file of the same name in ./output/latest.
for subjectId in ("009", "010", "011", "012"):
    fileName = "subject_{}_01__y.csv".format(subjectId)
    yTrue = np.genfromtxt("./" + fileName, delimiter=',')
    yPred = np.genfromtxt("./output/latest/" + fileName, delimiter=',')

    # The predictions are passed through unchanged. The earlier version added
    # yPred[-1] to every prediction of subject 009, which shifts all labels
    # whenever the last prediction is not 0.
    print(confusion_matrix(yTrue, yPred))


[[4961  450  118 1477]
 [ 371  110    0   77]
 [   0    1   95    0]
 [1262   18    0  558]]
[[6425  135  161 2096]
 [  23  337    2    9]
 [  19    0  333   23]
 [ 800   15    5 1887]]
[[8331  202  224 1168]
 [  12  392    1   15]
 [  15    0  581   11]
 [ 404   18   17 1549]]
[[7956  151  235 1210]
 [  25  485    1    3]
 [  48    1  672    3]
 [  83   24    4  429]]
