/
visualizations.py
101 lines (77 loc) · 3.08 KB
/
visualizations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, ConfusionMatrixDisplay
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
# Combining all metrics in the right format for visualizations
def pRFSbyPredictor(groundTruth, predictorNames, predictorData):
    """Collect precision/recall/F1 and confusion matrices for every predictor.

    For each position ``i`` in ``predictorNames`` the predictions
    ``predictorData[i]`` are scored against ``groundTruth`` with
    ``precision_recall_fscore_support`` under the 'macro', 'micro' and
    'weighted' averages, and a confusion matrix is computed with
    ``confusion_matrix``.

    Returns a DataFrame indexed by ``predictorNames`` with the columns
    'macro', 'micro' and 'weighted' (each cell a list
    ``[precision, recall, f1]`` rounded to three decimals) and 'ConfMatrix'
    (each cell the predictor's confusion matrix).
    """
    averaging_modes = ('macro', 'micro', 'weighted')
    scores_by_mode = {mode: [] for mode in averaging_modes}
    confusion_matrices = []

    # predictorData is indexed by position (not iterated) so that any
    # container supporting ``predictorData[i]`` keeps working.
    for position, _name in enumerate(predictorNames):
        for mode in averaging_modes:
            # The fourth element of the result (support) is not used.
            precision, recall, f1, _support = precision_recall_fscore_support(
                groundTruth, predictorData[position], average=mode)
            scores_by_mode[mode].append(
                [round(precision, 3), round(recall, 3), round(f1, 3)])
        confusion_matrices.append(
            confusion_matrix(groundTruth, predictorData[position]))

    summary = pd.DataFrame(index=predictorNames)
    for mode in averaging_modes:
        summary[mode] = scores_by_mode[mode]
    summary['ConfMatrix'] = confusion_matrices
    return summary
# Confusion Matrix
def genCFMPlotfromPred(groundTruth, predLabels, predData, fileLabel, path):
    """Save one confusion-matrix plot (PDF) per predictor.

    For every position ``i`` in ``predLabels`` a confusion matrix of
    ``groundTruth`` against ``predData[i]`` is drawn with the 'plasma' colour
    map. The plot title is the label's short form: the text before the first
    '(' if the label contains one, otherwise the text before the first '_',
    otherwise the whole label. The file is written to
    ``path + fileLabel + "_" + str(label) + ".pdf"``.

    ``path`` is used as a plain string prefix; it is not joined with a path
    separator, so a directory must be passed with its trailing separator.

    Each figure is closed after it has been written. ``from_predictions``
    opens a new figure on every call, so leaving them open accumulates one
    live figure per predictor.
    """
    for position, label in enumerate(predLabels):
        display = ConfusionMatrixDisplay.from_predictions(
            groundTruth, predData[position], cmap='plasma')
        try:
            if '(' in label:
                cleanLabel = label.split("(")[0]
            elif '_' in label:
                cleanLabel = label.split("_")[0]
            else:
                cleanLabel = label
            # Title and save through the display's own axes/figure instead of
            # pyplot's implicit "current" figure.
            display.ax_.set_title(cleanLabel)
            display.figure_.savefig(
                path + fileLabel + "_" + str(label) + '.pdf', format='pdf')
        finally:
            # Release the figure even if titling or saving fails.
            plt.close(display.figure_)
# Example: genCFMPlotfromPred(real_values, predNames, predData_all, 'testCFM', path)
# Bar Plot
def genBars(data, fileLabel, path):
    """Save one grouped bar chart (PDF) of precision/recall/F1 per row of data.

    The first three cells of each row are read by position and must each be
    a three-element sequence. They are interpreted as
    ``[precision, recall, f1]`` for the macro, micro and weighted averages,
    in that order, which is the layout ``pRFSbyPredictor`` returns.
    NOTE(review): confirm no caller passes a frame with a different layout.

    On the chart the x axis groups are the averaging modes and the three
    bars in each group are precision, recall and F1. The x-axis label is the
    row label's short form: the text before the first '(' if present,
    otherwise the text before the first '_', otherwise the whole label. Each
    chart is written to ``path + fileLabel + "_" + str(label) + ".pdf"``.

    ``path`` is used as a plain string prefix; it is not joined with a path
    separator.
    """
    metric_names = ["Precision", "Recall", "F1"]
    metric_colors = ['slategrey', 'lightsteelblue', 'lavender']
    width = 0.2
    x = np.arange(3)

    for position, label in enumerate(data.index):
        # Select the row by position so a duplicated index label still yields
        # a single row, and read cells with .iloc because integer keys on a
        # label-indexed Series are deprecated in pandas.
        row = data.iloc[position]
        # scores[a, m]: averaging mode a (macro, micro, weighted) x
        # metric m (precision, recall, f1).
        scores = np.asarray(
            [row.iloc[0], row.iloc[1], row.iloc[2]], dtype=float)

        plt.clf()
        # One series per metric, so each x group shows the three metrics of
        # a single averaging mode and the tick and legend labels match.
        for m, (name, color) in enumerate(zip(metric_names, metric_colors)):
            plt.bar(x + (m - 1) * width, scores[:, m], width,
                    color=color, label=name)

        if '(' in label:
            cleanLabel = label.split("(")[0]
        elif '_' in label:
            cleanLabel = label.split("_")[0]
        else:
            cleanLabel = label
        plt.xlabel(cleanLabel)
        # Tick order follows the order of the cells read above.
        plt.xticks(x, ['Macro', 'Micro', 'Weighted'])
        plt.legend(loc='lower left')
        plt.ylabel("Scores")
        plt.ylim(0.0, 1.0)
        plt.savefig(path + fileLabel + "_" + str(label) + '.pdf', format='pdf')