In [1]:
import numpy as np
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score 

In [2]:
def calculateMetrics(pred, values, toRemove):
    """Compute per-class accuracy and F1 from targets and predicted scores.

    Parameters
    ----------
    pred : mapping
        Must provide ``pred['y']`` (targets) and ``pred['yp']`` (predicted
        scores). Both are indexed as 2-D arrays, one row per sample and one
        column per class. NOTE(review): ``y`` is presumably one-hot encoded --
        confirm against the code that produces the prediction files.
    values : array-like
        Class labels. The position of a label in ``values`` is used as the
        column index of that class in ``y`` / ``yp``.
    toRemove : iterable or None
        Labels whose columns are dropped before scoring. When it is not None,
        samples whose remaining target row is all zeros are dropped as well.
        ``None`` skips both filtering steps.

    Returns
    -------
    (acc, f1) : tuple of two lists
        One ``accuracy_score`` and one ``f1_score`` value per remaining class
        column, each computed on that column of the targets against the same
        column of the arg-max one-hot prediction.

    Raises
    ------
    IndexError
        If a label in ``toRemove`` does not occur in ``values``.
    """
    y = pred['y']
    yp = pred['yp']

    if toRemove is not None:
        # Resolve every label to its column index once and reuse it for both
        # arrays (asarray lets ``values`` also be a plain list).
        labels = np.asarray(values)
        dropCols = []
        for r in toRemove:
            hits = np.argwhere(labels == r)
            if hits.size == 0:
                raise IndexError(
                    "label %r from toRemove was not found in values" % (r,))
            dropCols.append(hits[0][0])

        y = np.delete(y, dropCols, axis=1)
        yp = np.delete(yp, dropCols, axis=1)

        # Drop samples that have no positive target left after the column
        # removal (their row maximum is 0).
        emptyRows = np.flatnonzero(np.amax(y, axis=1) == 0)
        y = np.delete(y, emptyRows, axis=0)
        yp = np.delete(yp, emptyRows, axis=0)

    # Turn the scores into a one-hot prediction: 1 at the arg-max column of
    # every row, 0 elsewhere.
    ypp = np.zeros_like(yp)
    ypp[np.arange(ypp.shape[0]), np.argmax(yp, axis=1)] = 1.

    nClasses = y.shape[1]
    acc = [accuracy_score(y[:, c], ypp[:, c]) for c in range(nClasses)]
    f1 = [f1_score(y[:, c], ypp[:, c], labels=[0., 1.]) for c in range(nClasses)]

    return acc, f1
    
    

In [3]:
# Labels to exclude for the site and morphology tasks.
# Context managers close the files as soon as they have been read.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("toRemoveSite.p", 'rb') as fh:
    toRemoveS = pickle.load(fh)
with open("toRemoveMorfo.p", 'rb') as fh:
    toRemoveM = pickle.load(fh)

In [4]:
# Class-label vectors for the site and morphology tasks; calculateMetrics uses
# them to map a label to its column index.
# Context managers close the files as soon as they have been read.
with open('corpusTemporal/valuesTemporal.p', 'rb') as fh:
    valuesS = pickle.load(fh)
with open('corpusTemporalV2b/valuesTemporalMorfo1.p', 'rb') as fh:
    valuesM = pickle.load(fh)

## Count examples per split (excluding removed classes)

In [5]:
def calculateLengths(corpus, key, toRemove):
    """Print how many entries of each corpus split survive the label filter.

    Parameters
    ----------
    corpus : mapping
        Maps a split name to a mapping; ``corpus[split][key]`` must be iterable.
    key : hashable
        Field of every split whose entries are counted.
    toRemove : container or None
        Entries found in this container are not counted. ``None`` means
        "exclude nothing", matching how calculateMetrics treats ``None``.

    Returns
    -------
    None
        The ``{split: count}`` dict is printed, not returned.
    """
    excluded = () if toRemove is None else toRemove
    # Count with a generator instead of materialising a throwaway list.
    count = {
        d: sum(1 for i in corpus[d][key] if i not in excluded)
        for d in corpus
    }
    print(count)

In [6]:
# Corpora for the site and morphology tasks, plus the field of each split that
# is passed to calculateLengths as the key to count.
# Context managers close the files as soon as they have been read.
with open('corpusTemporal/corpusTemporal.p', 'rb') as fh:
    corpusS = pickle.load(fh)
with open('corpusTemporalV2b/corpusTemporal.p', 'rb') as fh:
    corpusM = pickle.load(fh)
keyS = 'sede1'
keyM = 'morfo1'

In [7]:
# Per-split example counts for the site corpus, ignoring excluded labels.
calculateLengths(corpus=corpusS, key=keyS, toRemove=toRemoveS)

{'train': 50875, 'valid': 17007, 'test': 17015}


In [8]:
# Per-split example counts for the morphology corpus, ignoring excluded labels.
calculateLengths(corpus=corpusM, key=keyM, toRemove=toRemoveM)

{'train': 49436, 'valid': 16787, 'test': 16719}


# Site

In [9]:
# Site task results: model name -> per-class accuracy list / per-class F1 list.
accS, f1S = {}, {}

In [10]:
# Open through a context manager so the prediction file is always closed.
with open('predictionGRU-Asite.p', 'rb') as fh:
    accS['GRU'], f1S['GRU'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [11]:
# Open through a context manager so the prediction file is always closed.
with open('predictionMAXi-Asite.p', 'rb') as fh:
    accS['MAXi'], f1S['MAXi'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [12]:
# Open through a context manager so the prediction file is always closed.
with open('predictionMAXh-Asite.p', 'rb') as fh:
    accS['MAXh'], f1S['MAXh'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [13]:
# Open through a context manager so the prediction file is always closed.
with open('predictionATTh-Asite.p', 'rb') as fh:
    accS['ATTh'], f1S['ATTh'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [14]:
# Open through a context manager so the prediction file is always closed.
with open('predictionATT-Asite.p', 'rb') as fh:
    accS['ATT'], f1S['ATT'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [15]:
# Open through a context manager so the prediction file is always closed.
with open('predictionMAX-Asite.p', 'rb') as fh:
    accS['MAX'], f1S['MAX'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [16]:
# Open through a context manager so the prediction file is always closed.
with open('predictionsSVM-Asite.p', 'rb') as fh:
    accS['SVM'], f1S['SVM'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [17]:
#accS['XGBoost'],f1S['XGBoost'] = calculateMetrics(pickle.load(open('predictionsXGBoost-Asite.p', 'rb')), valuesS, toRemoveS)

In [18]:
# Open through a context manager so the prediction file is always closed.
with open('predictionsBERT-Asite.p', 'rb') as fh:
    accS['BERT'], f1S['BERT'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

In [19]:
# Open through a context manager so the prediction file is always closed.
with open('predictionsCNN-Asite.p', 'rb') as fh:
    accS['CNN'], f1S['CNN'] = calculateMetrics(pickle.load(fh), valuesS, toRemoveS)

# Morpho

In [20]:
# Morphology task results: model name -> per-class accuracy list / per-class F1 list.
accM, f1M = {}, {}

In [21]:
# Open through a context manager so the prediction file is always closed.
with open('predictionGRU-Amorpho.p', 'rb') as fh:
    accM['GRU'], f1M['GRU'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [22]:
# Open through a context manager so the prediction file is always closed.
with open('predictionMAXi-Amorpho.p', 'rb') as fh:
    accM['MAXi'], f1M['MAXi'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [23]:
# Open through a context manager so the prediction file is always closed.
with open('predictionMAXh-Amorpho.p', 'rb') as fh:
    accM['MAXh'], f1M['MAXh'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [24]:
# Open through a context manager so the prediction file is always closed.
with open('predictionATTh-Amorpho.p', 'rb') as fh:
    accM['ATTh'], f1M['ATTh'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [25]:
# Open through a context manager so the prediction file is always closed.
with open('predictionATT-Amorpho.p', 'rb') as fh:
    accM['ATT'], f1M['ATT'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [26]:
# Open through a context manager so the prediction file is always closed.
with open('predictionMAX-Amorpho.p', 'rb') as fh:
    accM['MAX'], f1M['MAX'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [27]:
# Open through a context manager so the prediction file is always closed.
with open('predictionsSVM-Amorpho.p', 'rb') as fh:
    accM['SVM'], f1M['SVM'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [28]:
#accM['XGBoost'],f1M['XGBoost'] = calculateMetrics(pickle.load(open('predictionsXGBoost-Amorpho.p', 'rb')), valuesM, toRemoveM)

In [29]:
# Open through a context manager so the prediction file is always closed.
with open('predictionsBERT-Amorpho.p', 'rb') as fh:
    accM['BERT'], f1M['BERT'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [30]:
# Open through a context manager so the prediction file is always closed.
with open('predictionsCNN-Amorpho.p', 'rb') as fh:
    accM['CNN'], f1M['CNN'] = calculateMetrics(pickle.load(fh), valuesM, toRemoveM)

In [31]:
# Persist the per-class metric dicts. Each file is written inside a context
# manager so it is flushed and closed deterministically instead of whenever
# the anonymous file object happens to be garbage-collected.
for results, path in (
    (accS, 'matrixClasses-accuracySite.p'),
    (f1S, 'matrixClasses-f1Site.p'),
    (accM, 'matrixClasses-accuracyMorpho.p'),
    (f1M, 'matrixClasses-f1Morpho.p'),
):
    with open(path, 'wb') as fh:
        pickle.dump(results, fh)