In [1]:
import numpy as np
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score 

In [2]:
def _topKAccuracy(trueIdx, scores, k):
    """Return the fraction of rows whose true class index is among the k highest-scoring columns."""
    # argsort is ascending, so the last k columns of each row hold its k largest scores.
    topK = np.argsort(scores, 1)[:, -k:]
    hits = (topK == trueIdx[:, None]).any(axis=1)
    return np.count_nonzero(hits) / len(trueIdx)


def calculateMetrics(pred, values, toRemove):
    """Print accuracy, top-3 / top-5 accuracy and macro F1 for one set of predictions.

    Parameters
    ----------
    pred : mapping
        Must provide ``pred['y']`` (targets) and ``pred['yp']`` (predicted scores).
        Both are indexed as 2-D arrays: one row per sample, one column per class.
        The true class of a row is taken as the argmax of its ``y`` row.
    values : array-like
        Class labels, positioned like the columns of ``y`` / ``yp``. Only used to
        translate the entries of ``toRemove`` into column indices.
    toRemove : iterable or None
        Labels whose columns are dropped before scoring. Rows of ``y`` whose
        maximum is 0 after that drop are discarded from both arrays. ``None``
        disables all filtering.

    Returns
    -------
    None
        The metrics are printed as percentages with one decimal.

    Raises
    ------
    IndexError
        If an entry of ``toRemove`` does not occur in ``values``.
    ValueError
        If no sample is left to evaluate.
    """
    y = np.asarray(pred['y'])
    yp = np.asarray(pred['yp'])

    if toRemove is not None:
        labels = np.asarray(values)
        # Resolve each excluded label to its column once and reuse it for both arrays.
        columns = []
        for label in toRemove:
            matches = np.argwhere(labels == label)
            if len(matches) == 0:
                raise IndexError(
                    "label {!r} from toRemove is not present in values".format(label))
            columns.append(matches[0][0])
        y = np.delete(y, columns, axis=1)
        yp = np.delete(yp, columns, axis=1)

        # Rows that have no positive target left (row maximum is 0) cannot be
        # scored against the remaining classes, so drop them from both arrays.
        keep = np.amax(y, 1) != 0
        y = y[keep]
        yp = yp[keep]

    if len(y) == 0:
        # Previously this surfaced as a ZeroDivisionError further down.
        raise ValueError("no samples left to evaluate after filtering")

    yb = np.argmax(y, axis=1)
    ypb = np.argmax(yp, axis=1)

    accT3 = _topKAccuracy(yb, yp, 3)
    accT5 = _topKAccuracy(yb, yp, 5)

    acc = accuracy_score(yb, ypb)
    f1M = f1_score(yb, ypb, average="macro")

    print("acc: {:0.1f} - accT3: {:0.1f} - accT5: {:0.1f} - f1macro: {:0.1f}".format(acc*100, accT3*100, accT5*100, f1M*100), flush=True)

    # Per-class F1, if ever needed: f1_score(yb, ypb, average=None)
    

In [3]:
# Labels excluded from evaluation (passed as `toRemove` to the helpers).
# The context managers close each file instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open("toRemoveSite.p", 'rb') as toRemoveFile:
    toRemoveS = pickle.load(toRemoveFile)
with open("toRemoveMorfo.p", 'rb') as toRemoveFile:
    toRemoveM = pickle.load(toRemoveFile)

In [4]:
# Label vocabularies (passed as `values` to calculateMetrics).
# The context managers close each file instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('corpusTemporal/valuesTemporal.p', 'rb') as valuesFile:
    valuesS = pickle.load(valuesFile)
with open('corpusTemporalV2b/valuesTemporalMorfo1.p', 'rb') as valuesFile:
    valuesM = pickle.load(valuesFile)

## Count examples per split (excluded labels removed)

In [5]:
def calculateLengths(corpus, key, toRemove):
    """Print, for every split of ``corpus``, how many labels survive the exclusion list.

    Parameters
    ----------
    corpus : mapping
        Maps a split name to a mapping; ``corpus[split][key]`` must be iterable.
    key : hashable
        Selects which sequence inside each split is counted.
    toRemove : container or None
        Items that are not counted. ``None`` excludes nothing, matching how
        ``calculateMetrics`` treats a ``None`` ``toRemove``.

    Returns
    -------
    None
        The ``{split: count}`` dict is printed.
    """
    # An empty tuple makes the membership test below a no-op when nothing is excluded.
    excluded = () if toRemove is None else toRemove
    # Count lazily rather than materialising a throwaway list per split.
    count = {
        split: sum(1 for item in corpus[split][key] if item not in excluded)
        for split in corpus
    }
    print(count)

In [6]:
# Corpora whose splits are counted below; the context managers close each file
# instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('corpusTemporal/corpusTemporal.p', 'rb') as corpusFile:
    corpusS = pickle.load(corpusFile)
with open('corpusTemporalV2b/corpusTemporal.p', 'rb') as corpusFile:
    corpusM = pickle.load(corpusFile)
# Keys selecting the label sequence inside each split of the respective corpus.
keyS = 'sede1'
keyM = 'morfo1'

In [7]:
# Per-split counts for corpusS, skipping every label listed in toRemoveS.
calculateLengths(corpus=corpusS, key=keyS, toRemove=toRemoveS)

{'train': 50875, 'valid': 17007, 'test': 17015}


In [8]:
# Per-split counts for corpusM, skipping every label listed in toRemoveM.
calculateLengths(corpus=corpusM, key=keyM, toRemove=toRemoveM)

{'train': 49436, 'valid': 16787, 'test': 16719}


# Site

In [9]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionGRU-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.9 - accT3: 96.5 - accT5: 97.7 - f1macro: 58.3


In [10]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionMAXi-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 88.0 - accT3: 95.4 - accT5: 96.2 - f1macro: 46.1


In [11]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionMAXh-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.9 - accT3: 96.2 - accT5: 97.8 - f1macro: 58.8


In [12]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionATTh-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.9 - accT3: 96.3 - accT5: 97.7 - f1macro: 58.0


In [13]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionATT-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 90.1 - accT3: 96.2 - accT5: 97.6 - f1macro: 60.0


In [14]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionMAX-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 90.3 - accT3: 96.6 - accT5: 98.1 - f1macro: 61.9


In [15]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsSVM-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.7 - accT3: 95.9 - accT5: 96.8 - f1macro: 60.0


In [16]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsXGBoost-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.1 - accT3: 95.8 - accT5: 97.2 - f1macro: 58.0


In [17]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsBERT-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.9 - accT3: 96.3 - accT5: 97.8 - f1macro: 56.6


In [18]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsCNN-Asite.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesS, toRemoveS)

acc: 89.2 - accT3: 96.0 - accT5: 97.6 - f1macro: 55.3


# Morpho

In [19]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionGRU-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 83.3 - accT3: 94.6 - accT5: 96.6 - f1macro: 55.2


In [20]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionMAXi-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 73.4 - accT3: 91.0 - accT5: 93.6 - f1macro: 31.3


In [21]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionMAXh-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 83.7 - accT3: 94.4 - accT5: 96.4 - f1macro: 54.5


In [22]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionATTh-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 83.7 - accT3: 94.4 - accT5: 96.2 - f1macro: 57.5


In [23]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionATT-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 84.8 - accT3: 94.9 - accT5: 96.9 - f1macro: 61.3


In [24]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionMAX-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 84.6 - accT3: 95.0 - accT5: 96.9 - f1macro: 59.2


In [25]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsSVM-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 82.4 - accT3: 94.0 - accT5: 95.6 - f1macro: 53.7


In [26]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsXGBoost-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 84.1 - accT3: 94.4 - accT5: 96.5 - f1macro: 59.6


In [27]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsBERT-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 84.3 - accT3: 93.2 - accT5: 94.9 - f1macro: 51.1


In [28]:
# Close the prediction file deterministically instead of leaking the handle.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open('predictionsCNN-Amorpho.p', 'rb') as predFile:
    calculateMetrics(pickle.load(predFile), valuesM, toRemoveM)

acc: 83.3 - accT3: 94.4 - accT5: 96.7 - f1macro: 55.0
