In [None]:
# Global configuration shared by every metric cell of this notebook.
# The trained models are assumed to exist on disk already; nothing is trained here.

import fasttext

# directory that holds the x_/y_ split files and the category-name files
_dataDir = "../../data/"
# prefix that fasttext puts in front of the category number in every predicted label
ftlabel = "__label__"
# tema (topic) identifiers; models and data files exist per tema
temas = [1,2,3,4]

# Model files are named <model_name><tema>_best_at_<k>.bin and are loaded by relative path.
# NOTE(review): the original comment gave this same file pattern both for the models trained
# without pretrained vectors and for the ones trained with them -- confirm the actual prefix
# of the pretrained-vector models.

# prefix of the model files to load. THIS IS THE ONLY LINE TO CHANGE TO LOAD OTHER MODELS
model_name = "_model_"

### First load all the models and the test data (dev+test)

In [1]:
# Load, for every tema, the model selected by recall@1 and the one selected by recall@5.
# Both lists have 5 slots so they can be indexed directly by tema number (slot 0 stays None).
models_at_1, models_at_5 = [None]*5, [None]*5
for i in temas:
    model_prefix = model_name + str(i)
    models_at_1[i] = fasttext.load_model(model_prefix + "_best_at_1.bin")
    models_at_5[i] = fasttext.load_model(model_prefix + "_best_at_5.bin")
 

In [2]:
# utility function for reading list values for fasttext

import string

def read_text_file_for_ft_input(filename):
    """Read a text file and normalize each line for use as fasttext input.

    #Arguments
        filename: path of a text file with one example per line
    #Returns
        A list with one string per line of the file: punctuation
        (string.punctuation) removed and lowercased. A line that ends up
        empty is replaced by a single space.
    """
    out = []
    with open(filename) as f:
        for line in f:
            # Strip only the line terminator. Slicing with [:-1] would also drop the
            # last real character of a final line that has no trailing newline.
            text = line.rstrip("\n")
            strdata = "".join([c for c in text if c not in string.punctuation]).lower()
            ### This is because I found a string in the data that is empty after removing punctuation
            if strdata == '':
                strdata = ' '
            out.append(strdata)
    return out

def read_numbers_file_for_ft_input(filename):
    """Read a file that holds one integer per line and return the values as a list of ints."""
    with open(filename) as f:
        return [int(row) for row in f]

In [3]:
# Load the dev and test splits of every tema.
# Every list has 5 slots so it can be indexed directly by tema number (slot 0 stays None).
# The x files are read as normalized text, the y files as integer labels.
# Loading of the train split is kept below but disabled.

#train_x = [None]*5
#train_y = [None]*5

test_x = [None]*5
test_y = [None]*5

dev_x = [None]*5
dev_y = [None]*5

for i in temas:

    # train split (disabled)
#    train_x[i] = read_text_file_for_ft_input(
#        _dataDir + "x_train_tema_" + str(i) + "_categorias_pnud_0.txt")
#    train_y[i] = read_numbers_file_for_ft_input(
#        _dataDir + "y_train_tema_" + str(i) + "_categorias_pnud_0.txt")
    
    # dev split: texts and their integer labels
    dev_x[i] = read_text_file_for_ft_input(
        _dataDir + "x_dev_tema_" + str(i) + "_categorias_pnud_0.txt")
    dev_y[i] = read_numbers_file_for_ft_input(
        _dataDir + "y_dev_tema_" + str(i) + "_categorias_pnud_0.txt")
    
    # test split: texts and their integer labels
    test_x[i] = read_text_file_for_ft_input(
        _dataDir + "x_test_tema_" + str(i) + "_categorias_pnud_0.txt")
    test_y[i] = read_numbers_file_for_ft_input(
        _dataDir + "y_test_tema_" + str(i) + "_categorias_pnud_0.txt")



### Compute the predictions

In [4]:
# Predict on the dev and test splits of every tema: once with the recall@1 models
# (k=1) and once with the recall@5 models (k=5). Train predictions are kept but disabled.
# NOTE: `sizes` is a notebook-level name that is rebound between the two loops below
# (and again in later cells), so the statement order matters.

#predict_at_1_train = [None]*5
#predict_at_5_train = [None]*5

predict_at_1_dev = [None]*5
predict_at_5_dev = [None]*5

predict_at_1_test = [None]*5
predict_at_5_test = [None]*5


# number of labels to predict per case, indexed by tema (slot 0 unused)
sizes = [None,1,1,1,1]
for i in temas:
#    predict_at_1_train[i] = models_at_1[i].predict(train_x[i],k= sizes[i])
    predict_at_1_dev[i] = models_at_1[i].predict(dev_x[i],k= sizes[i])
    predict_at_1_test[i] = models_at_1[i].predict(test_x[i],k= sizes[i])

# number of labels to predict per case, indexed by tema (slot 0 unused)
sizes = [None,5,5,5,5]
for i in temas:    
#    predict_at_5_train[i] = models_at_5[i].predict(train_x[i],k= sizes[i])   
    predict_at_5_dev[i] = models_at_5[i].predict(dev_x[i],k= sizes[i])    
    predict_at_5_test[i] = models_at_5[i].predict(test_x[i],k= sizes[i])    

  

In [6]:
# transform labels to int values
# this is needed because fasttext produces predictions as __label__<number_of_category>

def label_to_int(L):
    """Replace fasttext label strings by integer category numbers, in place.

    #Arguments
        L: list indexed by tema number; for every tema in the global `temas`,
           L[tema] is a list with one mutable list of predicted labels per case
    #Returns
        None. Every label such as "__label__7" is replaced by the int 7.
        Values that are already ints are left untouched, so running the
        conversion again on the same data (e.g. re-executing the cell) is safe.
    """
    # length of the label prefix, taken from the shared constant instead of a magic 9
    prefix_len = len(ftlabel)
    for i in temas:
        for ranking in L[i]:
            for k, label in enumerate(ranking):
                if isinstance(label, int):
                    # already converted by a previous call
                    continue
                ranking[k] = int(label[prefix_len:])
                           
# Convert the dev and test predictions to integer labels; the lists are modified in place.
# The conversions of the train predictions are kept but disabled.
#label_to_int(predict_at_1_train)
#label_to_int(predict_at_5_train)
label_to_int(predict_at_1_dev)
label_to_int(predict_at_5_dev)
label_to_int(predict_at_1_test)
label_to_int(predict_at_5_test)


# utility function to select the first prediction from a list of predictions
def first_prediction(L):
    """Return a new list holding the first element of every prediction list in L."""
    return [ranking[0] for ranking in L]

<br/>
<br/>
<br/>

# Metrics on dev + test set with best models according to recall@1

### Metrics for predictions on dev set

In [8]:
from sklearn import metrics

# Classification report of the top-1 predictions on the dev set, one report per tema.
for i in temas:
    # load categories first (one category name per line)
    categoriesFile = _dataDir + "categorias_tema_" + str(i) + "_pnud_0.txt"
    with open(categoriesFile) as f:
        # Strip only the newline: line[:-1] would cut the last character of the
        # final name when the file does not end with a newline.
        categories = [line.rstrip("\n") for line in f]

    predicted = first_prediction(predict_at_1_dev[i])
    print("Tema " + str(i))
    print(metrics.classification_report(dev_y[i],predicted,target_names=categories))

Tema 1
                                                          precision    recall  f1-score   support

                                          Amistad cívica       0.20      0.04      0.06        26
                                    Autonomía / Libertad       0.66      0.62      0.64       168
                                  Bien Común / Comunidad       0.60      0.66      0.63       275
                                              Ciudadanía       0.00      0.00      0.00        17
                                              Democracia       0.64      0.76      0.69       380
                                              Desarrollo       0.35      0.37      0.36        52
                                       Descentralización       0.87      0.90      0.88       307
                                                Dignidad       0.61      0.67      0.64       193
                                              Diversidad       0.36      0.26      0.30        73
            

  'precision', 'predicted', average, warn_for)


### Metrics for predictions on test set

In [9]:
from sklearn import metrics

# Classification report of the top-1 predictions on the test set, one report per tema.
for i in temas:
    # load categories first (one category name per line)
    categoriesFile = _dataDir + "categorias_tema_" + str(i) + "_pnud_0.txt"
    with open(categoriesFile) as f:
        # Strip only the newline: line[:-1] would cut the last character of the
        # final name when the file does not end with a newline.
        categories = [line.rstrip("\n") for line in f]

    predicted = first_prediction(predict_at_1_test[i])
    print("Tema " + str(i))
    print(metrics.classification_report(test_y[i],predicted,target_names=categories))

Tema 1
                                                          precision    recall  f1-score   support

                                          Amistad cívica       0.22      0.07      0.11        27
                                    Autonomía / Libertad       0.68      0.64      0.66       168
                                  Bien Común / Comunidad       0.65      0.73      0.69       276
                                              Ciudadanía       0.00      0.00      0.00        16
                                              Democracia       0.60      0.73      0.66       380
                                              Desarrollo       0.55      0.40      0.47        52
                                       Descentralización       0.87      0.92      0.89       307
                                                Dignidad       0.60      0.63      0.62       193
                                              Diversidad       0.42      0.24      0.30        72
            

  'precision', 'predicted', average, warn_for)



<br/>
<br/>
<br/>

# Metrics on dev + test set with best models according to recall@5

### Computing recall@5 defined as the total number of labels correctly predicted among the first 5 best ranked classes for every test case.

For example, for a test case in which the true class is 7 and the top 5 predicted classes are 4,3,7,8,10, we count 1. In contrast, for a test case in which the true class is 7 and the top 5 predicted classes are 1,2,3,4,5, we count nothing.

In [None]:
# utility function to compute top k accuracy

def top_k_accuracy(gold,predicted,k):
    '''
    #Arguments
        gold: the true labels of the test cases (size N = number of test cases)
        predicted: ranked list of label predictions for every test case (size N x L, where L is assumed to be >= k)
        k: the number of elements in the predicted lists that should be considered to compute the metric
    #Returns
        The portion of cases (between 0 and 1) in which the true label value was among the first k predicted labels.
        Returns 0.0 when there are no test cases.
    #Raises
        ValueError: if gold and predicted do not have the same number of elements
    '''
    predicted = list(predicted)
    # zip() would silently drop the unmatched cases while the division below still
    # used len(gold), producing a wrong value instead of an error
    if len(gold) != len(predicted):
        raise ValueError('gold has ' + str(len(gold)) + ' elements but predicted has ' + str(len(predicted)))
    # avoid a ZeroDivisionError on an empty evaluation set
    if len(gold) == 0:
        return 0.0
    count = sum(1 for g,pred_labels in zip(gold,predicted) if g in pred_labels[:k])
    return count/len(gold)

In [36]:
# Print, for every tema, the recall@5 (as a percentage with one decimal) on dev and test.
for i in temas:
    print("Tema " + str(i))
    print("---------")
    for split, gold, ranked in (("dev", dev_y[i], predict_at_5_dev[i]),
                                ("test", test_y[i], predict_at_5_test[i])):
        pct = round(100*top_k_accuracy(gold,ranked,5),1)
        print(split + ": \t" + str(pct) + "%")
    print()

Tema 1
---------
dev: 	89.5%
test: 	89.4%

Tema 2
---------
dev: 	91.7%
test: 	90.5%

Tema 3
---------
dev: 	95.8%
test: 	95.7%

Tema 4
---------
dev: 	91.6%
test: 	91.0%



### Computing average ranking size for correct predictions and size of ranking for different percentiles

In [45]:
import numpy as np

def ranking_sizes(gold,predicted):
    """Return the 1-based position of each true label inside its ranking.

    gold holds the true label of every case and predicted the matching ranked
    list of labels. The positions are returned as a numpy array. An Exception
    is raised as soon as a true label is missing from its ranking.
    """
    positions = []
    for true_label, ranking in zip(gold,predicted):
        if true_label in ranking:
            positions.append(ranking.index(true_label) + 1)
            continue
        raise Exception('Label ' + str(true_label) + ' is not in the ranking. Unable to compute an average')
    return np.array(positions)


In [69]:
# First compute the complete ranking of labels for every test case, using the recall@5 models.
predict_total_test = [None]*5

# number of labels to predict per case, indexed by tema (slot 0 unused)
# NOTE(review): 37, 44, 12 and 21 are presumably the number of categories of each tema,
# so that the true label always appears in the ranking -- confirm against the category files.
sizes = [None,37,44,12,21]
for i in temas:    
    predict_total_test[i] = models_at_5[i].predict(test_x[i],k= sizes[i])    

# convert the predicted labels to integers; the lists are modified in place
label_to_int(predict_total_test)

In [89]:
# Table with, per tema, the mean position of the true label in the full test ranking
# and the 80th, 90th and 95th percentiles of that position (truncated to int).
print("\t\tavgPos\t80%\t90%\t95%")

for i in temas:
    positions = ranking_sizes(test_y[i],predict_total_test[i])
    avg_pos = round(np.mean(positions),2)
    cutoffs = [int(np.percentile(positions,q)) for q in (80,90,95)]
    columns = [str(avg_pos)] + [str(c) for c in cutoffs]
    print("Tema " + str(i) + ":\t\t" + "\t".join(columns))

		avgPos	80%	90%	95%
Tema 1:		2.87	3	6	11
Tema 2:		2.84	2	5	12
Tema 3:		1.65	2	3	5
Tema 4:		2.3	2	5	9



<br/>
<br/>
<br/>

# More experiments

### Predictions on test set with best model according to recall@1 considering 'concepto_original' + 'fundamento'

In [10]:
### What if we concatenate the 'concepto original' (the category name) to the text?
# NOTE: the name prepended to every test text is the one of its gold category (taken
# from test_y), so the true label is part of the model input in this experiment.

sizes = [None,1,1,1,1]

for i in temas:
    # category names, one per line
    categoriesFile = _dataDir + "categorias_tema_" + str(i) + "_pnud_0.txt"
    with open(categoriesFile) as f:
        # Strip only the newline: line[:-1] would cut the last character of the
        # final name when the file does not end with a newline.
        categories = [line.rstrip("\n") for line in f]

    # build the new test inputs: normalized gold category name + " " + original text
    test_x_plus_category = []
    for text,cat in zip(test_x[i],test_y[i]):
        str_category = "".join([c for c in categories[cat] if c not in string.punctuation]).lower()
        test_x_plus_category.append(str_category + " " + text)

    predicted = first_prediction(models_at_1[i].predict(test_x_plus_category,k= sizes[i]))
    # drop the fasttext label prefix to get the integer category number
    predicted_ints = [int(l[len(ftlabel):]) for l in predicted]

    print("Tema " + str(i))
    print(metrics.classification_report(test_y[i],predicted_ints,target_names=categories))

Tema 1
                                                          precision    recall  f1-score   support

                                          Amistad cívica       1.00      0.52      0.68        27
                                    Autonomía / Libertad       0.90      0.98      0.94       168
                                  Bien Común / Comunidad       0.87      0.98      0.92       276
                                              Ciudadanía       0.00      0.00      0.00        16
                                              Democracia       0.77      0.89      0.82       380
                                              Desarrollo       0.91      0.56      0.69        52
                                       Descentralización       0.97      0.98      0.98       307
                                                Dignidad       0.89      0.88      0.89       193
                                              Diversidad       0.59      0.47      0.52        72
            

  'precision', 'predicted', average, warn_for)


Tema 2
                                                   precision    recall  f1-score   support

                                         A huelga       0.97      0.83      0.89        35
                                   A la educación       0.95      0.97      0.96       509
                            A la honra / Al honor       1.00      0.17      0.29         6
                          A la identidad cultural       0.93      0.79      0.85        33
                                 A la información       0.73      0.91      0.81        75
                 A la integración de discapacidad       0.94      0.98      0.96        62
                A la integridad física y psíquica       0.84      0.85      0.85        61
                                A la nacionalidad       1.00      0.79      0.88        14
                               A la participación       0.89      0.91      0.90       116
                                       A la salud       0.97      0.98      0.97  

### Predictions on test considering only 'concepto_original'

In [11]:
### What if we only use the name of the class as the input text?

for i in temas:
    # normalized category names (punctuation removed, lowercased), one per line of the file
    categoriesFile = _dataDir + "categorias_tema_" + str(i) + "_pnud_0.txt"
    with open(categoriesFile) as f:
        # Strip only the newline: line[:-1] would cut the last character of the
        # final name when the file does not end with a newline.
        categories = ["".join([c for c in line.rstrip("\n") if c not in string.punctuation]).lower()
                      for line in f]
    # the gold label of each name is its position in the file
    gold = list(range(len(categories)))

    # predict once (the original ran the same prediction twice and discarded the first result)
    predicted = first_prediction(models_at_1[i].predict(categories,k= 1))
    # drop the fasttext label prefix to get the integer category number
    predicted_ints = [int(l[len(ftlabel):]) for l in predicted]

    print("Tema " + str(i))
    print(metrics.classification_report(gold,predicted_ints,target_names=categories))
    

Tema 1
                                                         precision    recall  f1-score   support

                                         amistad cívica       0.50      1.00      0.67         1
                                    autonomía  libertad       1.00      1.00      1.00         1
                                  bien común  comunidad       1.00      1.00      1.00         1
                                             ciudadanía       0.00      0.00      0.00         1
                                             democracia       0.33      1.00      0.50         1
                                             desarrollo       1.00      1.00      1.00         1
                                      descentralización       1.00      1.00      1.00         1
                                               dignidad       1.00      1.00      1.00         1
                                             diversidad       1.00      1.00      1.00         1
                      

  'precision', 'predicted', average, warn_for)
