In [1]:
# LIBRARIES

import pandas as pd
import numpy as np
import itertools as it
from datetime import datetime
import networkx as nx
import math
import random
import os
import operator
# Documentacion de la libreria: http://networkx.readthedocs.io/en/networkx-1.11/

from operator import itemgetter
from itertools import groupby
from sklearn.model_selection import train_test_split

In [2]:
# CONSTANTS
# Notebook-wide configuration. NOTE(review): DATASET_SIZE, RATING_THRESHOLD and
# USERS_EVAL are not referenced anywhere in the visible part of the notebook.

# DATASET_SIZE = 100000
DATASET_SIZE =  15954 # 200 most popular movies && users who have interacted with 50%
# DATASET_SIZE =  34030 # 200 most popular movies && users who have interacted with 25%
# DATASET_SIZE =  42482 # 200 most popular movies && users who have interacted with 12.5%

# HALF_DATASET_SIZE = int(90*DATASET_SIZE / 100)
# HALF_DATASET_SIZE = int(75*DATASET_SIZE / 100)
# SECOND_HALF_DATASET = int(DATASET_SIZE - HALF_DATASET_SIZE)
RATING_THRESHOLD = 4
# Minimum link weight handed to createLinks() when the graph is built.
WEIGHT_THRESHOLD = 5
# K = 10
# NOTE(review): any code that iterates range(K) while indexing range_K needs
# K == len(range_K); keep the two in sync when changing either.
K = 3
# Cut-offs k for which top-k recommendation lists are produced and evaluated.
range_K = [3,5,10]
# Identifiers of the similarity measures, in the order used for result lists:
# aa = Adamic-Adar, cn = common neighbors, ew = edge weight, jn = Jaccard,
# pa = preferential attachment; the 'w' prefix marks the weighted variants.
MEASURES = ['aa', 'cn', 'ew', 'jn', 'pa', 'waa', 'wcn', 'wpa']
USERS_EVAL = 25

In [3]:
def compareNodes(f_list, s_list):
    """
        Count the distinct values that appear in both input sequences.

        The count is the size of ``np.intersect1d(f_list, s_list)``, so a value
        repeated inside one of the sequences is only counted once.
    """
    shared_values = np.intersect1d(f_list, s_list)
    return len(shared_values)
    
def createLinks(prob_us_set, nodos, threshold):
    """
        Build the weighted links of the graph.

        Every unordered pair of entries of ``nodos`` is weighted with
        ``compareNodes(prob_us_set[a], prob_us_set[b])`` and the pair is kept
        only when that weight is greater than or equal to ``threshold``.

        Returns a list with the format [(Node1, Node2, weight), ......], in the
        order produced by ``itertools.combinations(nodos, 2)``.
    """
    # lazily weight every possible pair of nodes ...
    weighted_pairs = (
        (first, second, compareNodes(prob_us_set[first], prob_us_set[second]))
        for first, second in it.combinations(nodos, 2)
    )
    # ... and keep only the pairs that reach the threshold
    return [link for link in weighted_pairs if link[2] >= threshold]

In [4]:
def create_graph_nx(list_nodes, list_links):
    """
        Create an undirected NetworkX ``Graph`` from nodes and weighted links.

        list_nodes: iterable with the nodes to add.
        list_links: list of the form [(Node1, Node2, weight), ......]; each
                    weight is stored by NetworkX in the 'weight' edge attribute.

        Graph construction reference:
        http://networkx.readthedocs.io/en/networkx-1.11/tutorial/tutorial.html#what-to-use-as-nodes-and-edges
    """
    network = nx.Graph()

    # nodes first, so that nodes without any link are still part of the graph
    network.add_nodes_from(list_nodes)
    network.add_weighted_edges_from(list_links)

    return network

In [5]:
def nodes_connected(u, v, graph):
    """
        Tell whether ``u`` is a neighbor of ``v`` in ``graph``.

        Uses ``graph.has_edge(v, u)``, a direct adjacency lookup, instead of
        scanning every neighbor of ``v``. Returns a bool.

        Unlike the neighbor scan, this returns False (rather than raising) when
        ``v`` is not a node of the graph.
    """
    # argument order (v, u) keeps the "u among the neighbors of v" meaning
    return graph.has_edge(v, u)

In [6]:
def areAccessibleUser(current_user, possible, graph):
    """
        Return the distinct entries of ``possible`` that ``nodes_connected``
        reports as linked to ``current_user`` in ``graph``.

        The result is a list built from a set, so its order is not meaningful.
    """
    linked = set()
    for candidate in possible:
        if nodes_connected(current_user, candidate, graph):
            linked.add(candidate)
    return list(linked)

In [7]:
def areAccessible(possible, acc_users, df_users_simple):
    """
        Return the distinct items of ``possible`` that at least one user of
        ``acc_users`` has interacted with.

        possible: items to check.
        acc_users: users to look the items up in, scanned in order.
        df_users_simple: mapping such that ``df_users_simple[user]`` supports
                         ``item in ...``.

        The result is a list built from a set, so its order is not meaningful.
    """
    reachable = {
        item
        for item in possible
        # any() stops at the first user that has the item
        if any(item in df_users_simple[person] for person in acc_users)
    }
    return list(reachable)

In [8]:
def apply_aa(i1, i2, graph):
    """
        Adamic-Adar index of the pair (i1, i2), as computed by
        ``nx.adamic_adar_index``: the sum of 1/log(N(z)) over every common
        neighbor z of the pair, N(z) being the degree of z.
    """
    # the call yields (u, v, score) triples; only one pair is requested
    triples = list(nx.adamic_adar_index(graph, [(i1, i2)]))

    return triples[-1][2] if triples else 0

In [9]:
def apply_cn(i1, i2, graph):
    """
        Number of neighbors that the two nodes have in common.
    """
    return sum(1 for _ in nx.common_neighbors(graph, i1, i2))

In [10]:
def apply_ew(fst, snd, graph):
    """
        Weight of the link between ``fst`` and ``snd``.

        Returns the 'weight' attribute of the edge, or 0 when
        ``graph.get_edge_data`` returns None (there is no such link).
    """
    edge_data = graph.get_edge_data(fst, snd)

    # identity test: None is a singleton and must not go through __eq__
    if edge_data is None:
        return 0
    return edge_data['weight']

In [11]:
def apply_jn(i1, i2, graph):
    """
        Jaccard coefficient of the pair (i1, i2), as computed by
        ``nx.jaccard_coefficient``.
    """
    coefficient = 0
    # a single pair is requested, so the loop body runs once and keeps its score
    for _, _, score in nx.jaccard_coefficient(graph, [(i1, i2)]):
        coefficient = score

    return coefficient

In [12]:
def apply_pa(i1, i2, graph):
    """
        Preferential attachment score of the pair (i1, i2), as computed by
        ``nx.preferential_attachment``.
    """
    # keep the third field (the score) of the last triple that is produced
    scores = [score for _, _, score in nx.preferential_attachment(graph, [(i1, i2)])]

    return scores[-1] if scores else 0


In [13]:
def apply_waa(i1, i2, graph):
    """
        Weighted Adamic-Adar value of the pair (i1, i2).

        For every common neighbor z of the pair, adds
        (weight(i1, z) + weight(i2, z)) / log10(1 + weighted degree of z).
    """

    def contribution(z):
        # weight of the two links that connect the pair through z
        joint_weight = graph[i1][z]['weight'] + graph[i2][z]['weight']
        return joint_weight / math.log(1 + graph.degree(z, weight="weight"), 10)

    return sum(contribution(z) for z in nx.common_neighbors(graph, i1, i2))

In [14]:
def apply_wcn(i1, i2, graph):
    """
        Weighted common neighbors value of the pair (i1, i2): for every common
        neighbor z, adds weight(i1, z) + weight(i2, z).
    """
    return sum(
        graph[i1][z]['weight'] + graph[i2][z]['weight']
        for z in nx.common_neighbors(graph, i1, i2)
    )

In [15]:
def apply_wpa(i1, i2, graph):
    """
        Weighted preferential attachment of the pair (i1, i2): product of the
        weighted degrees (``weight="weight"``) of the two nodes.
    """
    strength_first = graph.degree(i1, weight="weight")
    strength_second = graph.degree(i2, weight="weight")

    return strength_first * strength_second

In [16]:
def apply_measure(i1, i2, graph, measure):
    """
        Return the similarity of the pair (i1, i2) under the given measure.

        measure: one of 'aa', 'cn', 'ew', 'jn', 'pa', 'waa', 'wcn', 'wpa'; the
                 call is forwarded to the matching ``apply_<measure>`` function.

        Raises ValueError for any other identifier (this used to surface as an
        UnboundLocalError on the return statement).
    """
    if measure == 'aa':
        return apply_aa(i1, i2, graph)
    elif measure == 'cn':
        return apply_cn(i1, i2, graph)
    elif measure == 'ew':
        return apply_ew(i1, i2, graph)
    elif measure == 'jn':
        return apply_jn(i1, i2, graph)
    elif measure == 'pa':
        return apply_pa(i1, i2, graph)
    elif measure == 'waa':
        return apply_waa(i1, i2, graph)
    elif measure == 'wcn':
        return apply_wcn(i1, i2, graph)
    elif measure == 'wpa':
        return apply_wpa(i1, i2, graph)

    raise ValueError("unknown similarity measure: " + repr(measure))

In [17]:
def build_matrix_users(users, graph, user):
    """
        Write the similarity between ``user`` and every entry of ``users`` to
        disk, one CSV file per measure in ``MEASURES``.

        Files go to 'similarities/user_weig_50/user_<user>/user_<measure>.csv'
        (the directory is created when missing). Each file has the header
        'user1,user2,similarity' followed by one '<user>,<u>,<similarity>' line
        per entry ``u`` of ``users``.
    """
    target_dir = 'similarities/user_weig_50/user_' + str(user) + '/'

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for measure in MEASURES:
        with open(target_dir + 'user_' + measure + '.csv', 'w') as result_file:
            result_file.write('user1,user2,similarity' + '\n')

            for other in users:
                similarity = apply_measure(user, other, graph, measure)
                result_file.write(f"{user},{other},{similarity}" + '\n')

In [18]:
def getting_sim_matrix(user):
    """
        Load the similarity files written for ``user`` and pivot each of them
        into a matrix (index 'user1', columns 'user2', values 'similarity').

        Returns a tuple of eight DataFrames in the fixed order
        aa, cn, ew, jn, pa, waa, wcn, wpa.
    """
    file_prefix = 'similarities/user_weig_50/user_' + str(user) + '/user_'
    measure_names = ('aa', 'cn', 'ew', 'jn', 'pa', 'waa', 'wcn', 'wpa')

    # read every file first, then pivot them
    long_tables = [pd.read_csv(file_prefix + name + '.csv') for name in measure_names]

    return tuple(
        table.pivot(index='user1', columns='user2', values='similarity')
        for table in long_tables
    )

In [19]:
def delRepetitions(lista):
    """
        Helper that removes repeated values from a list of recommendations.

        Returns a new list that keeps only the first occurrence of every value,
        in the order of the original list.
    """
    already_seen = set()  # set membership keeps the check fast
    unique_values = []

    for value in lista:
        if value not in already_seen:
            already_seen.add(value)
            unique_values.append(value)

    return unique_values

In [20]:
def getWeighing(item, items_recom_with_values):
    """
        Add up the values of every (item, value) pair whose item equals ``item``.

        Returns 0 when the item does not appear in ``items_recom_with_values``.
    """
    return sum(value for candidate, value in items_recom_with_values if candidate == item)

In [21]:
def write_recommendations(user, recom_list, measure):
    """
        Append one line '<user>,<measure>,<recom_list>' to
        'recommendations/user_weighted-vot_recoms_50.csv'.

        The 'recommendations' directory must already exist. ``recom_list`` is
        written with ``str()``, so a list shows up as '[a, b, ...]' (note that
        it contains commas itself).
    """
    # the context manager closes the file even if the write fails
    with open('recommendations/user_weighted-vot_recoms_50.csv', 'a') as f:
        f.write(str(user) + ',' + measure + ',' + str(recom_list) + '\n')

In [22]:
def getKrecommendations(row, df_users, k, measure, users, items_eval=None):
    """
        Rank candidate items for the user in ``row`` with a similarity-weighted
        voting scheme.

        Every user ``u`` of ``users`` whose similarity ``measure[u][row['user_id']]``
        is strictly positive votes, with that similarity as weight, for each item
        of ``df_users[u]``. The score of an item is the sum of its votes divided by
        the total similarity of all the voting users. Items are returned by
        decreasing score, without the items the target user already has in
        ``df_users[row['user_id']]`` and without the items missing from
        ``items_eval``.

        row: mapping with a 'user_id' entry (a DataFrame row in this notebook).
        df_users: mapping user -> list of items that user has interacted with.
        k: not used here; the returned list is NOT truncated to k.
        measure: similarity lookup indexed as measure[other_user][target_user].
        users: iterable with the candidate neighbor users.
        items_eval: optional container with the items that may be recommended.
                    When omitted, the module-level ``items_eval`` variable is
                    used, which is what this function read before the parameter
                    existed.

        Returns the list of recommended items, best first.
    """
    if items_eval is None:
        # backward compatibility with callers that rely on the notebook global
        try:
            items_eval = globals()['items_eval']
        except KeyError:
            raise NameError("name 'items_eval' is not defined") from None
    allowed_items = set(items_eval)

    my_user = row['user_id']

    # users with a similarity greater than zero (a single lookup per user)
    voters = []
    for u in users:
        similarity = measure[u][my_user]
        if similarity > 0:
            voters.append((u, similarity))
    total = sum([similarity for _, similarity in voters])

    # collect the votes of every item in one pass, instead of scanning the
    # whole list of votes once per item
    vote_sequence = []
    votes_by_item = {}
    for u, similarity in voters:
        for item in df_users[u]:
            vote_sequence.append(item)
            votes_by_item.setdefault(item, []).append(similarity)

    # list(set(...)) is kept on purpose: it decides the order among items with
    # the same score, so results stay identical to the previous implementation
    candidates = list(set(vote_sequence))

    # weighted voting: sum of the similarities that voted for the item / total
    scored = [(item, sum(votes_by_item[item]) / total) for item in candidates]
    scored.sort(key=operator.itemgetter(1), reverse=True)

    if not scored:
        return []

    # drop what the target user already interacted with and what is not evaluable
    already_seen = set(df_users[my_user])
    return [item for item, _ in scored
            if item not in already_seen and item in allowed_items]

In [23]:
def apply_getKrecommendations(df_new, df_users, k, measure, users, items_eval, user, my_measure):
    """
        Add the recommendation columns to ``df_new`` (modified in place) and
        return it.

        'recommendation_original' holds, for each row, the full list returned by
        ``getKrecommendations``; 'recommendation' holds its first ``k`` entries.
        Only when ``k == 10``, every full list is also appended to disk through
        ``write_recommendations(user, ..., my_measure)``.

        ``items_eval`` is accepted but not used inside this function.
    """

    def full_ranking(row):
        return getKrecommendations(row, df_users, k, measure, users)

    def top_k(row):
        return row['recommendation_original'][:k]

    df_new['recommendation_original'] = df_new.apply(full_ranking, axis=1)
    df_new['recommendation'] = df_new.apply(top_k, axis=1)

    if k == 10:
        def persist(row):
            return write_recommendations(user, row['recommendation_original'], my_measure)

        df_new.apply(persist, axis=1)

    return df_new

In [24]:
def calculateMetricsResults(list_recom_items, list_recom_items_original, user_list_to_recommend, list_eval_items, list_rel_accessible, k):
    """
        Pack the per-user lists into the DataFrame that the metric functions read.

        Columns, in order: 'user_id', 'eval_items', 'recom_items',
        'rel_accessible', 'recom_items_original'. ``k`` is not used.
    """
    columns = {
        'user_id': user_list_to_recommend,
        'eval_items': list_eval_items,
        'recom_items': list_recom_items,
        'rel_accessible': list_rel_accessible,
        'recom_items_original': list_recom_items_original,
    }
    return pd.DataFrame(columns)

In [25]:
def one_hit(row):
    """
        One-hit metric: 1 when at least one of the recommended items
        (``row['recom_items']``) is among the items the user interacted with in
        the evaluation set (``row['eval_items']``), 0 otherwise.
    """
    hits = np.intersect1d(row['recom_items'], row['eval_items'])

    return int(len(hits) >= 1)

In [26]:
def mrr(row):
    """
        Reciprocal rank of the row: 1/rank, where rank is the 1-based position
        of the first entry of ``row['recom_items']`` that is in
        ``row['eval_items']``. Returns 0 when no recommended item is correct.
    """
    recommended = row['recom_items']
    relevant = row['eval_items']

    # nothing in common: no correct item to look for
    if len(np.intersect1d(recommended, relevant)) == 0:
        return 0

    for rank, item in enumerate(recommended, start=1):
        if item in relevant:
            return (1/rank)

    return 0


In [27]:
def precision(row):
    """
        Precision of the recommendations of the row:
        (recommended items that are also in ``row['eval_items']``) / (number of
        recommended items in ``row['recom_items']``).

        Returns 0 when there are no recommendations at all, instead of failing
        with a ZeroDivisionError.
    """
    recommended = row['recom_items']

    # an empty recommendation list cannot contain any correct item
    if len(recommended) == 0:
        return 0

    num_items_common = np.intersect1d(recommended, row['eval_items'])

    return (len(num_items_common)/len(recommended))

In [28]:
def recall(row):
    """
        Recall of the recommendations of the row:
        (recommended items that are also in ``row['eval_items']``) / (number of
        items in ``row['eval_items']``).

        Returns 0 when the evaluation list is empty, instead of failing with a
        ZeroDivisionError.
    """
    relevant = row['eval_items']

    # nothing to retrieve: report 0 rather than dividing by zero
    if len(relevant) == 0:
        return 0

    num_items_common = np.intersect1d(row['recom_items'], relevant)

    return (len(num_items_common)/len(relevant))

In [29]:
def f1(row):
    """
        F1 score computed from ``row['precision']`` and ``row['recall']``:
        2 * precision * recall / (precision + recall), or 0 when both are 0.
    """
    p, r = row['precision'], row['recall']
    total = p + r

    # guard against the 0/0 case
    return 0 if total == 0 else (2 * p * r) / total

In [30]:
def rPrecision(row):
    """
        R-precision of the row: with R = len(row['rel_accessible']), the share
        of the first R entries of ``row['recom_items_original']`` that are in
        ``row['rel_accessible']``. Returns 0 when R is 0.
    """
    relevant = row['rel_accessible']
    cutoff = len(relevant)

    if cutoff == 0:
        return 0

    # only the first R recommendations are evaluated
    head = row['recom_items_original'][:cutoff]
    hits = np.intersect1d(head, relevant)

    return (len(hits)/cutoff)

In [31]:
def calculateScoreResults(metric_df):
    """
        Compute the evaluation metrics of every row and average them.

        Adds the columns 'one_hit', 'mrr', 'precision', 'recall', 'f1' and
        'rprec' to ``metric_df`` (modified in place) and returns a dictionary
        with the mean of each one, keyed 'one_hit', 'precision', 'mrr',
        'recall', 'f1', 'rprec'.
    """
    # order matters: f1 reads the 'precision' and 'recall' columns
    scorers = (
        ('one_hit', one_hit),
        ('mrr', mrr),
        ('precision', precision),
        ('recall', recall),
        ('f1', f1),
        ('rprec', rPrecision),
    )
    for column, scorer in scorers:
        metric_df[column] = metric_df.apply(scorer, axis=1)

    # dictionary with the averaged results
    report_order = ('one_hit', 'precision', 'mrr', 'recall', 'f1', 'rprec')
    return {name: metric_df[name].mean() for name in report_order}

In [32]:
def main_process(user, items_eval):
    """
        Run the whole pipeline for one user and return the frames to evaluate.

        Reads 'user-datasets-50/user_<user>_training.csv' and
        'user-datasets-50/user_<user>_test.csv', builds the user graph from the
        training set (two users are linked when ``compareNodes`` finds at least
        ``WEIGHT_THRESHOLD`` shared items), writes and reloads the similarity
        files of ``user`` and generates the recommendations.

        user: id used to locate the dataset and similarity files.
        items_eval: forwarded to ``apply_getKrecommendations``.

        Returns a nested list indexed [position in range_K][position in MEASURES]
        whose entries are the DataFrames built by ``calculateMetricsResults``.

        Side effects: prints the number of nodes and links, writes the
        similarity CSV files and, through ``apply_getKrecommendations``, may
        append to the recommendations file.
    """
    training_set = pd.read_csv('user-datasets-50/user_' + str(user) + '_training.csv')
    evaluation_set = pd.read_csv('user-datasets-50/user_' + str(user) + '_test.csv')

    # the nodes of the graph are the users of the training set
    nodes = training_set.user.unique()
    print(len(nodes))

    # for every user, the list of items that user has interacted with
    items_by_user = training_set.groupby('user')['item'].apply(list)

    # links in the format expected by networkx
    links = createLinks(items_by_user, nodes, WEIGHT_THRESHOLD)
    print(len(links))

    ##################### GRAPH CREATION
    graph = create_graph_nx(nodes, links)

    # same information as a plain dictionary: user -> items of that user
    df_users_simple = dict(zip(items_by_user.index.tolist(), items_by_user.values.tolist()))

    # FOR THE CONSTRUCTION OF R-PRECISION -----------
    grouped_user_eval = evaluation_set.groupby('user')['item'].apply(list)
    df_users_eval = pd.DataFrame({'user_id': grouped_user_eval.index, 'list_item_id': grouped_user_eval.values})

    node_list = list(nodes)
    df_users_eval["user_accessible"] = df_users_eval.apply(
        lambda row: areAccessibleUser(row['user_id'], node_list, graph), axis=1)
    # the lookup must use the user -> items dictionary; passing the graph here
    # compared item ids against the neighbor *users* of each accessible user
    df_users_eval["rel_accessible"] = df_users_eval.apply(
        lambda row: areAccessible(row['list_item_id'], row['user_accessible'], df_users_simple), axis=1)

    list_rel_accessible = df_users_eval['rel_accessible'].tolist()
    list_eval_items = df_users_eval['list_item_id'].tolist()
    user_list_to_recommend = df_users_eval['user_id'].tolist()

    # construction of the similarity matrices
    build_matrix_users(nodes, graph, user)
    # getting_sim_matrix returns the matrices in the same order as MEASURES
    sim_matrices = getting_sim_matrix(user)

    # new dataframe that receives the recommendation columns
    dataframe_user_recomend = pd.DataFrame.from_dict({'user_id': user_list_to_recommend})

    # apply_getKrecommendations overwrites the columns of the shared frame, so
    # a copy is stored for every (k, measure) combination
    dataframe_k_measures_original = [
        [apply_getKrecommendations(dataframe_user_recomend, df_users_simple, k, sim_matrix,
                                   nodes, items_eval, user, measure).copy()
         for measure, sim_matrix in zip(MEASURES, sim_matrices)]
        for k in range_K
    ]

    # iterate over range_K itself, so the result does not depend on the
    # unrelated constant K matching len(range_K)
    metrics_results = [
        [calculateMetricsResults(frame['recommendation'].tolist(),
                                 frame['recommendation_original'].tolist(),
                                 user_list_to_recommend, list_eval_items, list_rel_accessible, k)
         for frame in frames_for_k]
        for k, frames_for_k in zip(range_K, dataframe_k_measures_original)
    ]

    return metrics_results

In [33]:
def write_results_file(dir, result, k, measure):
    """
        Append one CSV line with the averaged metrics to the file ``dir``.

        dir: path of the output file (opened in append mode).
        result: mapping with the keys 'one_hit', 'precision', 'mrr', 'recall',
                'f1' and 'rprec'.
        k, measure: written as the first two fields of the line.

        Line format: k,measure,one_hit,precision,mrr,recall,f1,rprec
    """
    fields = [k, measure, result['one_hit'], result['precision'], result['mrr'],
              result['recall'], result['f1'], result['rprec']]

    # the context manager closes the file even if the write fails
    with open(dir, 'a') as f:
        f.write(','.join(str(field) for field in fields) + '\n')

In [34]:
# Load the interactions file; the printed frame shows the columns
# user, item, rating and timestamp.
df = pd.read_csv('most_pop_200_users.csv')


# NOTE(review): df.head() would give a shorter, richer display than print(df).
print(df)

       user  item  rating  timestamp
0       305   451       3  886324817
1        62   257       2  879372434
2       194   274       2  879539794
3       299   144       4  877881320
4       308     1       4  887736532
...     ...   ...     ...        ...
15949   864   685       4  888891900
15950   279    64       1  875308510
15951   660   229       2  891406212
15952   880   476       3  880175444
15953   716   204       5  879795543

[15954 rows x 4 columns]


In [35]:
# Put the interactions in chronological order.
df = df.sort_values('timestamp')

print(df)

# Distinct user and item ids, in order of first appearance in the sorted frame.
users, items = df.user.unique(), df.item.unique()
num_items, num_users = len(items), len(users)

print(num_items)
print(num_users)

       user  item  rating  timestamp
3590    276   258       5  874786337
15845   276   300       4  874786338
7398    276   328       4  874786366
145     276   294       4  874786366
2449    276   288       4  874786392
...     ...   ...     ...        ...
9791    796   393       4  893218933
9546    796   419       5  893219001
15928   407     7       4  893253637
6835    653   272       4  893275949
12346   653   245       4  893276091

[15954 rows x 4 columns]
200
129


In [36]:
# Item ids that recommendations are restricted to: getKrecommendations only
# keeps recommended items that are in this list.
# NOTE(review): how these 18 ids were selected is not recorded here - TODO document.
items_eval = sorted([508,240,273,274,9,451,286,313,411,180,1,153,179,588,187,259,582,527])
len(items_eval)

18

In [37]:
# users = [1, 2, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 28, 30, 32, 37, 38, 41, 42, 43, 44, 45, 48, 49, 52, 54, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 81, 82, 83, 84, 85, 87, 89, 90, 91, 92, 94, 95, 96, 97, 99, 101, 102, 103, 104, 106, 108, 109, 110, 113, 114, 115, 116, 117, 118, 119, 121, 123, 125, 128, 130, 135, 137, 138, 141, 144, 145, 148, 151, 152, 154, 157, 158, 159, 160, 161, 162, 164, 168, 174, 175, 177, 178, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 193, 194, 195, 197, 198, 200, 201, 207, 210, 213, 214, 215, 216, 217, 218, 221, 222, 223, 224, 226, 227, 230, 232, 233, 234, 235, 236, 237, 239, 243, 244, 246, 248, 249, 250, 251, 253, 254, 255, 256, 259, 262, 263, 264, 265, 267, 268, 269, 270, 271, 272, 274, 275, 276, 277, 279, 280, 283, 286, 287, 288, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 301, 303, 305, 307, 308, 311, 312, 313, 314, 315, 316, 318, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 332, 334, 336, 338, 339, 340, 342, 343, 344, 345, 346, 347, 348, 350, 352, 354, 357, 360, 361, 363, 365, 370, 371, 372, 373, 374, 378, 379, 380, 381, 382, 383, 385, 387, 388, 389, 391, 392, 393, 394, 395, 396, 397, 398, 399, 401, 402, 403, 405, 406, 407, 409, 411, 412, 413, 416, 417, 421, 422, 423, 424, 425, 426, 429, 430, 432, 433, 435, 436, 437, 442, 445, 447, 449, 450, 452, 453, 454, 455, 456, 457, 458, 459, 460, 463, 464, 465, 466, 468, 470, 472, 474, 476, 478, 479, 480, 481, 483, 484, 486, 487, 488, 489, 490, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 503, 504, 505, 506, 507, 508, 514, 518, 521, 523, 524, 525, 526, 527, 528, 530, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 545, 548, 551, 552, 553, 554, 555, 556, 559, 560, 561, 562, 566, 567, 568, 569, 573, 576, 577, 579, 582, 586, 587, 588, 590, 591, 592, 593, 595, 600, 601, 603, 605, 606, 608, 610, 615, 617, 618, 619, 620, 621, 622, 623, 624, 625, 627, 629, 630, 632, 633, 634, 637, 638, 639, 
# 640, 642, 643, 645, 647, 648, 650, 653, 654, 655, 658, 659, 660, 661, 663, 664, 665, 666, 667, 668, 669, 670, 671, 674, 676, 677, 679, 680, 682, 683, 684, 686, 690, 692, 693, 694, 697, 698, 699, 703, 704, 705, 707, 708, 709, 710, 711, 712, 714, 715, 716, 717, 719, 721, 724, 727, 731, 733, 734, 735, 738, 739, 741, 745, 746, 747, 748, 749, 751, 753, 756, 757, 758, 759, 761, 763, 764, 766, 768, 770, 771, 773, 774, 776, 777, 778, 779, 780, 781, 782, 786, 788, 790, 793, 795, 796, 798, 802, 804, 805, 806, 807, 815, 821, 823, 825, 826, 829, 830, 831, 833, 834, 835, 836, 837, 838, 839, 840, 843, 844, 846, 847, 848, 850, 851, 852, 854, 860, 862, 864, 865, 867, 868, 870, 871, 872, 875, 877, 878, 880, 881, 882, 883, 885, 886, 887, 889, 890, 892, 893, 894, 896, 897, 899, 901, 902, 903, 905, 907, 908, 910, 911, 912, 913, 916, 918, 919, 921, 922, 923, 924, 927, 929, 930, 931, 932, 933, 934, 935, 936, 938, 939, 940, 942, 943]

In [38]:
# Run the pipeline for every user, in the order of the `users` array; every
# entry of `results` is the nested [k][measure] list returned by main_process.
results = list()
for i, user in enumerate(users, start=1):
    # progress trace: running position and id of the user being processed
    print("Current user: " + str(i) + "---" + str(user) + "------")
    results.append(main_process(user, items_eval))

Current user: 1---276------
129
8215
Current user: 2---532------
129
8215
Current user: 3---130------
129
8215
Current user: 4---297------
129
8215
Current user: 5---1------
129
8215
Current user: 6---833------
129
8214
Current user: 7---407------
129
8215
Current user: 8---870------
129
8215
Current user: 9---279------
129
8215
Current user: 10---145------
129
8215
Current user: 11---919------
129
8215
Current user: 12---268------
129
8215
Current user: 13---514------
129
8215
Current user: 14---207------
129
8215
Current user: 15---92------
129
8215
Current user: 16---286------
129
8215
Current user: 17---102------
129
8214
Current user: 18---43------
129
8215
Current user: 19---472------
129
8215
Current user: 20---886------
129
8215
Current user: 21---343------
129
8215
Current user: 22---881------
129
8215
Current user: 23---416------
129
8215
Current user: 24---373------
129
8215
Current user: 25---864------
129
8215
Current user: 26---222------
129
8215
Current user: 27---299---

In [39]:
# For every cut-off of range_K and every measure, stack the frames of all the
# users into a single DataFrame: metrics_results_ev[k position][measure position].
metrics_results_ev = list()
# main_process returns one entry per element of range_K, so the loop is sized
# by range_K rather than by the unrelated constant K
for k_position in range(len(range_K)):
    list_k = list()
    for measure_position in range(len(MEASURES)):
        frames_of_all_users = [user_result[k_position][measure_position] for user_result in results]
        list_k.append(pd.concat(frames_of_all_users))
    metrics_results_ev.append(list_k)

In [40]:
# Averaged metrics for every (cut-off, measure) combination; the nesting of
# metrics_results_ev is walked directly, so nothing here depends on K or on
# looking positions up with MEASURES.index().
metrics_results = [
    [calculateScoreResults(stacked_frame) for stacked_frame in frames_for_k]
    for frames_for_k in metrics_results_ev
]

In [41]:
# Append one line per (cut-off, measure) combination to the results file.
# Plain loops: the calls are made for their side effect only, so there is no
# list of None values to build or display.
for k, results_for_k in zip(range_K, metrics_results):
    for measure, averaged_metrics in zip(MEASURES, results_for_k):
        write_results_file("results/user_weighted_vot_50.csv", averaged_metrics, k, measure)


[[None, None, None, None, None, None, None, None],
 [None, None, None, None, None, None, None, None],
 [None, None, None, None, None, None, None, None]]