# Comparing Recommendations against Karma

For each scenario, get insights about the recommendation of users and their position and position in the Karma ranking

Iterar sobre la lista de usuarios para asignales su posicion en el ranking de Karma

In [2]:
import pandas as pd
import os
users = pd.read_csv(os.environ['PWD'] + '/data/data_extracted_from_db/ra_user.csv', sep=";")
karma_ranking = users[['id','karma']].sort_values(by=['karma'], ascending=False)
karma_ranking['pos_karma'] = -1

j = 1
for i, row in karma_ranking.iterrows():
    row['pos_karma'] = j
    j += 1

karma_ranking.head()

Unnamed: 0,id,karma,pos_karma
2,3,48115,1
5075,5184,43312,2
1032,1034,41279,3
121,122,29613,4
24,25,24183,5


In [3]:
karma_ranking.query("id == 26450")

Unnamed: 0,id,karma,pos_karma
13082,26450,3367,44


## Ejemplo: Analisis de la pregunta [63943](https://answers.ros.org/question/63943/)

Extraer el ranking y el score de cada usuario para esa pregunta

In [4]:

question_ranking = pd.read_json(os.environ['PWD'] + '/results/20190714_1213_5p/C/63943.json')
question_ranking.columns = ['id', 'score']
question_ranking['pos_tmba'] = -1

j = 1
for idx, row in question_ranking.iterrows():
    question_ranking.loc[idx,'pos_tmba'] = j
    j += 1

Mostrar los primeros 30 usuarios recomendados: id, score, posicion TMBA

In [5]:
q_top30 = question_ranking.head(30)

q_top30

Unnamed: 0,id,score,pos_tmba
0,5184,421.119735,1
1,1034,244.536824,2
2,122,155.105362,3
3,3,142.111489,4
4,25,108.342462,5
5,21,107.616116,6
6,26450,68.966927,7
7,351,58.644979,8
8,875,58.614168,9
9,11,53.011896,10


Comparar posiciones en los dos rankings (TMBA y Karma)

In [6]:
for idx, row in q_top30.iterrows():
    id_ = q_top30.loc[idx,'id']
    karma_pos_ = karma_ranking.query(f"id == {id_}").iat[0,2]
    q_top30.loc[idx,'pos_karma'] = karma_pos_
q_top30

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,id,score,pos_tmba,pos_karma
0,5184,421.119735,1,2.0
1,1034,244.536824,2,3.0
2,122,155.105362,3,4.0
3,3,142.111489,4,1.0
4,25,108.342462,5,5.0
5,21,107.616116,6,7.0
6,26450,68.966927,7,44.0
7,351,58.644979,8,60.0
8,875,58.614168,9,14.0
9,11,53.011896,10,15.0


In order to visualize if the algorithm (TMBA) is recommending the top 30 Karma users or not, we split them into two categories: *karma-experts* and *non-karma-experts*.


In [11]:
def karma_pos(user_id):
    return karma_ranking.query(f"id == {user_id}").iat[0,2]
    
    

## Generate table of TMBA vs Karma for question 63943 scenario C


In [15]:
question_ranking_63943 = pd.read_json(os.environ['PWD'] + '/results/20190714_1213_5p/C/63943.json')
question_ranking_63943.columns = ['id', 'score']
question_ranking_63943['q_id'] = -1
question_ranking_63943['scenario'] = '-'
question_ranking_63943['nb_of_participants'] = -1
question_ranking_63943['pos'] = -1
question_ranking_63943['karma_pos'] = -1

j = 1
for idx, row in question_ranking.iterrows():
    question_ranking_63943.loc[idx,'pos'] = j
    question_ranking_63943.loc[idx,'karma_pos'] = karma_pos(int(row['id']))
    j += 1
    
question_ranking_63943.head()

Unnamed: 0,id,score,q_id,scenario,nb_of_participants,pos,karma_pos
0,5184,421.119735,-1,-,-1,1,2
1,1034,244.536824,-1,-,-1,2,3
2,122,155.105362,-1,-,-1,3,4
3,3,142.111489,-1,-,-1,4,1
4,25,108.342462,-1,-,-1,5,5


## Generate table of TMBA vs Karma for complete analysis


In [21]:

import time
from tqdm import  tqdm_notebook as tqdm
path = os.environ['PWD'] + '/results/'

path_to_big_table = os.environ['PWD']+f'/results/comparison_vs_karma{time.strftime("%Y%m%d_%H%M")}.csv'

experiments = {
     6: {"folder": "20190714_1211_6p", "questions": 467, "participants": 6},
     5: {"folder": "20190714_1213_5p", "questions": 1181, "participants": 5},
     7: {"folder": "20190714_1231_7p", "questions": 215, "participants": 7},
     8: {"folder": "20190714_1231_8p", "questions": 106, "participants": 8},
     9: {"folder": "20190714_1232_9p", "questions": 52, "participants": 9},
     10: {"folder": "20190714_1233_10p", "questions": 36, "participants": 10},
     4: {"folder": "20190714_1235_4p", "questions": 3271, "participants": 4},
     3: {"folder": "20190714_1234_3p", "questions": 9052, "participants": 3},
     2: {"folder": "20190714_1236_2p", "questions": 18162, "participants": 2}
}

scenarios = ['A', 'B', 'C', 'D']
all_data = pd.DataFrame(columns=["id","score","q_id","scenario","participants","pos","karma_pos","ra_expert"])

for nb_of_participants in tqdm(range(2,11), desc='participants'):
    exp = experiments[nb_of_participants]
    path_to_exp = path + exp['folder']
    
    for scenario in tqdm(scenarios, desc='scenarios'):
        files = next(os.walk(path_to_exp + f'/{scenario}'))[2]
        
        i = 0
        for f in tqdm(files, desc='files'):
            q_id = f[:-5]

            try:
                question_ranking = pd.read_json(path_to_exp + f'/{scenario}/{f}')
            except ValueError:
                print(f'Value error for file: {f}')
                continue

            question_ranking.columns = ['id', 'score']
            question_ranking = question_ranking.head(30)
            
            question_ranking['q_id'] = q_id
            question_ranking['scenario'] = scenario
            question_ranking['participants'] = exp['participants']
            question_ranking['pos'] = -1
            question_ranking['karma_pos'] = -1

            j = 1
            for idx, row in question_ranking.iterrows():
                question_ranking.loc[idx,'pos'] = j
                question_ranking.loc[idx,'karma_pos'] = karma_pos(int(row['id']))
                question_ranking.loc[idx,'ra_expert'] = question_ranking.loc[idx,'karma_pos'] <= 30             
                j += 1
                
            question_ranking
            if not os.path.isfile(path_to_big_table):
               question_ranking.to_csv(path_to_big_table, header='column_names')
            else: # else it exists so append without writing the header
               question_ranking.to_csv(path_to_big_table, mode='a', header=False)
            
            




HBox(children=(IntProgress(value=0, description='participants', max=9, style=ProgressStyle(description_width='…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=18163, style=ProgressStyle(description_width='ini…

Value error for file: all_questions


HBox(children=(IntProgress(value=0, description='files', max=18161, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=18161, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=18161, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=9052, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=9052, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=9052, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=9052, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=3271, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=3270, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=3270, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=3270, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=1181, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=1181, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=1181, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='files', max=1181, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=467, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=467, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=467, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=467, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=215, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=215, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=215, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=215, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=106, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=106, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=106, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='files', max=106, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=52, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='files', max=52, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='files', max=52, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='files', max=52, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='scenarios', max=4, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='files', max=36, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='files', max=36, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='files', max=36, style=ProgressStyle(description_width='initia…

HBox(children=(IntProgress(value=0, description='files', max=36, style=ProgressStyle(description_width='initia…




In [None]:
question_ranking.head(5)