# Comparing Recommendations against Karma

For each scenario, gain insight into which users are recommended by comparing each user's position in the recommendation ranking with their position in the Karma ranking.

In [6]:
import pandas as pd
import os
# Load the user table (semicolon-separated CSV located under $PWD).
users = pd.read_csv(os.environ['PWD'] + '/data/data_extracted_from_db/ra_user.csv', sep=";")

# Keep only the user id and karma, ordered from highest to lowest karma.
karma_ranking = users[['id', 'karma']].sort_values(by=['karma'], ascending=False)

# 1-based position in the karma ordering (1 = highest karma).
# The whole column is assigned at once: writing to the `row` object yielded by
# DataFrame.iterrows() is not guaranteed to modify the frame, because the
# iterator may hand out copies depending on the column dtypes.
karma_ranking['pos'] = range(1, len(karma_ranking) + 1)

karma_ranking.head()

Unnamed: 0,id,karma,pos
2,3,48115,1
5075,5184,43312,2
1032,1034,41279,3
121,122,29613,4
24,25,24183,5


In [7]:
# Show the karma-ranking row (id, karma, pos) of user 26450.
karma_ranking.query(expr="id == 26450")

Unnamed: 0,id,karma,pos
13082,26450,3367,44


## Analysing one question

In [8]:
# Recommendation ranking stored for question 63943; the file is read as a
# two-column table that is labelled (user id, score) below.
question_ranking = pd.read_json(os.environ['PWD'] + '/results/20190714_1213_5p/C/63943.json')
question_ranking.columns = ['id', 'score']

# 1-based position of each user in file order (the file is presumably already
# sorted by descending score -- verify against the producer of the results).
# Assigned as one column instead of a per-row iterrows()/.loc loop.
question_ranking['pos'] = range(1, len(question_ranking) + 1)

In [9]:
# Restrict the analysis to the first 30 rows of the question ranking.
q_top30 = question_ranking.head(n=30)

q_top30

Unnamed: 0,id,score,pos
0,5184,421.119735,1
1,1034,244.536824,2
2,122,155.105362,3
3,3,142.111489,4
4,25,108.342462,5
5,21,107.616116,6
6,26450,68.966927,7
7,351,58.644979,8
8,875,58.614168,9
9,11,53.011896,10


In [10]:
# Build (recommendation position, karma position) pairs for the top-30 users.
a = []
for row_label in q_top30.index:
    user_id = q_top30.at[row_label, 'id']
    # Row of this user in the karma ranking; column 2 holds the karma position.
    karma_row = karma_ranking.query(f"id == {user_id}")
    a.append((q_top30.at[row_label, 'pos'], karma_row.iat[0, 2]))
a

[(1, 2),
 (2, 3),
 (3, 4),
 (4, 1),
 (5, 5),
 (6, 7),
 (7, 44),
 (8, 60),
 (9, 14),
 (10, 15),
 (11, 6),
 (12, 16),
 (13, 149),
 (14, 19),
 (15, 8),
 (16, 66),
 (17, 375),
 (18, 419),
 (19, 20),
 (20, 170),
 (21, 10),
 (22, 28),
 (23, 13),
 (24, 23),
 (25, 24),
 (26, 424),
 (27, 1511),
 (28, 325),
 (29, 37),
 (30, 78)]

To visualize whether the algorithm (TMBA) recommends the top 30 Karma users, we split the recommended users into two categories: *karma-experts* (Karma position of 30 or better) and *non-karma-experts*.


In [29]:
def karma_pos(user_id):
    """Return the karma-ranking position of a user.

    The user is looked up in the notebook-level ``karma_ranking`` frame and
    the value of its ``pos`` column is returned.

    Parameters
    ----------
    user_id : int
        Value matched against the ``id`` column of ``karma_ranking``.

    Returns
    -------
    The ``pos`` value of the first row whose ``id`` equals ``user_id``.

    Raises
    ------
    IndexError
        If no row of ``karma_ranking`` has this ``id``.
    """
    # Select the column by name rather than by position so the result does not
    # depend on the column order of `karma_ranking`; a boolean mask also avoids
    # re-parsing a query string on every call.
    positions = karma_ranking.loc[karma_ranking['id'] == user_id, 'pos']
    return positions.iat[0]
    
    

In [17]:
import plotly.express as px
# Worst karma position at which a user still counts as a karma expert.
KARMA_EXPERT_MAX_POS = 30

# One record per recommended user:
# (recommendation position, karma position, expert / non-expert label).
data = [
    (pos, k_pos, 'expert' if k_pos <= KARMA_EXPERT_MAX_POS else 'non-expert')
    for pos, k_pos in a
]

df = pd.DataFrame(data, columns=['pos', 'karma_pos', 'karma_expert'])

# Recommendation position (x) against karma position (y), coloured by label.
fig = px.scatter(df, x="pos", y="karma_pos", color="karma_expert", hover_data=['pos','karma_pos'])
fig.show()

## Generate table for complete analysis


In [33]:


# Prototype of the per-question table: the ranking stored for question 63943
# plus the metadata columns used in the complete analysis.
question_ranking = pd.read_json(os.environ['PWD'] + '/results/20190714_1213_5p/C/63943.json')
question_ranking.columns = ['id', 'score']

# Placeholder values; the metadata is not filled in for this single example.
question_ranking['q_id'] = -1
question_ranking['scenario'] = '-'
question_ranking['nb_of_participants'] = -1

# 1-based position of each user in file order, assigned as one column instead
# of a per-row iterrows()/.loc loop.
question_ranking['pos'] = range(1, len(question_ranking) + 1)

# Karma position of every recommended user (raises if a user is unknown).
question_ranking['karma_pos'] = question_ranking['id'].map(
    lambda user_id: karma_pos(int(user_id))
)

question_ranking

Unnamed: 0,id,score,q_id,scenario,nb_of_participants,pos,karma_pos
0,5184,421.119735,-1,-,-1,1,2
1,1034,244.536824,-1,-,-1,2,3
2,122,155.105362,-1,-,-1,3,4
3,3,142.111489,-1,-,-1,4,1
4,25,108.342462,-1,-,-1,5,5
5,21,107.616116,-1,-,-1,6,7
6,26450,68.966927,-1,-,-1,7,44
7,351,58.644979,-1,-,-1,8,60
8,875,58.614168,-1,-,-1,9,14
9,11,53.011896,-1,-,-1,10,15


In [97]:
path = os.environ['PWD'] + '/results/'

# Experiment runs keyed by number of participants. Each entry names the result
# folder and records the question count and participant count of that run.
experiments = {
     6: {"folder": "20190714_1211_6p", "questions": 467, "participants": 6},
     5: {"folder": "20190714_1213_5p", "questions": 1181, "participants": 5},
     7: {"folder": "20190714_1231_7p", "questions": 215, "participants": 7},
     8: {"folder": "20190714_1231_8p", "questions": 106, "participants": 8},
     9: {"folder": "20190714_1232_9p", "questions": 52, "participants": 9},
     10: {"folder": "20190714_1233_10p", "questions": 36, "participants": 10},
     4: {"folder": "20190714_1235_4p", "questions": 3271, "participants": 4},
     3: {"folder": "20190714_1234_3p", "questions": 9052, "participants": 3},
     2: {"folder": "20190714_1236_2p", "questions": 18162, "participants": 2}
}

scenarios = ['A', 'B', 'C', 'D']

# Number of top-ranked users kept per question.
TOP_N = 50
# Worst karma position at which a user still counts as a karma expert.
KARMA_EXPERT_MAX_POS = 30

output_columns = ["id", "score", "q_id", "scenario", "participants", "pos", "karma_pos", "ra_expert"]

# id -> karma position lookup, built once. Resolving every row with a separate
# DataFrame.query call is far too slow for the number of result files involved.
# drop_duplicates keeps the first row per id, matching a first-match lookup.
karma_pos_by_id = karma_ranking.drop_duplicates(subset='id').set_index('id')['pos']

# Per-question frames are collected here and concatenated once at the end:
# growing a DataFrame inside the loop copies it on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
frames = []

for n_participants in sorted(experiments):
    exp = experiments[n_participants]
    path_to_exp = path + exp['folder']
    for scenario in scenarios:
        print(exp['participants'],scenario)
        scenario_dir = path_to_exp + f'/{scenario}'
        # Plain files directly inside the scenario folder (no recursion).
        files = next(os.walk(scenario_dir))[2]

        # Sorted so the row order of the resulting CSV is reproducible.
        for f in sorted(files):
            q_id, extension = os.path.splitext(f)
            if extension != '.json':
                # Ignore anything that is not a ranking file.
                continue

            question_ranking = pd.read_json(os.path.join(scenario_dir, f))
            question_ranking.columns = ['id', 'score']
            # Explicit copy so the column assignments below do not write to a
            # slice of the frame that was just read.
            question_ranking = question_ranking.head(TOP_N).copy()

            question_ranking['q_id'] = q_id
            question_ranking['scenario'] = scenario
            question_ranking['participants'] = exp['participants']
            # 1-based position of each user in file order.
            question_ranking['pos'] = range(1, len(question_ranking) + 1)

            karma_positions = question_ranking['id'].map(karma_pos_by_id)
            unknown_ids = question_ranking.loc[karma_positions.isna(), 'id']
            if not unknown_ids.empty:
                # Fail loudly rather than writing rows without a karma position.
                raise KeyError(
                    f"users missing from karma_ranking in {scenario_dir}/{f}: {unknown_ids.tolist()}"
                )
            question_ranking['karma_pos'] = karma_positions.astype('int64')
            question_ranking['ra_expert'] = question_ranking['karma_pos'] <= KARMA_EXPERT_MAX_POS

            frames.append(question_ranking)

# The per-file row index is kept, so it is written as the first CSV column.
all_data = pd.concat(frames) if frames else pd.DataFrame(columns=output_columns)

all_data.to_csv(os.environ['PWD']+'/results/comparison_vs_karma.csv')

2 A


KeyboardInterrupt: 

Unnamed: 0,id,score,q_id,scenario,nb_of_participants,pos,karma_pos,ra_expert


In [86]:
import plotly.express as px
# Recommendation position (x) against karma position (y), coloured by the
# `ra_expert` flag. NOTE: this plots whatever frame is currently bound to
# `question_ranking` in the kernel, and that frame must already contain the
# `pos`, `karma_pos` and `ra_expert` columns.
fig = px.scatter(question_ranking, x="pos", y="karma_pos", color="ra_expert", hover_data=['pos','karma_pos'])
fig.show()