# Karma Analysis

The karma ranking is based on karma points, which users earn by participating in ROS Answers. 
In the absence of a recommender system for answerers, askers would turn to users with high karma, that is, users in the top positions of the karma ranking.
Since one of the objectives of the Recommender System is to distribute the workload, we expect it to recommend questions to users who are not in the top positions of the karma ranking.  

In [155]:
import pandas as pd
import plotly.express as px
import numpy as np

In [2]:
# Name of the scenario sub-folder read inside every run folder.
scenario = "C"

# Directory that holds one sub-folder per run.
base_path = "/Users/pablo.estefo/u/ra_recommendator_conrec/results"

# Run folders whose ranking files are aggregated.
folders = [
    "20190714_1211_6p",
    "20190714_1213_5p",
    "20190714_1231_7p",
    "20190714_1231_8p",
    "20190714_1232_9p",
    "20190714_1233_10p",
    "20190714_1234_3p",
    "20190714_1235_4p",
    "20190714_1236_2p",
]

# SQLite database file passed to create_connection.
db_file = "/Users/pablo.estefo/u/ra_recommendator_conrec/data/v1.2.db"

In [3]:
import os
from tqdm import tqdm

def get_all_files_from_path(path):
    """Return the names of the entries directly inside ``path`` that end in '.json'.

    Only bare names are returned (as given by ``os.listdir``), not full paths.
    """
    json_names = []
    for entry_name in os.listdir(path):
        if entry_name.endswith('.json'):
            json_names.append(entry_name)
    return json_names

In [4]:
import ast

def get_data_from_file(path):
    """Read the text file at ``path`` and return the Python literal it contains.

    The content is parsed with ``ast.literal_eval``, so it must be a valid
    Python literal (list, tuple, dict, number, string, ...).
    """
    with open(path, "r") as handle:
        raw_text = handle.read()
    return ast.literal_eval(raw_text)

def get_position_in_ranking(user_id, file_path):
    """Return the zero-based position of ``user_id`` in the ranking at ``file_path``.

    The ranking is loaded with ``get_data_from_file``; an entry matches when its
    first element equals ``user_id``. Returns ``None`` when nothing matches.
    """
    ranking = get_data_from_file(file_path)
    matching_positions = (
        position for position, entry in enumerate(ranking) if entry[0] == user_id
    )
    return next(matching_positions, None)

In [110]:
import sqlite3 

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file
    :param db_file: database file
    :return: Connection object, or None if the connection could not be opened
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # Best effort: report the failure and hand None back to the caller.
        print(e)
        return None

def get_karma_for_user(conn, user_id):
    """Return the karma stored in the ``ros_user`` table for ``user_id``.

    :param conn: open sqlite3 connection
    :param user_id: value matched against ``ros_user.id``
    :return: ``karma`` of the first matching row, or -1 when no row matches
    """
    query = """
    SELECT karma
    FROM ros_user
    WHERE id=?
    """
    cur = conn.cursor()
    # Bind user_id as a parameter instead of interpolating it into the SQL text,
    # so the value can never be interpreted as SQL.
    cur.execute(query, (user_id,))

    row = cur.fetchone()
    if row is None:
        return -1
    return row[0]

def get_best_ranked_user_for_question(path_to_file):
    """Return the first element of the first entry in the ranking at ``path_to_file``."""
    ranking = get_data_from_file(path_to_file)
    top_entry = ranking[0]
    return top_entry[0]

def get_second_best_ranked_user_for_question(path_to_file):
    """Return the first element of the second entry in the ranking at ``path_to_file``."""
    ranking = get_data_from_file(path_to_file)
    second_entry = ranking[1]
    return second_entry[0]

def get_third_best_ranked_user_for_question(path_to_file):
    """Return the first element of the third entry in the ranking at ``path_to_file``."""
    ranking = get_data_from_file(path_to_file)
    third_entry = ranking[2]
    return third_entry[0]

def get_ranking_of_karma(conn):
    """Build the karma ranking of every row in the ``ros_user`` table.

    :param conn: open sqlite3 connection
    :return: DataFrame indexed by ``Id`` with columns ``Ranking`` (zero-based
        position when sorted by karma, highest first), ``Karma`` and ``Freq``
        (a counter initialised to 0). Rows with equal karma keep whatever
        relative order SQLite returns them in.
    """
    query = """
    select id, karma
    from ros_user 
    ORDER by karma DESC
    """
    cur = conn.cursor()
    cur.execute(query)
    rows = cur.fetchall()

    karma_ranking = [
        (position, user_id, karma)
        for position, (user_id, karma) in enumerate(rows)
    ]

    # The file imports pandas under the alias `pd`; the bare name `pandas` is not bound.
    df = pd.DataFrame(karma_ranking, columns=['Ranking', 'Id', 'Karma']).assign(Freq=0)
    return df.set_index("Id")


In [106]:
# Build the karma ranking table from the database and display it.
# NOTE(review): `conn` is assigned in a later cell (the one calling
# create_connection) — run that cell first or this one raises NameError.
karma_ranking = get_ranking_of_karma(conn)
karma_ranking

Unnamed: 0_level_0,Ranking,Karma,Freq
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,48115,0
5184,1,43312,0
1034,2,41279,0
122,3,29613,0
25,4,24183,0
...,...,...,...
36346,21383,1,0
36347,21384,1,0
36348,21385,1,0
36349,21386,1,0


In [51]:
# Scratch check: bump the Freq counter of the row with Id 3, then read it back.
karma_ranking.at[3, 'Freq'] += 1

karma_ranking.at[3, 'Freq']

2

In [None]:
# NOTE(review): `df2` is not defined in any visible cell — this raises NameError
# unless it is defined elsewhere; confirm or remove.
df2 

In [111]:
conn = create_connection(db_file)

# One frequency table per recommendation slot: first, second and third place.
karma_ranking = get_ranking_of_karma(conn)
karma_ranking_2 = karma_ranking.copy()
karma_ranking_3 = karma_ranking.copy()

for folder in folders:
    path = base_path + '/' + folder + '/' + scenario

    # get_all_files_from_path already returns only the '.json' entries,
    # so no further extension check is needed.
    files = get_all_files_from_path(path)

    for file in files:
        # Parse each ranking file once and reuse it for the three slots
        # (previously every file was read and parsed three times).
        ranking = get_data_from_file(f"{path}/{file}")

        user_id = ranking[0][0]
        karma_ranking.at[user_id, 'Freq'] += 1

        user_id_2 = ranking[1][0]
        karma_ranking_2.at[user_id_2, 'Freq'] += 1

        user_id_3 = ranking[2][0]
        karma_ranking_3.at[user_id_3, 'Freq'] += 1

# TODO(review): there must be a bug somewhere, because the three plots come out the same.

In [54]:
# Display the karma ranking table, including its Freq counter column.
karma_ranking

Unnamed: 0_level_0,Ranking,Karma,Freq
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,48115,3483
5184,1,43312,16422
1034,2,41279,1481
122,3,29613,589
25,4,24183,973
...,...,...,...
36346,21383,1,0
36347,21384,1,0
36348,21385,1,0
36349,21386,1,0


In [172]:
import plotly.express as px

# Keep only the users whose Freq counter in karma_ranking is above 1.
df = karma_ranking[karma_ranking.Freq > 1]
# Ranking is zero-based and a log axis cannot draw x=0, so the rank-0 user
# would be dropped from the figure. Shift to one-based ranks for plotting only;
# `df` itself keeps the original values.
fig = px.scatter(df.assign(Ranking=df["Ranking"] + 1), x="Ranking", y="Freq",
                 log_y=True, log_x=True,
                 labels=dict(Ranking="Karma Ranking", Freq="Frequency of Recommendation"))
fig.update_layout(
    title_text='Frequency of a user of being recommended in First Place (log-log)'
)
fig.show()

In [148]:
# Second best
import plotly.express as px

# Keep only the users whose Freq counter in karma_ranking_2 is above 1.
df = karma_ranking_2[karma_ranking_2.Freq > 1]
# Ranking is zero-based and a log axis cannot draw x=0, so the rank-0 user
# would be dropped from the figure. Shift to one-based ranks for plotting only;
# `df` itself keeps the original values.
fig = px.scatter(df.assign(Ranking=df["Ranking"] + 1), x="Ranking", y="Freq",
                 log_y=True, log_x=True,
                 labels=dict(Ranking="Karma Ranking", Freq="Frequency of Recommendation"))
fig.update_layout(
    title_text='Frequency of a user of being recommended in Second Place (log-log)'
)
fig.show()

In [171]:
# Third best
import plotly.express as px

# Keep only the users whose Freq counter in karma_ranking_3 is above 1.
df = karma_ranking_3[karma_ranking_3.Freq > 1]
# Ranking is zero-based and a log axis cannot draw x=0, so the rank-0 user
# would be dropped from the figure. Shift to one-based ranks for plotting only;
# `df` itself keeps the original values.
fig = px.scatter(df.assign(Ranking=df["Ranking"] + 1), x="Ranking", y="Freq",
                 log_y=True, log_x=True,
                 labels=dict(Ranking="Karma Ranking", Freq="Frequency of Recommendation"))
fig.update_layout(
    title_text='Frequency of a user of being recommended in Third Place (log-log)'
)
fig.show()

In [205]:
import plotly.express as px

# Keep only the users with more than 1 karma point.
df = karma_ranking[karma_ranking.Karma > 1]
# Ranking is zero-based and a log axis cannot draw x=0, so the user with the
# highest karma (rank 0) would be dropped from the figure. Shift to one-based
# ranks for plotting only; `df` itself keeps the original values.
fig = px.scatter(df.assign(Ranking=df["Ranking"] + 1), x="Ranking", y="Karma",
                 log_x=True, log_y=True,
                 labels=dict(Ranking="Karma Ranking", Karma="Karma Points"))
fig.update_layout(
    title_text='Karma Points per user (sorted)'
)
fig.show()

In [206]:
# Label rows with Ranking <= 36 as "High" participation and the rest as "Low",
# then sum every column per label. The summed Ranking column is only a
# by-product of summing all columns.
karma_ranking_split = karma_ranking.assign(
    Participation=np.where(karma_ranking["Ranking"] <= 36, "High", "Low")
)
karma_ranking_split = karma_ranking_split.groupby("Participation").sum()
karma_ranking_split.head()

Unnamed: 0_level_0,Ranking,Karma,Freq
Participation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
High,666,439752,27992
Low,228711912,647789,4548


In [207]:
import plotly.express as px
# Bar chart of the summed Freq column, one bar per row of karma_ranking_split
# (the "High" / "Low" participation groups).
fig = px.bar(karma_ranking_split, y="Freq")
fig.show()

In [None]:
_