# Karma Analysis

The karma ranking is based on karma points, which users earn by participating in ROS Answers.
In the absence of a recommender system for answerers, users would ask users with high karma, that is, users in the top positions of the karma ranking.
Since one of the objectives of the recommender system is to distribute the workload, we expect it to recommend questions to users who are not in the top positions of the karma ranking.

In [1]:
import pandas
import plotly.express as px

In [2]:
# Name of the scenario sub-folder that is read inside every result folder.
scenario = "C"

# Root directory of the result folders listed below.
base_path = "/Users/pestefo/u/ra_recommendator_conrec/results"

# Result folders whose scenario sub-folder is analysed.
folders = [
    "20190714_1211_6p",
    "20190714_1213_5p",
    "20190714_1231_7p",
    "20190714_1231_8p",
    "20190714_1232_9p",
    "20190714_1233_10p",
    "20190714_1234_3p",
    "20190714_1235_4p",
    "20190714_1236_2p",
]

# SQLite database file queried for user karma.
# Alternative used previously: "/Users/pestefo/u/ra_recommendator_conrec/data/v1.db"
db_file = "/Users/pestefo/u/ra_recommendator_conrec/data/v1.2.db"

In [3]:
import os
from tqdm import tqdm

def get_all_files_from_path(path):
    """Return the names of the entries in ``path`` that end with '.json'.

    Only the bare entry names are returned (they are not joined with
    ``path``), in the order produced by ``os.listdir``.
    """
    entries = os.listdir(path)
    return [name for name in entries if name.endswith('.json')]

In [4]:
import ast

def get_data_from_file(path):
    """Read the file at ``path`` and parse its text as a Python literal.

    The whole content is evaluated with ``ast.literal_eval``, so it must be a
    single literal expression (list, tuple, dict, number, string, ...).
    """
    with open(path, "r") as handle:
        raw_text = handle.read()
    return ast.literal_eval(raw_text)

def get_position_in_ranking(user_id, file_path):
    """Return the 0-based index of ``user_id`` in the ranking stored in ``file_path``.

    Every entry of the parsed ranking is matched on its first element. The
    index of the first match is returned, or ``None`` when nothing matches.
    """
    ranking = get_data_from_file(file_path)
    matching_positions = (
        position
        for position, entry in enumerate(ranking)
        if entry[0] == user_id
    )
    return next(matching_positions, None)

In [5]:
import sqlite3 

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file
    :param db_file: database file
    :return: Connection object or None
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # The exception class must be qualified: a bare ``Error`` is not
        # defined when only ``import sqlite3`` is in scope, which would turn
        # any connection failure into a NameError.
        # Best-effort behaviour: report the problem and hand back None.
        print(e)
        return None

def get_karma_for_user(conn, user_id):
    """Look up the karma of a user in the ``ros_user`` table.

    :param conn: open sqlite3 connection
    :param user_id: value matched against the ``id`` column of ``ros_user``
    :return: the ``karma`` value of the first matching row, or -1 when no
        row matches
    """
    # Bind user_id as a parameter instead of formatting it into the SQL text,
    # so its value can never be interpreted as part of the statement.
    query = """
    SELECT karma
    FROM ros_user
    WHERE id = ?
    """
    cur = conn.cursor()
    cur.execute(query, (user_id,))

    # Only the first row is needed; fetchone() returns None when there is none.
    row = cur.fetchone()
    if row is None:
        return -1
    return row[0]


In [6]:
def get_best_ranked_user_for_question(path_to_file):
    """Return the first element of the first entry in the ranking file.

    The file is parsed with ``get_data_from_file``; an empty ranking raises
    ``IndexError``.
    """
    ranking = get_data_from_file(path_to_file)
    top_entry = ranking[0]
    return top_entry[0]

In [7]:
# Connection to the SQLite database used for the karma look-ups below.
conn = create_connection(db_file)

# Karma of the first-ranked user of every ranking file found in the scenario
# sub-folder of each result folder. Despite the name, the list holds karma
# values (or -1 when the user is not found), not ranking positions.
karma_positions = []
for folder in folders:
    path = f"{base_path}/{folder}/{scenario}"

    # get_all_files_from_path already returns only '.json' entries, so no
    # additional suffix check is needed inside the loop.
    files = get_all_files_from_path(path)

    for file in files:
        user_id = get_best_ranked_user_for_question(f"{path}/{file}")
        karma_positions.append(get_karma_for_user(conn, user_id))
    

In [8]:
# Single-column frame holding the collected karma values.
# karma_positions is a plain list, so it is passed to the DataFrame
# constructor with the column named up front (DataFrame.from_dict is meant
# for dict input). This also keeps the "karma" column when the list is empty.
df = pandas.DataFrame(karma_positions, columns=["karma"])

In [21]:
# Histogram of the karma values collected for the recommended users.
# (log_y=True can be passed to draw the bars on a logarithmic scale.)
fig = px.histogram(
    data_frame=df,
    x="karma",
    title='Karma points of recommended users',
    color_discrete_sequence=['blue'],  # color of histogram bars
)
fig.show()

TODO: Do the same using the karma ranking, and compare < 5 against >= 5.

In [11]:
# Box plot of the same karma values, drawn along the x axis.
fig = px.box(data_frame=df, x="karma")
fig.show()

In [17]:
import plotly.express as px

# Karma value that separates the "low" group (0) from the "high" group (1).
KARMA_THRESHOLD = 10500

df2 = df.copy()

# Build both masks from the untouched values before writing anything, so the
# result does not depend on the order of the two assignments. Writing through
# .loc (rather than through Series.values) does not rely on .values being a
# writable view of the frame's data.
is_low = df2["karma"] <= KARMA_THRESHOLD
is_high = df2["karma"] > KARMA_THRESHOLD
df2.loc[is_low, "karma"] = 0
df2.loc[is_high, "karma"] = 1

# Histogram of the two groups; text_auto prints the count on each bar.
fig = px.histogram(
    df2,
    x="karma",
    title='Histogram of users with karma lower and upper 10.500',
    labels={'karma': 'karma'},  # can specify one label per df column
    color_discrete_sequence=['blue'],  # color of histogram bars
    text_auto=True,
)
fig.show()
fig.show()