# Recall Analysis for Scenarios A, B, C and D

### Imports

In [155]:
import pandas
from tqdm import tqdm
import plotly.express as px
import plotly.graph_objects as go

### Constants

In [12]:
# Root directory of the results; the folder names listed in `folders` and the
# scenario sub-folder are appended to it when building file paths.
base_path = "/Users/pestefo/u/ra_recommendator_conrec/results/"

# Maps a number of participants to the name of its results folder under
# `base_path` (looked up as folders[nb_of_participants]).
folders = {
    6: "20190714_1211_6p", 
    5: "20190714_1213_5p", 
    7: "20190714_1231_7p", 
    8: "20190714_1231_8p", 
    9: "20190714_1232_9p", 
    10: "20190714_1233_10p", 
    3: "20190714_1234_3p", 
    4: "20190714_1235_4p", 
    2: "20190714_1236_2p"
}

# Path of the SQLite database opened through create_connection(db_file).
# The v1.2 file below is an alternative that is currently disabled.
# db_file = "/Users/pestefo/u/ra_recommendator_conrec/data/v1.2.db"
db_file = "/Users/pestefo/u/ra_recommendator_conrec/data/v1.db"


### Helper functions

In [2]:
import os
from tqdm import tqdm

def get_all_files_from_path(path):
    """Return the names of the entries directly inside ``path`` ending in ".json".

    Only bare entry names are returned (they are not joined with ``path``),
    in the order produced by ``os.listdir``.
    """
    return [
        entry_name
        for entry_name in os.listdir(path)
        if entry_name.endswith('.json')
    ]

In [4]:
import ast

def get_data_from_file(path):
    """Parse the whole content of ``path`` as one Python literal and return it.

    The text is evaluated with ``ast.literal_eval``, so only literal
    structures (numbers, strings, tuples, lists, dicts, ...) are accepted
    and no code is executed.
    """
    with open(path, "r") as source:
        raw_text = source.read()
    return ast.literal_eval(raw_text)

def get_position_in_ranking(user_id, file_path):
    """Return the 0-based position of ``user_id`` in the ranking stored in ``file_path``.

    The file is loaded with ``get_data_from_file`` and the first element of
    each entry is compared with ``user_id``; the position of the first match
    is returned, or ``None`` when no entry matches.
    """
    ranking = get_data_from_file(file_path)
    matching_positions = (
        position for position, entry in enumerate(ranking) if entry[0] == user_id
    )
    return next(matching_positions, None)

In [5]:
import sqlite3 

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file

    A connection failure is printed rather than raised, so callers must be
    prepared to receive ``None``.

    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # A bare `Error` is not defined in this file; sqlite3.Error is the
        # base class of the exceptions raised by sqlite3.connect.
        print(e)

    return conn


In [None]:
def calculate_recall(participants, ranked_users):
    """Return the fraction of distinct participants that appear in ``ranked_users``.

    Both arguments are treated as sets, so repeated ids count once in the
    numerator and in the denominator alike.

    :param participants: iterable of hashable ids that should be found
    :param ranked_users: iterable of hashable ids that were recommended
    :return: float between 0.0 and 1.0
    :raises ZeroDivisionError: if ``participants`` is empty
    """
    relevant = set(participants)
    # Divide by the number of *distinct* participants so the denominator
    # matches the set-based numerator.
    return len(relevant.intersection(ranked_users)) / len(relevant)

def get_ranked_users(nb_of_participants, file, scenario):
    """Return the first element of every entry in one ranking file, in file order.

    The file is located at
    ``base_path + folders[nb_of_participants] + "/" + scenario + "/" + file``
    and is loaded with ``get_data_from_file``.
    """
    ranking_path = base_path + folders[nb_of_participants] + "/" + scenario + "/" + file
    return [entry[0] for entry in get_data_from_file(ranking_path)]
    
def get_participants(conn, question_id):
    """Return the distinct authors of the answers linked to a question.

    Selects ``author`` from ``ros_answer`` for every answer id that
    ``ros_question_answer`` associates with ``question_id``.

    :param conn: open sqlite3 connection
    :param question_id: value matched against
        ``ros_question_answer.ros_question_id``
    :return: list of distinct ``author`` values (empty if nothing matches)
    """
    # The id is bound as a parameter instead of being interpolated into the
    # SQL text, so unusual characters in it cannot alter the statement.
    query = """
    SELECT DISTINCT author
    FROM ros_answer
    WHERE id IN (SELECT ros_answer_id
                 FROM ros_question_answer
                 WHERE ros_question_id = ?)
    """

    cur = conn.cursor()
    cur.execute(query, (question_id,))

    return [row[0] for row in cur.fetchall()]
    
def recall_for_questions_participants(conn, nb_of_participants, scenario):
    """Compute, for every ranking file of a scenario, the recall of each ranking prefix.

    Ranking files are read from
    ``base_path + folders[nb_of_participants] + "/" + scenario``. The file
    name minus its last five characters (the ".json" suffix) is used as the
    question id when querying the participants through ``conn``.

    :return: dict mapping question id -> list of recall values, where entry
        ``i`` is the recall of the first ``i`` ranked users
    """
    scenario_dir = base_path + folders[nb_of_participants] + "/" + scenario
    recall_by_question = {}

    for file in tqdm(get_all_files_from_path(path=scenario_dir)):
        ranking = get_ranked_users(nb_of_participants, file, scenario)
        question_id = file[:-5]  # drop the ".json" suffix
        participant_ids = get_participants(conn, question_id)
        # NOTE(review): the prefix for position i excludes the user at
        # position i, so entry 0 is the recall of an empty prefix and the
        # last ranked user is never counted -- confirm this is intended.
        recall_by_question[question_id] = [
            calculate_recall(participants=participant_ids, ranked_users=ranking[0:cutoff])
            for cutoff in range(len(ranking))
        ]

    return recall_by_question

def _aggregate_per_rank(aggregated, last_rank):
    """Turn a per-rank aggregate Series into a "values"/"idx" frame cut after ``last_rank``."""
    frame = aggregated.to_frame().rename(columns={0: "values"})
    # Derive the rank index from the data instead of assuming 100 ranks, so
    # rankings of any length can be plotted.
    frame["idx"] = list(range(len(frame)))
    return frame.truncate(after=last_rank)


def plot_results_for_participants(data, nb_of_participants, scenario):
    """Plot the median and mean recall per ranking position across questions.

    :param data: dict mapping a question id to its list of recall values, one
        per ranking position (the shape returned by
        ``recall_for_questions_participants``)
    :param nb_of_participants: number shown in the figure title
    :param scenario: scenario label shown in the figure title
    :return: None; the figure is displayed with ``fig.show()``
    """
    # One row per question, one column per ranking position.
    df = pandas.DataFrame.from_dict(data).transpose()

    # NOTE(review): the median curve is cut after rank 50 and the mean curve
    # after rank 30 -- confirm the different cut-offs are intentional.
    median_data = _aggregate_per_rank(df.median(axis=0), last_rank=50)
    mean_data = _aggregate_per_rank(df.mean(axis=0), last_rank=30)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=median_data["idx"], y=median_data["values"],
                        mode='markers+lines',
                        name='median'))
    fig.add_trace(go.Scatter(x=mean_data["idx"], y=mean_data["values"],
                        mode='lines+markers',
                        name='mean'))
    fig.update_layout(title=f'Scenario {scenario} - Mean/Median recall for {nb_of_participants} participants',
                       xaxis_title='Ranking',
                       yaxis_title='Recall')

    # Recall is a fraction, so pin the axis to the full [0, 1] range.
    fig.update_yaxes(range=[0,1])

    fig.show()
    


## Scenario C

In [212]:
# Scenario C, 2-participant result set: compute recall per rank, then plot it.
nb_of_participants = 2
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "C")

100%|██████████████████████████████████████████████████████████████████████████████████| 18161/18161 [05:46<00:00, 52.40it/s]


In [213]:
# Scenario C, 3-participant result set: compute recall per rank, then plot it.
nb_of_participants = 3
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "C")

100%|████████████████████████████████████████████████████████████████████████████████████| 9052/9052 [02:44<00:00, 55.09it/s]


In [214]:
# Scenario C, 4-participant result set: compute recall per rank, then plot it.
nb_of_participants = 4
conn = create_connection(db_file)
try:
    data_4 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_4, nb_of_participants, "C")

100%|████████████████████████████████████████████████████████████████████████████████████| 3270/3270 [00:57<00:00, 56.55it/s]


In [215]:
# Scenario C, 5-participant result set: compute recall per rank, then plot it.
nb_of_participants = 5
conn = create_connection(db_file)
try:
    data_5 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_5, nb_of_participants, "C")

100%|████████████████████████████████████████████████████████████████████████████████████| 1181/1181 [00:20<00:00, 56.29it/s]


In [216]:
# Scenario C, 6-participant result set: compute recall per rank, then plot it.
nb_of_participants = 6
conn = create_connection(db_file)
try:
    data_6 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_6, nb_of_participants, "C")

100%|██████████████████████████████████████████████████████████████████████████████████████| 467/467 [00:08<00:00, 58.19it/s]


In [217]:
# Scenario C, 7-participant result set: compute recall per rank, then plot it.
nb_of_participants = 7
conn = create_connection(db_file)
try:
    data_7 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_7, nb_of_participants, "C")

100%|██████████████████████████████████████████████████████████████████████████████████████| 215/215 [00:03<00:00, 60.03it/s]


In [218]:
# Scenario C, 8-participant result set: compute recall per rank, then plot it.
nb_of_participants = 8
conn = create_connection(db_file)
try:
    data_8 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_8, nb_of_participants, "C")

100%|██████████████████████████████████████████████████████████████████████████████████████| 106/106 [00:01<00:00, 58.68it/s]


In [219]:
# Scenario C, 9-participant result set: compute recall per rank, then plot it.
nb_of_participants = 9
conn = create_connection(db_file)
try:
    data_9 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_9, nb_of_participants, "C")

100%|████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:00<00:00, 56.98it/s]


In [220]:
# Scenario C, 10-participant result set: compute recall per rank, then plot it.
nb_of_participants = 10
conn = create_connection(db_file)
try:
    data_10 = recall_for_questions_participants(conn, nb_of_participants, "C")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data_10, nb_of_participants, "C")

100%|████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:00<00:00, 60.13it/s]


## Scenario A

In [221]:
# Scenario A, 2-participant result set: compute recall per rank, then plot it.
nb_of_participants = 2
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|██████████████████████████████████████████████████████████████████████████████████| 18162/18162 [04:54<00:00, 61.66it/s]


In [222]:
# Scenario A, 3-participant result set: compute recall per rank, then plot it.
nb_of_participants = 3
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|████████████████████████████████████████████████████████████████████████████████████| 9052/9052 [02:22<00:00, 63.42it/s]


In [223]:
# Scenario A, 4-participant result set: compute recall per rank, then plot it.
nb_of_participants = 4
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|████████████████████████████████████████████████████████████████████████████████████| 3271/3271 [00:58<00:00, 56.38it/s]


In [224]:
# Scenario A, 5-participant result set: compute recall per rank, then plot it.
nb_of_participants = 5
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|████████████████████████████████████████████████████████████████████████████████████| 1181/1181 [00:20<00:00, 58.00it/s]


In [225]:
# Scenario A, 6-participant result set: compute recall per rank, then plot it.
nb_of_participants = 6
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|██████████████████████████████████████████████████████████████████████████████████████| 467/467 [00:07<00:00, 58.51it/s]


In [226]:
# Scenario A, 7-participant result set: compute recall per rank, then plot it.
nb_of_participants = 7
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|██████████████████████████████████████████████████████████████████████████████████████| 215/215 [00:03<00:00, 62.30it/s]


In [227]:
# Scenario A, 8-participant result set: compute recall per rank, then plot it.
nb_of_participants = 8
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|██████████████████████████████████████████████████████████████████████████████████████| 106/106 [00:01<00:00, 60.45it/s]


In [228]:
# Scenario A, 9-participant result set: compute recall per rank, then plot it.
nb_of_participants = 9
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:00<00:00, 62.04it/s]


In [229]:
# Scenario A, 10-participant result set: compute recall per rank, then plot it.
nb_of_participants = 10
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "A")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "A")

100%|████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:00<00:00, 60.73it/s]


## Scenario B

In [230]:
# Scenario B, 2-participant result set: compute recall per rank, then plot it.
nb_of_participants = 2
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|██████████████████████████████████████████████████████████████████████████████████| 18161/18161 [05:16<00:00, 57.31it/s]


In [231]:
# Scenario B, 3-participant result set: compute recall per rank, then plot it.
nb_of_participants = 3
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|████████████████████████████████████████████████████████████████████████████████████| 9052/9052 [02:40<00:00, 56.34it/s]


In [232]:
# Scenario B, 4-participant result set: compute recall per rank, then plot it.
nb_of_participants = 4
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|████████████████████████████████████████████████████████████████████████████████████| 3270/3270 [00:51<00:00, 62.91it/s]


In [233]:
# Scenario B, 5-participant result set: compute recall per rank, then plot it.
nb_of_participants = 5
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|████████████████████████████████████████████████████████████████████████████████████| 1181/1181 [00:18<00:00, 65.55it/s]


In [234]:
# Scenario B, 6-participant result set: compute recall per rank, then plot it.
nb_of_participants = 6
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|██████████████████████████████████████████████████████████████████████████████████████| 467/467 [00:07<00:00, 63.73it/s]


In [235]:
# Scenario B, 7-participant result set: compute recall per rank, then plot it.
nb_of_participants = 7
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|██████████████████████████████████████████████████████████████████████████████████████| 215/215 [00:03<00:00, 66.45it/s]


In [236]:
# Scenario B, 8-participant result set: compute recall per rank, then plot it.
nb_of_participants = 8
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|██████████████████████████████████████████████████████████████████████████████████████| 106/106 [00:01<00:00, 66.88it/s]


In [237]:
# Scenario B, 9-participant result set: compute recall per rank, then plot it.
nb_of_participants = 9
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:00<00:00, 67.20it/s]


In [238]:
# Scenario B, 10-participant result set: compute recall per rank, then plot it.
nb_of_participants = 10
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "B")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "B")

100%|████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:00<00:00, 62.18it/s]


## Scenario D

In [239]:
# Scenario D, 2-participant result set: compute recall per rank, then plot it.
nb_of_participants = 2
conn = create_connection(db_file)
try:
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|██████████████████████████████████████████████████████████████████████████████████| 18161/18161 [04:47<00:00, 63.07it/s]


In [240]:
# Scenario D, 3-participant result set: compute recall per rank, then plot it.
nb_of_participants = 3
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|████████████████████████████████████████████████████████████████████████████████████| 9052/9052 [02:21<00:00, 63.79it/s]


In [241]:
# Scenario D, 4-participant result set: compute recall per rank, then plot it.
nb_of_participants = 4
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|████████████████████████████████████████████████████████████████████████████████████| 3270/3270 [00:51<00:00, 63.75it/s]


In [242]:
# Scenario D, 5-participant result set: compute recall per rank, then plot it.
nb_of_participants = 5
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|████████████████████████████████████████████████████████████████████████████████████| 1181/1181 [00:18<00:00, 63.41it/s]


In [243]:
# Scenario D, 6-participant result set: compute recall per rank, then plot it.
nb_of_participants = 6
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|██████████████████████████████████████████████████████████████████████████████████████| 467/467 [00:07<00:00, 59.24it/s]


In [244]:
# Scenario D, 7-participant result set: compute recall per rank, then plot it.
nb_of_participants = 7
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|██████████████████████████████████████████████████████████████████████████████████████| 215/215 [00:03<00:00, 63.87it/s]


In [245]:
# Scenario D, 8-participant result set: compute recall per rank, then plot it.
nb_of_participants = 8
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|██████████████████████████████████████████████████████████████████████████████████████| 106/106 [00:01<00:00, 64.19it/s]


In [246]:
# Scenario D, 9-participant result set: compute recall per rank, then plot it.
nb_of_participants = 9
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:00<00:00, 55.03it/s]


In [247]:
# Scenario D, 10-participant result set: compute recall per rank, then plot it.
nb_of_participants = 10
conn = create_connection(db_file)
try:
    # The recall must be read from the scenario "D" rankings so that it
    # matches the scenario named in the plot below.
    data = recall_for_questions_participants(conn, nb_of_participants, "D")
finally:
    # Release the SQLite handle even if the recall computation fails.
    if conn is not None:
        conn.close()
plot_results_for_participants(data, nb_of_participants, "D")

100%|████████████████████████████████████████████████████████████████████████████████████████| 36/36 [00:00<00:00, 60.97it/s]
