# Setup

In [None]:
%cd D:\opimi_test\ 

import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# Utilities

## Logs reading

In [None]:
def count_user_prefs(configuration):
    """Count the preferences expressed by each user in one configuration.

    Reads ``results/<configuration>/messages.json``, discards the messages
    sent by four hard-coded user IDs and the messages whose
    ``recognizedObjects`` field is an empty string, and counts one preference
    per comma-separated entry of ``recognizedObjects``.

    Returns a DataFrame with one row per user and two columns: ``userID``
    (converted to ``str``) and ``prefsCount`` (sum over the user's messages).
    """
    excluded_ids = [299939018, 19371450, 184440200, 1188508175]
    messages_file = os.path.join('results', configuration, 'messages.json')
    messages = pd.read_json(messages_file)

    # Remove the messages of the excluded users
    from_excluded = messages['userID'].isin(excluded_ids)
    messages = messages.drop(index=messages.index[from_excluded])

    # Remove the messages in which no object was recognized
    nothing_recognized = messages['recognizedObjects'] == ''
    messages = messages.drop(index=messages.index[nothing_recognized])

    # "a,b,c" holds three preferences: number of commas plus one
    messages['prefsCount'] = messages['recognizedObjects'].apply(
        lambda objects: objects.count(",") + 1
    )

    per_user = messages.groupby('userID')['prefsCount'].sum().reset_index()
    per_user["userID"] = per_user["userID"].astype(str)
    return per_user

def read_logs(configuration):
    """Build the per-user results table of one configuration.

    Loads ``answers.csv``, ``recEntityData.csv`` and ``userData.csv`` from
    ``results/<configuration>``, joins them (together with the preference
    counts from ``count_user_prefs``) on the user identifier, removes four
    hard-coded user IDs, renames the query-density and hit-rate columns,
    adds the derived columns ``totalHitRate``, ``control_mean``,
    ``int_adequacy_mean`` and ``attitudes_mean``, adjusts ``Q20``,
    ``convLength`` and ``prefsCount``, and finally drops the log columns
    that the rest of the notebook does not use.

    Returns the resulting DataFrame; the user identifier is kept in the
    ``user_id`` column as ``str``.
    """
    base_dir = os.path.join('results', configuration)

    def load_csv(file_name, id_column):
        # Identifiers are compared and merged as strings everywhere
        table = pd.read_csv(os.path.join(base_dir, file_name))
        table[id_column] = table[id_column].astype(str)
        return table

    answers = load_csv('answers.csv', 'user_id')
    rec_entity_data = load_csv('recEntityData.csv', 'userID')
    user_data = load_csv('userData.csv', 'userID')
    prefs_per_user = count_user_prefs(configuration)

    # Successive joins on the user identifier. The repeated 'userID' key gets
    # the default _x/_y suffixes on the second merge, so the third merge adds
    # a plain 'userID' column again.
    merged = answers
    for other in (rec_entity_data, user_data, prefs_per_user):
        merged = pd.merge(merged, other, left_on='user_id', right_on='userID')

    excluded_ids = ['299939018', '19371450', '184440200', '1188508175']
    is_excluded = merged['userID'].isin(excluded_ids)
    merged = merged.drop(index=merged.index[is_excluded])

    merged = merged.rename(columns={
        'queryDensity (per preference messages)': 'QueryDensity',
        'hitRate@1': 'HitRate_1',
        'hitRate@2': 'HitRate_2',
        'hitRate@3': 'HitRate_3',
    })

    merged['totalHitRate'] = merged['HitRate_1'] + merged['HitRate_2'] + merged['HitRate_3']
    merged['convLength'] = merged['convLength'] - 1
    merged['control_mean'] = (merged['Q10'] + merged['Q13']) / 2
    merged['int_adequacy_mean'] = (
        merged['Q11'] + merged['Q17'] + merged['Q21'] + merged['Q23']
    ) / 4
    merged['attitudes_mean'] = (merged['Q22'] + merged['Q24']) / 2

    # Clean Q20 answers: force 6 when totalHitRate is 3, and replace a 6
    # given with any other totalHitRate by 3
    all_hits = merged['totalHitRate'] == 3
    merged.loc[all_hits, 'Q20'] = 6
    merged.loc[~all_hits & (merged['Q20'] == 6), 'Q20'] = 3

    # For rows with HitRate_3 == 1:
    #  - convLength: remove the positive answer to the successful
    #    recommendation message
    #  - prefsCount: remove the preference to the liked movie
    hit_at_three = merged['HitRate_3'] == 1
    for column in ('convLength', 'prefsCount'):
        merged.loc[hit_at_three, column] = merged[column] - 1

    unused_columns = [
        'numRecLists',
        'numLikes',
        'numRecEntities',
        'numRecEntities (alt)',
        'accuracy',
        'accuracy (alt)',
        'meanAveragePrecision',
        'meanAveragePrecision (alt)',
        'nDCG',
        'numTrainQuestions',
        'numRecQuestions',
        'avgTimePerQuestion',
        'totalInteractionTime',
        'queryDensity',
        'queryEfficiency',
        'queryEfficiency (training)',
        'avgSessionTime',
        'skipRatio',
        'likeRatio',
        'dislikeRatio',
        'fallbackRatio',
        'avgSystemResponseTime',
        'disambiguatioRatio',
        'userID_x',
        'userID_y',
        'userID',
    ]
    merged = merged.drop(columns=unused_columns)

    return merged

def hit_rate_perc(results_df, n):
    """Return the percentage of rows for each ``totalHitRate`` value.

    The returned list holds, in this order, the share of rows whose
    ``totalHitRate`` equals 3, 2, 1 and 0. Each share is computed as
    ``count * 100 / n``, where ``n`` is the sample size supplied by the
    caller (it is not derived from ``results_df``).
    """
    totals = results_df['totalHitRate']
    return [int((totals == score).sum()) * 100 / n for score in (3, 2, 1, 0)]

## Plots

In [None]:
def simple_bar_plot(data, labels, plot_title):
    """Show a frameless horizontal bar chart with each value written out.

    data:       bar lengths, one per label.
    labels:     bar labels, in the same order as ``data``.
    plot_title: title drawn above the chart.

    All four spines and the x axis are hidden; every bar is annotated with
    its value formatted to two decimals.
    """
    figure(figsize=(8, 6), dpi=80)
    plt.barh(labels, data, color=['#4472C4'])
    plt.title(plot_title, size=20)

    # Write each value at the end of its bar
    for position, bar_value in enumerate(data):
        plt.text(bar_value, position, '%.2f' % bar_value, size=20)

    plt.yticks(size=16)
    axes = plt.gca()
    for side in ('right', 'left', 'top', 'bottom'):
        axes.spines[side].set_color('none')
    axes.xaxis.set_visible(False)
    plt.show()

def hit_rate_simple_plot(data, labels, k):
    """Show a horizontal bar chart of hit-rate percentages at turn ``k``.

    data:   percentages, one per label.
    labels: bar labels, in the same order as ``data``.
    k:      turn number, used in the title and in the x-axis label.

    The x axis is fixed to the 0-100 range and every bar is annotated with
    its value formatted to two decimals.
    """
    figure(figsize=(8, 6), dpi=80)
    plt.barh(labels, data, color=['#4472C4'])
    plt.title('HitRate@' + str(k), size=20)
    plt.xlabel('% of successful recommendations at turn ' + str(k), size=18)

    for position, percentage in enumerate(data):
        plt.text(percentage, position, '%.2f' % percentage, size="20")

    axes = plt.gca()
    axes.set_xlim([0, 100])
    plt.yticks(size=16)
    for side in ('right', 'top'):
        axes.spines[side].set_color('none')
    plt.show()

def questionnaire_barplot(question_number, question_title):
    """Plot the mean answer to one question for the three configurations.

    question_number: column name of the question (e.g. ``'Q9'``).
    question_title:  text used as plot title.

    Reads the module-level frames ``wiki_results``, ``aspects_results`` and
    ``wikiaspects_results``. The plotted value is ``(5 - mean) + 1``, i.e.
    the mean mirrored on the 1-5 scale.
    NOTE(review): this presumably means the stored answers are coded with
    1 = Strongly Agree and 5 = Strongly Disagree - confirm against the
    questionnaire.
    """
    def mirrored_mean(results):
        return (5 - results[question_number].mean()) + 1

    mean_labels = ['Wiki + Aspects', 'Aspects', 'Wiki']
    mean_data = [
        mirrored_mean(wikiaspects_results),
        mirrored_mean(aspects_results),
        mirrored_mean(wiki_results),
    ]
    x_labels = ['1 - Strongly Disagree', ' ', ' ', ' ', '5 - Strongly Agree']

    figure(figsize=(8, 6), dpi=80)
    plt.barh(mean_labels, mean_data, color=['#4472C4'])
    plt.title(question_title, size=20)

    # Value written in white just inside the end of each bar
    for position, mean_value in enumerate(mean_data):
        plt.text(mean_value - 0.5, position, '%.2f' % mean_value, size="20", color='white')

    axes = plt.gca()
    axes.set_xlim([1, 5])
    plt.xticks(ticks=range(1, 6), labels=x_labels, rotation='horizontal', size=16)
    plt.yticks(size=16)
    for side in ('right', 'top'):
        axes.spines[side].set_color('none')
    plt.show()

## Labels

In [None]:
# Bar labels of the three compared configurations
config_labels = [
    'Wiki',
    'Aspects',
    'Wiki + Aspects',
]
# Labels of the hit-rate outcomes
hr_labels = [
    '@1',
    '@2',
    '@3',
    'No Hit',
]

# Data reading

In [None]:
# Complete results dataframes
wiki_results = read_logs('wiki')
aspects_results = read_logs('aspects')
wikiaspects_results = read_logs('wikiaspects')

# Fix values for user 715108529.
# read_logs() stores user_id as str, so the ID has to be matched as a string:
# comparing the column with the int 715108529 selects no row and the
# correction would silently never be applied.
is_user_715108529 = aspects_results.user_id == '715108529'
aspects_results.loc[is_user_715108529, 'prefsCount'] = 5
aspects_results.loc[is_user_715108529, 'convLength'] = 11

'''
# Select restricted user group
group_a = aspects_results.drop(aspects_results[( (aspects_results.Q7 == 4) | (aspects_results.Q7 == 5))].index)
group_a_ids = group_a['user_id'].tolist()

wiki_results = wiki_results[wiki_results.user_id.isin(group_a_ids)]
aspects_results = aspects_results[aspects_results.user_id.isin(group_a_ids)]
wikiaspects_results = wikiaspects_results[wikiaspects_results.user_id.isin(group_a_ids)]
'''

# Number of users; taken from the wiki configuration and used as the
# denominator of the hit-rate percentages
sample_size = len(wiki_results['user_id'])

print(sample_size)

# QueryDensity

In [None]:
# This plot lists the configurations in reverse order. Use a cell-local label
# list for it: rebinding the shared config_labels here would leave every
# later plot pairing [wiki, aspects, wikiaspects] data with reversed labels.
data = [wikiaspects_results.QueryDensity.mean(), aspects_results.QueryDensity.mean(), wiki_results.QueryDensity.mean()]
query_density_labels = ['Wiki + Aspects', 'Aspects', 'Wiki']
simple_bar_plot(data, query_density_labels, 'QueryDensity')

# Conversation Length

In [None]:
# Mean convLength of each configuration
data = [
    results.convLength.mean()
    for results in (wiki_results, aspects_results, wikiaspects_results)
]

simple_bar_plot(data, config_labels, 'Mean Conversation Length')

# Mean number of preferences

In [None]:
# Mean prefsCount of each configuration
data = [
    results.prefsCount.mean()
    for results in (wiki_results, aspects_results, wikiaspects_results)
]

simple_bar_plot(data, config_labels, 'Mean number of preferences')

# HitRate@K

## HitRate@1 Comparison

In [None]:
# First entry of hit_rate_perc: share of rows with totalHitRate == 3
data = [
    hit_rate_perc(results, sample_size)[0]
    for results in (wiki_results, aspects_results, wikiaspects_results)
]

hit_rate_simple_plot(data, config_labels, 1)

## HitRate@2 Comparison

In [None]:
# Cumulative share: first two entries of hit_rate_perc added together
data = [
    sum(hit_rate_perc(results, sample_size)[:2])
    for results in (wiki_results, aspects_results, wikiaspects_results)
]

hit_rate_simple_plot(data, config_labels, 2)

## HitRate@3 Comparison

In [None]:
# Cumulative share: first three entries of hit_rate_perc added together
data = [
    sum(hit_rate_perc(results, sample_size)[:3])
    for results in (wiki_results, aspects_results, wikiaspects_results)
]

hit_rate_simple_plot(data, config_labels, 3)

# Questionnaire

## Demographics

### Gender

In [None]:
# Count the Q1 answers: code 1 is plotted as 'Male', code 2 as 'Female'
q1_answers = wikiaspects_results['Q1']
males = int((q1_answers == 1).sum())
females = int((q1_answers == 2).sum())

data = [males, females]
labels = ['Male', 'Female']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('Users gender', y=1.1, size=20)
plt.axis('equal')
plt.show()

### Age group

In [None]:
# Only answer code 2 of Q2 is counted; it is plotted as the '21-30' group
a_21_30 = int((wikiaspects_results['Q2'] == 2).sum())

data = [a_21_30]
labels = ['21-30']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('Users age group', y=1.1, size=20)
plt.axis('equal')
plt.show()

### Education level

In [None]:
# Count the Q3 answers with codes 2, 3 and 4
q3_answers = wikiaspects_results['Q3']
high, college, graduate = (
    int((q3_answers == code).sum()) for code in (2, 3, 4)
)

data = [high, college, graduate]
labels = ['High School', 'College', 'Graduate School']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('Users education level', y=1.1, size=20)
plt.axis('equal')
plt.show()

### Current Employment

In [None]:
# Count the Q4 answers with codes 1 to 4
q4_answers = wikiaspects_results['Q4']
student, public, private, self = (
    int((q4_answers == code).sum()) for code in (1, 2, 3, 4)
)

data = [student, public, private, self]
labels = ['Student', 'Public Company Staff', 'Private Company Staff', 'Self Employed']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('Users current employment', y=1.1, size=20)
plt.axis('equal')
plt.show()

### Computer use level

In [None]:
# Count the Q5 answers with codes 1 to 4
q5_answers = wikiaspects_results['Q5']
no_exp, beginner, average, advanced = (
    int((q5_answers == code).sum()) for code in (1, 2, 3, 4)
)

data = [no_exp, beginner, average, advanced]
labels = ['No Experience', 'Beginner', 'Average', 'Advanced']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('How would you rate yourself as a computer user?', y=1.1, size=20)
plt.axis('equal')
plt.show()

### Previous RS use

In [None]:
# Count the Q6 answers with codes 1, 2 and 3
q6_answers = wikiaspects_results['Q6']
yes, no, maybe = (
    int((q6_answers == code).sum()) for code in (1, 2, 3)
)

data = [yes, no, maybe]
labels = ['Yes', 'No', 'Maybe']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('Have you ever used a recommender system before?', y=1.1, size=20)
plt.axis('equal')
plt.show()

### DA use frequency

In [None]:
# Count the Q7 answers with codes 1 to 5
q7_answers = wikiaspects_results['Q7']
never, v_inf, inf, mod, daily = (
    int((q7_answers == code).sum()) for code in range(1, 6)
)

data = [never, v_inf, inf, mod, daily]
labels = ['Never', 'Very Infrequently', 'Infrequently', 'Moderately', 'Regularly']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 14},
)
plt.title('How frequently have you used conversational agents \n and digital assistants?', y=1.1, size=20)
plt.axis('equal')
plt.show()

### Movie interest

In [None]:
# Count the Q8 answers: code 1 is plotted as 'High', code 2 as 'Medium'
q8_answers = wikiaspects_results['Q8']
high = int((q8_answers == 1).sum())
medium = int((q8_answers == 2).sum())

data = [high, medium]
labels = ['High', 'Medium']

figure(figsize=(8, 6), dpi=80)
plt.pie(
    data,
    labels=labels,
    autopct='%1.1f%%',
    textprops={'fontsize': 18},
)
plt.title('How much are you interested in movies?', y=1.1, size=20)
plt.axis('equal')
plt.show()

## Comparison

### Ease of use

In [None]:
# Q9: mean answer per configuration
questionnaire_barplot(
    question_number='Q9',
    question_title='I became familiar with the recommender system very quickly',
)

### Control

In [None]:
# Q10: mean answer per configuration
questionnaire_barplot(
    question_number='Q10',
    question_title='I feel in control of expressing my actual preferences',
)

### Interaction adequacy

In [None]:
# Q11: mean answer per configuration
questionnaire_barplot(
    question_number='Q11',
    question_title='I found it easy to tell the system what I like/dislike',
)

### Recommendation accuracy

In [None]:
# Q12: mean answer per configuration
questionnaire_barplot(
    question_number='Q12',
    question_title='The items recommended to me match my interests',
)

### Transparency

In [None]:
# Q13: mean answer per configuration
questionnaire_barplot(
    question_number='Q13',
    question_title='I understood why the items were recommended to me',
)

### Novelty

In [None]:
# Q14: mean answer per configuration
questionnaire_barplot(
    question_number='Q14',
    question_title='This recommender system helped me discover new products',
)

### Serendipity

In [None]:
# Q15: mean answer per configuration
questionnaire_barplot(
    question_number='Q15',
    question_title='This recommender system provided me with unexpected suggestions',
)

### Use intentions

In [None]:
# Q16: mean answer per configuration
questionnaire_barplot(
    question_number='Q16',
    question_title='I will use this recommender again',
)

### Interaction adequacy

In [None]:
# Q17: mean answer per configuration
questionnaire_barplot(
    question_number='Q17',
    question_title='It is easy for me to inform the system if I like/dislike the recommended items',
)

### Interface sufficiency

In [None]:
# Q18: mean answer per configuration
questionnaire_barplot(
    question_number='Q18',
    question_title='The information provided for the recommended items \n is sufficient for me to make a purchase/download decision',
)

### Perceived usefulness

In [None]:
# Q19: mean answer per configuration
questionnaire_barplot(
    question_number='Q19',
    question_title='The recommender helped me find the ideal item',
)

### Control

In [None]:
question_number = 'Q20'
question_title = 'The recommendations get better as I interact with the system'

# Rows whose Q20 answer is 6 are left out of the mean
w_q20 = wiki_results.drop(index=wiki_results.index[wiki_results.Q20 == 6])
a_q20 = aspects_results.drop(index=aspects_results.index[aspects_results.Q20 == 6])
wa_q20 = wikiaspects_results.drop(index=wikiaspects_results.index[wikiaspects_results.Q20 == 6])

# Plotted value: 5 - mean, shown on a 0-4 axis
wiki_mean = 5 - w_q20[question_number].mean()
aspects_mean = 5 - a_q20[question_number].mean()
wikiaspects_mean = 5 - wa_q20[question_number].mean()
mean_data = [wiki_mean, aspects_mean, wikiaspects_mean]

mean_labels = ['Wiki', 'Aspects', 'Wiki + Aspects']
x_labels = ['0 - Strongly Disagree', ' ', ' ', ' ', '4 - Strongly Agree']

figure(figsize=(8, 6), dpi=80)
plt.barh(mean_labels, mean_data, color=['#4472C4'])
plt.title(question_title, size=20)
# Value written in white just inside the end of each bar
for position, mean_value in enumerate(mean_data):
    plt.text(mean_value - 0.5, position, '%.2f' % mean_value, size="20", color='white')
q20_axes = plt.gca()
q20_axes.set_xlim([0, 4])
plt.xticks(ticks=range(0, 5), labels=x_labels, rotation='horizontal', size=16)
plt.yticks(size=16)
for side in ('right', 'top'):
    q20_axes.spines[side].set_color('none')
plt.show()

### Interaction adequacy

In [None]:
# Q21: mean answer per configuration
questionnaire_barplot(
    question_number='Q21',
    question_title='It is easy for the recommender to understand what I said',
)

### Confidence & Trust

In [None]:
# Q22: mean answer per configuration
questionnaire_barplot(
    question_number='Q22',
    question_title='The recommender can be trusted',
)

### Interaction adequacy

In [None]:
# Q23: mean answer per configuration
questionnaire_barplot(
    question_number='Q23',
    question_title='The preferences in my profile correspond \n to the preferences expressed to the recommender',
)

### Overall satisfaction

In [None]:
# Q24: mean answer per configuration
questionnaire_barplot(
    question_number='Q24',
    question_title='Overall, I am satisfied with the recommender',
)

## Control mean

In [None]:
# Control score: 5 minus the average of the Q10, Q20 and Q13 means.
# The Q20 mean comes from the frames without the rows answered with 6
# (w_q20, a_q20, wa_q20).
wiki_mean = 5 - (
    (wiki_results['Q10'].mean() + w_q20['Q20'].mean() + wiki_results['Q13'].mean()) / 3
)
aspects_mean = 5 - (
    (aspects_results['Q10'].mean() + a_q20['Q20'].mean() + aspects_results['Q13'].mean()) / 3
)
wikiaspects_mean = 5 - (
    (wikiaspects_results['Q10'].mean() + wa_q20['Q20'].mean() + wikiaspects_results['Q13'].mean()) / 3
)
mean_data = [wiki_mean, aspects_mean, wikiaspects_mean]

mean_labels = ['Wiki', 'Aspects', 'Wiki + Aspects']
x_labels = ['Strongly Disagree', ' ', ' ', ' ', 'Strongly Agree']

figure(figsize=(8, 6), dpi=80)
plt.barh(mean_labels, mean_data, color=['#4472C4'])
plt.title('Control mean answers', size=20)
# Value written in white just inside the end of each bar
for position, mean_value in enumerate(mean_data):
    plt.text(mean_value - 0.5, position, '%.2f' % mean_value, size="20", color='white')
control_axes = plt.gca()
control_axes.set_xlim([0, 4])
plt.xticks(ticks=range(0, 5), labels=x_labels, rotation='horizontal', size=16)
plt.yticks(size=16)
for side in ('right', 'top'):
    control_axes.spines[side].set_color('none')
plt.show()

## Interaction adequacy mean

In [None]:
# Interaction adequacy score: 5 minus the average of the Q11, Q17, Q21 and
# Q23 means
wiki_mean = 5 - (
    (wiki_results['Q11'].mean() + wiki_results['Q17'].mean()
     + wiki_results['Q21'].mean() + wiki_results['Q23'].mean()) / 4
)
aspects_mean = 5 - (
    (aspects_results['Q11'].mean() + aspects_results['Q17'].mean()
     + aspects_results['Q21'].mean() + aspects_results['Q23'].mean()) / 4
)
wikiaspects_mean = 5 - (
    (wikiaspects_results['Q11'].mean() + wikiaspects_results['Q17'].mean()
     + wikiaspects_results['Q21'].mean() + wikiaspects_results['Q23'].mean()) / 4
)
mean_data = [wiki_mean, aspects_mean, wikiaspects_mean]

mean_labels = ['Wiki', 'Aspects', 'Wiki + Aspects']
x_labels = ['Strongly Disagree', ' ', ' ', ' ', 'Strongly Agree']

figure(figsize=(8, 6), dpi=80)
plt.barh(mean_labels, mean_data, color=['#4472C4'])
plt.title('Interaction adequacy mean answers', size=20)
# Value written in white just inside the end of each bar
for position, mean_value in enumerate(mean_data):
    plt.text(mean_value - 0.5, position, '%.2f' % mean_value, size="20", color='white')
adequacy_axes = plt.gca()
adequacy_axes.set_xlim([0, 4])
plt.xticks(ticks=range(0, 5), labels=x_labels, rotation='horizontal', size=16)
plt.yticks(size=16)
for side in ('right', 'top'):
    adequacy_axes.spines[side].set_color('none')
plt.show()

## Attitudes mean

In [None]:
# Attitudes score: 5 minus the average of the Q22 and Q24 means
wiki_mean = 5 - (
    (wiki_results['Q22'].mean() + wiki_results['Q24'].mean()) / 2
)
aspects_mean = 5 - (
    (aspects_results['Q22'].mean() + aspects_results['Q24'].mean()) / 2
)
wikiaspects_mean = 5 - (
    (wikiaspects_results['Q22'].mean() + wikiaspects_results['Q24'].mean()) / 2
)
mean_data = [wiki_mean, aspects_mean, wikiaspects_mean]

mean_labels = ['Wiki', 'Aspects', 'Wiki + Aspects']
x_labels = ['Strongly Disagree', ' ', ' ', ' ', 'Strongly Agree']

figure(figsize=(8, 6), dpi=80)
plt.barh(mean_labels, mean_data, color=['#4472C4'])
plt.title('Attitudes mean answers', size=20)
# Value written in white just inside the end of each bar
for position, mean_value in enumerate(mean_data):
    plt.text(mean_value - 0.5, position, '%.2f' % mean_value, size="20", color='white')
attitudes_axes = plt.gca()
attitudes_axes.set_xlim([0, 4])
plt.xticks(ticks=range(0, 5), labels=x_labels, rotation='horizontal', size=16)
plt.yticks(size=16)
for side in ('right', 'top'):
    attitudes_axes.spines[side].set_color('none')
plt.show()

# McNemar

In [None]:
from statsmodels.stats.contingency_tables import mcnemar

def get_contingency_table(conf1, conf2, metric):
    """Build the 2x2 contingency table of a binary metric for paired users.

    conf1, conf2: DataFrames with a ``user_id`` column and a ``metric``
        column holding 0/1 outcomes.
    metric: name of the outcome column.

    Returns a nested list ``table`` where ``table[a][b]`` is the number of
    users whose outcome is ``a`` in ``conf1`` and ``b`` in ``conf2``.

    Users are paired through ``user_id``. Only users present in both frames
    are counted, because an unpaired observation cannot contribute to a
    paired test. A ``user_id`` occurring more than once in either frame
    makes the pairing ambiguous and raises ``pandas.errors.MergeError``.
    """
    # One validated merge replaces the per-user row lookups and avoids
    # calling int() on a one-element Series (deprecated in recent pandas).
    paired = pd.merge(
        conf1[['user_id', metric]],
        conf2[['user_id', metric]],
        on='user_id',
        suffixes=('_1', '_2'),
        validate='one_to_one',
    )

    cont_table = [[0, 0], [0, 0]]
    for outcome1, outcome2 in zip(paired[metric + '_1'], paired[metric + '_2']):
        cont_table[int(outcome1)][int(outcome2)] += 1
    return cont_table

def mcnemar_test(conf1, conf2, metric):
    """Run McNemar's test on ``metric`` between two configurations.

    The contingency table is built with ``get_contingency_table`` and passed
    to statsmodels' ``mcnemar`` with ``exact=False`` and
    ``correction=False``. The result object of ``mcnemar`` is returned
    unchanged.
    """
    table = get_contingency_table(conf1, conf2, metric)
    result = mcnemar(table, exact=False, correction=False)
    return result

In [None]:
# McNemar's test on HitRate_3 between the wiki and aspects configurations
mcnemar_result = mcnemar_test(conf1=wiki_results, conf2=aspects_results, metric="HitRate_3")
print(mcnemar_result)

# Wilcoxon

In [None]:
# Group A: every user except those who answered 2 to Q8 in the wikiaspects
# questionnaire (code 2 is labelled 'Medium' in the movie-interest plot).
# The users are selected through user_id: wiki_results and
# wikiaspects_results are built independently and do not share an index, so
# dropping wiki_results rows by wikiaspects_results index labels can remove
# the wrong users or raise KeyError.
excluded_a_ids = wikiaspects_results.loc[wikiaspects_results.Q8 == 2, 'user_id']
group_a = wiki_results[~wiki_results.user_id.isin(excluded_a_ids)]
group_a_ids = group_a['user_id'].tolist()

wiki_res_a = wiki_results[wiki_results.user_id.isin(group_a_ids)]
aspects_res_a = aspects_results[aspects_results.user_id.isin(group_a_ids)]
wikiaspects_res_a = wikiaspects_results[wikiaspects_results.user_id.isin(group_a_ids)]

confs_a = [wiki_res_a, aspects_res_a, wikiaspects_res_a]

In [None]:
# Group B: every user except those who answered 1 to Q8 in the wikiaspects
# questionnaire (code 1 is labelled 'High' in the movie-interest plot).
# The users are selected through user_id: wiki_results and
# wikiaspects_results are built independently and do not share an index, so
# dropping wiki_results rows by wikiaspects_results index labels can remove
# the wrong users or raise KeyError.
excluded_b_ids = wikiaspects_results.loc[wikiaspects_results.Q8 == 1, 'user_id']
group_b = wiki_results[~wiki_results.user_id.isin(excluded_b_ids)]
group_b_ids = group_b['user_id'].tolist()

wiki_res_b = wiki_results[wiki_results.user_id.isin(group_b_ids)]
aspects_res_b = aspects_results[aspects_results.user_id.isin(group_b_ids)]
wikiaspects_res_b = wikiaspects_results[wikiaspects_results.user_id.isin(group_b_ids)]

confs_b = [wiki_res_b, aspects_res_b, wikiaspects_res_b]

In [None]:
from itertools import combinations

from scipy.stats import wilcoxon

confs = [wiki_results, aspects_results, wikiaspects_results]
conf_names = ["wiki", "aspect", "wikiaspect"]

metrics_int = ["QueryDensity", "prefsCount", "control_mean", "int_adequacy_mean", "attitudes_mean"]
metrics_acc = ["HitRate_1", "HitRate_3"]
questions = ["Q9", "Q10", "Q11", "Q12", "Q13", "Q14", "Q15", "Q16", "Q17", "Q18", "Q19", "Q20", "Q21", "Q22", "Q23", "Q24"]

# Answer IDs removed from the questionnaire samples before testing
invalid_answer_ids = [0, 6]

# Every output row contains the following information:
# <configuration 1 name>, <configuration 2 name>, <metric name>, <p-value>,
# <test statistic>, <number of degrees of freedom>
output_columns = ['configuration 1 name', 'configuration 2 name', 'metric name', 'p_value', 'statistic', 'number of degrees of freedom']


def paired_samples(conf_i, conf_j, metric, invalid_ids=()):
    """Return the values of ``metric`` in two configurations, paired by user.

    The frames are joined on ``user_id`` so that position k of both returned
    arrays belongs to the same user (the Wilcoxon signed-rank test is a
    paired test). When ``invalid_ids`` is not empty, a pair is dropped as a
    whole if either of its two answers is one of those IDs, which keeps the
    two samples the same length.
    """
    pairs = pd.merge(
        conf_i[['user_id', metric]],
        conf_j[['user_id', metric]],
        on='user_id',
        suffixes=('_i', '_j'),
    )
    sample_i = pairs[metric + '_i']
    sample_j = pairs[metric + '_j']
    if len(invalid_ids) > 0:
        invalid = sample_i.isin(invalid_ids) | sample_j.isin(invalid_ids)
        sample_i, sample_j = sample_i[~invalid], sample_j[~invalid]
    return sample_i.to_numpy(), sample_j.to_numpy()


# (metrics, answer IDs to remove): only the questionnaire answers are filtered
metric_groups = [
    (metrics_int, ()),
    (metrics_acc, ()),
    (questions, invalid_answer_ids),
]

rows = []
for metrics, invalid_ids in metric_groups:
    for metric in metrics:
        # For each pair of configurations calculate the statistical test
        for (name_i, conf_i), (name_j, conf_j) in combinations(zip(conf_names, confs), 2):
            sample_i, sample_j = paired_samples(conf_i, conf_j, metric, invalid_ids)
            ttest = wilcoxon(sample_i, sample_j)
            df = len(sample_i) + len(sample_j) - 2
            rows.append([name_i, name_j, metric, ttest.pvalue, ttest.statistic, df])

# Build the table directly instead of writing a comma-separated file and
# reading it back; test_output.txt ends up '|'-separated as before.
test_output = pd.DataFrame(rows, columns=output_columns)
test_output.to_csv('test_output.txt', sep='|', index=False)


# Keep the rows whose p-value is not above 0.05
p_value_ok = test_output.drop(test_output[(test_output.p_value > 0.05)].index)
p_value_ok.to_csv('p_value.txt', sep='|', index=False)

# Full option names: the bare 'max_columns' / 'max_rows' patterns also match
# the styler.render.* options of pandas >= 1.4 and raise OptionError.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

p_value_ok.head(n=100)