# Important

Run `make scores` before executing any cell in this notebook.

# Imports

In [None]:
import pandas as pd
import seaborn as sns
# Global seaborn theme applied to every plot in this notebook.
# NOTE(review): the figure size (in inches) looks like A4 landscape — confirm.
sns.set(
    context='paper',
    style='white',
    palette='muted',
    rc={'figure.figsize': (11.7, 8.27)},
)

# Accuracy

## Results

In [None]:
# Load the accuracy results, using the 'model' column as the row index.
df_accuracy = pd.read_csv(
    "../results/cf-accuracy-results.csv",
    index_col='model',
)

In [None]:
# Show the models ordered by ascending 'rmse'.
df_accuracy.sort_values(by='rmse')

## Distribution

In [None]:
# Test-set predictions, loaded in the order SlopeOne, KNN, SVD.
df_pred_so, df_pred_knn, df_pred_svd = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../models/predictions/cf-results/testset/slopeone-testset-predictions.csv",
        "../models/predictions/cf-results/testset/knn-testset-predictions.csv",
        "../models/predictions/cf-results/testset/svd-testset-predictions.csv",
    )
)

In [None]:
# Add an 'err' column to each prediction table: the absolute difference
# between the 'est' and 'rating' columns.
for df_pred in (df_pred_so, df_pred_knn, df_pred_svd):
    df_pred['err'] = (df_pred['est'] - df_pred['rating']).abs()

Because the KNN and SVD results are very similar, only the SlopeOne and KNN results are compared.

## Worst and best scenarios

In [None]:
# The five SlopeOne predictions with the largest 'err' (sorted ascending,
# so the worst one is the last row).
df_pred_so.sort_values(by='err').tail(n=5)

In [None]:
# The five SVD predictions with the largest 'err' (sorted ascending, so the
# worst one is the last row).
# NOTE(review): the notebook text says SlopeOne and KNN are compared, yet this
# cell inspects the SVD predictions — confirm which model is intended.
df_pred_svd.sort_values(by='err').tail(n=5)

In [None]:
# First ten KNN rows at the positions where the SlopeOne error is at least 3.5.
# The mask is matched to the KNN frame by index label.
# NOTE(review): assumes both CSVs list the same test rows in the same order — confirm.
df_pred_knn[df_pred_so['err'].ge(3.5)].head(n=10)

## Estimates distributions

In [None]:
# Summary statistics of the SlopeOne estimates.
df_pred_so['est'].describe()

In [None]:
# Summary statistics of the KNN estimates.
df_pred_knn['est'].describe()

In [None]:
# Distribution plot of the SlopeOne estimates.
# NOTE(review): distplot is deprecated in newer seaborn releases
# (histplot/displot replace it) — revisit when upgrading seaborn.
sns.distplot(df_pred_so['est'])

In [None]:
# Distribution plot of the KNN estimates.
# NOTE(review): distplot is deprecated in newer seaborn releases
# (histplot/displot replace it) — revisit when upgrading seaborn.
sns.distplot(df_pred_knn['est'])

In [None]:
# Summary statistics of the SlopeOne absolute errors.
df_pred_so['err'].describe()

In [None]:
# Summary statistics of the KNN absolute errors.
df_pred_knn['err'].describe()

## Neighbors requirement

In [None]:
import ast

# Each 'details' cell holds the string form of a dict. Parse it with
# ast.literal_eval (literals only) rather than eval, so CSV content is never
# executed as code, and read 'actual_k' directly instead of expanding every
# dict into a DataFrame row with apply(pd.Series), which is slow.
# Rows whose dict has no 'actual_k' key yield NaN, and the resulting Series is
# named 'actual_k', matching what the column selection produced before.
k_vals = (
    df_pred_knn['details']
    .apply(lambda raw: dict(ast.literal_eval(raw)).get('actual_k', float('nan')))
    .rename('actual_k')
)

In [None]:
# Summary statistics of the 'actual_k' values extracted from the KNN details.
k_vals.describe()

In [None]:
# Fraction of all entries in k_vals whose value is below 10.
# NaN entries never satisfy the comparison but still count in the denominator.
k_vals.lt(10).sum() / len(k_vals)

# Effectiveness

In [None]:
# Load the effectiveness results, using the 'model' column as the row index.
df_eff = pd.read_csv(
    "../results/cf-effectiveness-results-n.csv",
    index_col='model',
)

In [None]:
# Effectiveness rows where the 'n' column equals 20.
df_eff[df_eff['n'].eq(20)]

In [None]:
# Keep only the rows indexed 'svd-predictions.csv' and reshape them to long
# form: one row per (n, column) pair, with the former column name in 'cols'
# and its value in 'vals'.
df_multiline = (
    df_eff[df_eff.index == 'svd-predictions.csv']
    .melt(id_vars='n', var_name='cols', value_name='vals')
)

In [None]:
# One point per (n, metric) pair, coloured by the melted column name.
g = sns.catplot(x="n", y="vals", hue='cols', data=df_multiline)

# Thin out the x tick labels: keep the first one and every 10th, blank the rest.
for ax in g.axes.flat:
    tick_labels = [
        label if pos == 0 or (pos + 1) % 10 == 0 else ''
        for pos, label in enumerate(ax.get_xticklabels())
    ]
    ax.set_xticklabels(tick_labels)

g.set(xlabel='Number of recommendations', ylabel='Metric value')

# Replace the legend title and entries with readable names.
# NOTE(review): assumes the legend entries appear in exactly this order —
# confirm against the column order of df_multiline.
g._legend.set_title('Metric and testset')
legend_names = ['Precision - to read', 'Precision - ratings', 'Recall - to read', 'Recall - ratings']
for legend_text, legend_name in zip(g._legend.texts, legend_names):
    legend_text.set_text(legend_name)

In [None]:
# `g` is the FacetGrid returned by sns.catplot; a FacetGrid has no
# get_figure() method (that belongs to matplotlib Axes), so save through the
# grid's own savefig(), which writes the whole figure to the given path.
g.savefig("tmp.pdf")