In [2]:
import plotly.graph_objects as go
from plotly.colors import n_colors
import pandas as pd
import ast
import numpy as np

In [3]:
# Ranking results, kept as a list with one dict per CSV row.
dataset = pd.read_csv('../dataset/HR_result.csv').to_dict(orient='records')

# Movie metadata as {id: {column: value}}. drop_duplicates keeps the first
# row for each repeated id, so the ids are unique before becoming dict keys.
movies = (
    pd.read_csv('../dataset/simpler_movie_dataset.csv')
    .drop_duplicates(subset='id')
    .set_index('id')
    .to_dict(orient='index')
)

# User metadata as {userId: {column: value}}.
users = pd.read_csv('../dataset/user_metadata.csv', index_col='userId').to_dict(orient='index')

In [4]:
# Scratch expression: the result is only displayed, never assigned, and no
# later cell visible here reads it.
# Builds a (12, 200) array of standard-normal noise where row i is scaled by
# linspace(1, 2, 12)[i] and shifted by i plus a uniform draw from [0, 2).
# No random seed is set in the visible cells, so the values differ on every run.
(np.linspace(1, 2, 12)[:, np.newaxis] * np.random.randn(12, 200) +
            (np.arange(12) + 2 * np.random.random(12))[:, np.newaxis])

array([[ 1.34441345,  0.06415988,  2.24302346, ...,  0.53209049,
        -0.83336266,  1.85037321],
       [ 1.41938219,  2.08764212,  1.32946412, ...,  2.45554603,
         0.6692258 ,  0.69661489],
       [ 3.74936234,  4.57484867,  2.96920623, ...,  3.83265669,
         3.23411156,  5.00458765],
       ...,
       [11.56615811, 11.29005397,  8.36706319, ...,  8.47391381,
         9.69859888, 10.21920488],
       [12.89227191, 11.36271609, 11.13638502, ..., 11.05717516,
         8.63896567, 12.44052333],
       [10.11582761, 10.78404351, 16.78885733, ..., 12.78417988,
        17.30960775, 12.09595789]])

In [5]:
# One list per model, holding that model's metric for every row of `dataset`.
data = {
    'Heavy Ranker': [row['MRR'] for row in dataset],
    'Candidate Generator': [row['recall@5000'] for row in dataset],
}

In [6]:
# Inspection only: display the series labels currently held in `data`.
data.keys()

dict_keys(['Heavy Ranker', 'Candidate Generator'])

In [7]:
# Distribution of both accuracy metrics (MRR and recall@5000), drawn as
# overlaid horizontal half-violins with one trace per model.

data = {
    'Heavy Ranker (MRR)': [x['MRR'] for x in dataset],
    'Candidate Generator (Recall@K)': [x['recall@5000'] for x in dataset],
}
ops = [1, 0.5]  # per-trace opacity; the second violin is translucent
colors = ['#3D5361', '#4C3949']
fig = go.Figure()
# zip pairs each series with its colour and opacity by position.
for (data_line, values), color, op in zip(data.items(), colors, ops):
    fig.add_trace(go.Violin(x=values, line_color=color, meanline_visible=True, name=data_line, opacity=op))

fig.update_traces(orientation='h', side='positive', width=9, points=False)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False,
        plot_bgcolor = 'rgba(0, 0, 0, 0)',
        # paper_bgcolor = 'rgba(0, 0, 0, 0)',
        # The bold tag is now closed with </b>; the original repeated the
        # opening <b>, leaving the markup unbalanced.
        margin = dict(l=0,r=0,b=1,t=50,pad=0),title_text="<b>Distribution of Accuracy Metrics for Each User and Each Model</b>",
        showlegend=True,
        legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.6)
        )
# The y axis only carries the violin categories, which the legend already names.
fig.update_yaxes(visible=False)
fig.show()

In [8]:
# Columns of `dataset` pulled out as parallel lists for the scatter plots.
# The next cell rebuilds this same dict before plotting.
data = {
    label: [row[column] for row in dataset]
    for label, column in (('len', 'len'), ('MRR', 'MRR'), ('Recall', 'recall@5000'))
}

In [9]:
# Columns of `dataset` pulled out as parallel lists: 'len', MRR and recall@5000.
data = {
    label: [row[column] for row in dataset]
    for label, column in (('len', 'len'), ('MRR', 'MRR'), ('Recall', 'recall@5000'))
}

# Recall@5000 against 'len', one borderless marker per row of `dataset`.
recall_trace = go.Scatter(
    x=data['len'],
    y=data['Recall'],
    mode='markers',
    marker=dict(color='#3D5361', size=8, line=dict(width=0)),
    opacity=0.6,
    name='Recall',
)
fig = go.Figure(data=[recall_trace])

# fig.add_trace(go.Scatter(x=data['len'], y=data['MRR'], mode='markers', marker_color=colors[1], name='MRR'))

fig.update_layout(
    title=dict(text="Correlation of Recall and Number of Users Watch History"),
    xaxis=dict(showgrid=False, title=dict(text="Number of User Watch Histories")),
    yaxis=dict(showgrid=False, title=dict(text="Recall@K")),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    # paper_bgcolor = 'rgba(0, 0, 0, 0)',
    margin=dict(l=5, r=5, b=5, t=30, pad=0),
    legend=dict(yanchor="top", y=0.6, xanchor="left", x=0.8),
)

fig.show()

In [10]:
colors = ['#3D5361', '#4C3949']

# MRR against 'len', reusing the `data` dict built for the recall scatter.
# colors[1] is the same '#4C3949' used for the second violin trace.
mrr_trace = go.Scatter(
    x=data['len'],
    y=data['MRR'],
    mode='markers',
    marker=dict(color=colors[1], size=8, line=dict(width=0)),
    name='MRR',
)
fig = go.Figure(data=[mrr_trace])

fig.update_layout(
    title=dict(text="Correlation of MRR and Number of Users Watch History"),
    xaxis=dict(showgrid=False, title=dict(text="Number of User Watch Histories")),
    yaxis=dict(showgrid=False, title=dict(text="MRR")),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    # paper_bgcolor = 'rgba(0, 0, 0, 0)',
    margin=dict(l=5, r=5, b=5, t=30, pad=0),
    legend=dict(yanchor="top", y=0.6, xanchor="left", x=0.8),
)

fig.show()

In [19]:
# Reload the ranking results, this time keeping them as a DataFrame.
# NOTE: this rebinds `dataset`, which earlier cells use as a list of dicts;
# those cells must be run before this one.
# The original converted the frame to a list of records and straight back to
# a DataFrame; that round trip is dropped here.
dataset = pd.read_csv('../dataset/HR_result.csv')

# Each 'movies' cell is the text of a Python literal (presumably a list of
# movie ids — confirm against the CSV); parse it so it can be exploded.
dataset['movies'] = dataset['movies'].apply(ast.literal_eval)

# One row per element of each 'movies' list. 'movieId' is carried along as
# read from the CSV (it is not parsed here).
dataset = dataset.explode('movies')[['userId', 'movies', 'movieId']]

# Look the title up in the `movies` metadata dict (keyed by integer id).
# The lookup already yields one string per row, so the original's extra
# `.apply(pd.Series)` expansion, which built a Series per row only to be
# collapsed back into this single column, is removed.
dataset['title'] = dataset['movies'].apply(lambda x: movies[int(x)]['title'])

In [21]:
# The 20 most frequent (movies, title) pairs among the exploded rows.
# value_counts sorts by descending count, so head(20) takes the top 20.
topmovies = (
    dataset[['movies', 'title']]
    .value_counts()
    .head(20)
    .reset_index(name='Counts')
)

In [37]:
def finding_mvs(movies, movieId):
    """Return 1 if ``movies`` is contained in ``movieId``, otherwise 0.

    Containment is Python's ``in`` operator, so its meaning follows the type
    of ``movieId``: element membership for a list/tuple/set, substring match
    for a str.

    NOTE(review): in the cells visible here the 'movieId' column is never
    passed through ``ast.literal_eval``; if it is still the raw CSV string,
    this is a substring test (e.g. '86' matches inside '862') — confirm that
    is intended.
    """
    return int(movies in movieId)

In [40]:
# Keep only the exploded rows whose (movies, title) pair is one of the top
# movies, attaching that movie's 'Counts'.
# The join keys are spelled out: with no `on`, merge joins on every column
# the two frames share, so the keys would change silently if either frame
# gained another common column.
get_accuracy = dataset.merge(topmovies, on=['movies', 'title'], how='inner')

In [42]:
# Flag each row with 1 when its 'movies' value is contained in that row's
# 'movieId' value (see finding_mvs), else 0.
get_accuracy['is_watched'] = [
    finding_mvs(movie, movie_ids)
    for movie, movie_ids in zip(get_accuracy['movies'], get_accuracy['movieId'])
]

In [47]:
# Index the summary by movie id so that per-movie aggregates assigned later
# align on it.
# Guarded so that re-running this cell is a no-op instead of a KeyError once
# 'movies' has already become the index; rebinding replaces inplace=True.
if 'movies' in topmovies.columns:
    topmovies = topmovies.set_index('movies')

In [49]:
# Sum of the is_watched flags per movie id. The assignment aligns on
# topmovies' index, so topmovies must already be indexed by 'movies'.
watched_per_movie = get_accuracy.groupby('movies')['is_watched'].sum()
topmovies['accuracy'] = watched_per_movie

In [54]:
# NOTE(review): 388 is a magic number, presumably the number of users in
# HR_result.csv — confirm, and derive it from the data if so.
PCT_DENOMINATOR = 388

# Stored as fractions at this point; a later cell scales them to percent.
for source_col, target_col in (('Counts', 'counts_pct'), ('accuracy', 'accuracy_pct')):
    topmovies[target_col] = topmovies[source_col] / PCT_DENOMINATOR

In [61]:
# Convert the stored fractions to percentages with two decimals.
# Scaling first and rounding last avoids the float artefacts that
# `round(x, 4) * 100` produced (e.g. 15.459999999999999 instead of 15.46).
# NOTE: this cell rescales its own input, so run it only once per kernel;
# re-running multiplies the values by 100 again.
topmovies['counts_pct'] = (topmovies['counts_pct'] * 100).round(2)
topmovies['accuracy_pct'] = (topmovies['accuracy_pct'] * 100).round(2)

In [65]:
# Inspection only: display the column names of `topmovies` (the index, set to
# 'movies' earlier, is not part of the 'list'-oriented dict).
topmovies.to_dict('list').keys()

dict_keys(['title', 'Counts', 'accuracy', 'counts_pct', 'accuracy_pct'])

In [64]:
# Inspection only: display each column of `topmovies` as a plain list, in the
# same order as the column names.
topmovies.to_dict('list').values()

dict_values([['The Sixth Sense', 'Ride Lonesome', 'The Run of the Country', 'Minions', 'Vali', 'Lovelines', 'Apache Country', 'Home Made Home', 'The Gun That Won the West', 'Cousin, Cousine', 'Virtue', 'The Holy Modal Rounders: Bound to Lose', 'Stalker', 'The Haunted House', 'Duel of Hearts', '10 Items or Less', 'The Walking Stick', 'Furious 7', 'Oldboy', 'Scarface'], [136, 132, 130, 87, 82, 75, 73, 62, 62, 60, 60, 59, 59, 58, 53, 53, 52, 50, 49, 49], [21, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7], [35.05, 34.02, 33.51, 22.42, 21.13, 19.33, 18.81, 15.98, 15.98, 15.459999999999999, 15.459999999999999, 15.21, 15.21, 14.95, 13.66, 13.66, 13.4, 12.889999999999999, 12.629999999999999, 12.629999999999999], [5.41, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.03, 0.0, 0.26, 0.0, 0.0, 0.0, 0.26, 0.0, 0.0, 0.0, 1.7999999999999998]])

In [76]:
# Render the top-movies summary as a table.
# Cell values are read from `topmovies` rather than from a hand-copied
# snapshot of its contents, so the table cannot drift from the data.
table_columns = ['title', 'Counts', 'accuracy', 'counts_pct', 'accuracy_pct']
header_labels = ['title', 'counts', 'accuracy', 'counts_pct', 'accuracy_pct']
pct_columns = {'counts_pct', 'accuracy_pct'}

# Percent columns are rounded to two decimals for display so float artefacts
# (e.g. 15.459999999999999) do not leak into the table.
cell_values = [
    (topmovies[col].round(2) if col in pct_columns else topmovies[col]).tolist()
    for col in table_columns
]

fig = go.Figure(data=go.Table(
        header=dict(
            values=header_labels,
            font=dict(size=14, color='#683B2B'),
            align="center",
            fill_color='#DeD1BD'
        ),
        # Wider first column because titles are the longest values.
        columnwidth = [200,80,80,80,80],
        cells=dict(
            values=cell_values,
            align = "left",
            fill_color='white',
            font=dict(size=12),
            line_color='#683B2B')))

# update_layout returns the figure, so as the last expression of the cell it
# is also what the notebook displays.
fig.update_layout(
    plot_bgcolor = 'rgba(0, 0, 0, 0)',
    paper_bgcolor = 'rgba(0, 0, 0, 0)',
    margin = dict(l=0,r=0,b=1,t=5,pad=0),
)