In [1]:
# Switch the working directory to the parent folder; presumably this is the
# repository root, so that the `recoexplainer` package and the relative
# `datasets/...` path used further down resolve — confirm when relocating the notebook.
%cd ..

/Users/CobaLu01/CobaRepos/recoexplainer


### Imports

In [2]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd

import random

from recoexplainer.config import cfg
from recoexplainer.data_reader import DataReader 
from recoexplainer.models import ALS 
from recoexplainer.recommender import Recommender 
from recoexplainer.evaluator import Splitter, Evaluator
from recoexplainer.explain import ALSExplainer

## Prepare data

Import the data:

In [3]:
# Build the DataReader from the keyword arguments stored under `cfg.ml100k`.
data = DataReader(**cfg.ml100k)

In [4]:
# Preview the first rows of the freshly loaded ratings.
data.dataset.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


Re-map the user and item IDs to consecutive integers starting at 0:

In [5]:
# Remap user and item ids in `data.dataset` to consecutive ids (the preview in
# the next cell shows them starting at 0). This mutates `data`, so re-running
# the cell on an already remapped dataset may not be a no-op — TODO confirm.
data.make_consecutive_ids_in_dataset()

In [6]:
# Preview the dataset again to check the remapped ids.
data.dataset.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,0,0,3,881250949
1,1,1,3,891717742
2,2,2,1,878887116
3,3,3,2,880606923
4,4,4,1,886397596


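Because ALS works on implicit feedback, we need to binarize the ratings. As the preview below shows, ratings above the threshold become 1.0 and the remaining rows are dropped: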

In [7]:
# Binarize the ratings in place with a threshold of 1.
data.binarize(binary_threshold=1)

In [8]:
# Preview the dataset after binarization.
data.dataset.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,0,0,1.0,881250949
1,1,1,1.0,891717742
3,3,3,1.0,880606923
5,5,5,1.0,884182806
6,6,6,1.0,881171488


Prepare train and test sets:

In [9]:
# Split the prepared data into train and test sets with a leave-n-out split
# (frac=0.1).
# NOTE(review): no seed is passed here; if the split is random, the metrics
# reported further down will change between runs — confirm against Splitter.
sp = Splitter()
train, test = sp.split_leave_n_out(data, frac=0.1)

## Model

In [10]:
# Instantiate the ALS model from the keyword arguments stored under `cfg.model.als`.
als = ALS(**cfg.model.als)



In [11]:
# Fit the ALS model on the training split.
als.fit(train)


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




True

## Recommendations

In [12]:
# Create a Recommender from the training data and the fitted ALS model.
recommender = Recommender(train, als)

In [13]:
# Produce recommendations for every user.
recommendations = recommender.recommend_all()

HBox(children=(HTML(value='Recommending for users: '), FloatProgress(value=0.0, max=943.0), HTML(value='')))




## Evaluations

In [14]:
# Create an Evaluator over the held-out test split.
eva = Evaluator(test)

In [15]:
# Hit ratio of the recommendations, evaluated against the test split.
eva.cal_hit_ratio(recommendations)

0.21909102182632967

In [16]:
# NDCG of the recommendations, evaluated against the test split.
eva.cal_ndcg(recommendations)

0.2265885719581136

## Explain

In [17]:
# Build the explainer from the fitted model, the recommendations and the training data.
expl = ALSExplainer(als, recommendations, train)

In [18]:
# Compute an explanation for each recommendation.
explanations = expl.explain_recommendations()

HBox(children=(HTML(value='Computing explanations: '), FloatProgress(value=0.0, max=9430.0), HTML(value='')))




### Example

In [19]:
# Pick a small, reproducible sample of users to showcase in the dashboard.
N_SAMPLE_USERS = 10
SAMPLE_SEED = 0

# `random.sample` no longer accepts a set (deprecated in Python 3.9, TypeError
# from 3.11), so sample from a sorted list; sorting also makes the draw
# independent of set iteration order.
unique_users = sorted(set(data.dataset.userId))

# A dedicated, seeded generator keeps the sample stable across runs without
# touching the global `random` state. The size is capped so that datasets with
# fewer than N_SAMPLE_USERS users do not raise ValueError.
sample_users = random.Random(SAMPLE_SEED).sample(
    unique_users, min(N_SAMPLE_USERS, len(unique_users))
)

# Keep only the explanations that belong to the sampled users.
sample_expl = explanations[explanations.userId.isin(sample_users)]

In [84]:
# Movie titles: read the pipe-separated, header-less item file, index it by its
# first column and keep only the second column, exposed as `movie`.
content = (
    pd.read_csv(
        'datasets/ml-100k/u.item',
        sep='|',
        encoding="ISO-8859-1",
        skiprows=0,
        engine='python',
        header=None,
    )
    .set_index(0)[[1]]
    .rename(columns={1: 'movie'})
)

In [102]:
# Spot check of the title lookup: translate item ids 1 and 2 (presumably the
# internal consecutive ids) back to their original ids and fetch the titles.
list(content.loc[train.get_original_item_id([1,2])].movie)

['L.A. Confidential (1997)', 'Heavyweights (1994)']

In [112]:
# Dashboard skeleton: a user dropdown, a rank slider (1 to 10) and the pie chart
# that the callback fills in.
app = JupyterDash(__name__)

app.layout = html.Div(children=[
    html.P("userId:"),
    dcc.Dropdown(
        id='userId',
        options=[dict(value=uid, label=uid) for uid in sample_users],
        value=sample_users[0],
        clearable=False,
    ),
    html.P("Rank:"),
    dcc.Slider(
        id='rank',
        min=1,
        max=10,
        step=1,
        value=1,
        marks={position: str(position) for position in range(1, 11)},
    ),
    dcc.Graph(id="pie-chart"),
])

@app.callback(
    Output("pie-chart", "figure"),
    [Input("userId", "value"),
     Input("rank", "value")])
def generate_chart(userId, rank):
    """Build the explanation pie chart for the selected user and rank.

    Parameters
    ----------
    userId
        Value of the ``userId`` dropdown.
    rank
        Value of the ``rank`` slider.

    Returns
    -------
    A plotly pie figure whose slices are the explaining movies (sized by their
    ``contribution``) and whose title is the recommended movie.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When ``sample_expl`` holds no row for the selection, so the chart is
        left untouched instead of failing with an IndexError.
    """
    # Local import: keeps this cell runnable without editing the imports cell.
    from dash.exceptions import PreventUpdate

    # Named `selection` rather than `expl` to avoid shadowing the notebook-level
    # ALSExplainer instance.
    selection = sample_expl[(sample_expl['userId'] == userId) & (sample_expl['rank'] == rank)]
    if selection.empty:
        raise PreventUpdate

    row = selection.iloc[0]
    df = pd.DataFrame.from_dict(row['explanations'])
    df['item'] = list(content.loc[train.get_original_item_id(df.item)].movie)

    # `content` is indexed by the ids of the raw item file, so the recommended
    # item id is translated exactly like the explaining items above.
    # NOTE(review): assumes recommendation ids are the internal consecutive ids,
    # as the explanation item ids are — confirm against Recommender.
    # Extracting the scalar from the row also avoids calling int() on a Series.
    recommended_ids = train.get_original_item_id([int(row['itemId'])])
    recommended_title = list(content.loc[recommended_ids].movie)[0]

    fig = px.pie(df, values='contribution', names='item', title=recommended_title)
    return fig

# Start the Dash server and render the app inline, in this cell's output.
app.run_server(mode='inline')