# PyShopper example
---
This notebook walks through a quick PyShopper example that covers:
1. Loading data
2. Instantiating and fitting the Shopper model via MCMC sampling or variational inference
3. Inference diagnostics
4. Prediction on unseen test data

In [1]:
# Imports

import numpy as np
import pandas as pd
import warnings

import theano

from pyshopper import shopper

from tqdm.notebook import tqdm

# Silence FutureWarning and UserWarning messages for the rest of the session
# so they do not clutter the cell outputs below.
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", UserWarning)

## 1. Load data

In [2]:
# Read the training data together with the price table into one DataFrame
# and display it.

X_train = shopper.load_data(
    'data/train.tsv',
    'data/prices.tsv',
)
X_train

Unnamed: 0,user_id,item_id,session_id,quantity,price
0,1,100,1,1,1.0
1,2,100,1,1,1.0
2,4,100,1,1,1.0
3,5,100,1,1,1.0
4,6,100,1,1,1.0
...,...,...,...,...,...
306042,208,200,123,1,5.0
306043,209,200,123,1,5.0
306044,227,200,123,1,5.0
306045,238,200,123,1,5.0


In [3]:
# Limit data to a reproducible random sample of 100 trips.
# A trip (basket) is treated here as one (user_id, session_id) pair.

sample_size = 100
random_seed = 42  # fixed so that Restart & Run All selects the same trips

groupby_baskets = X_train.groupby(['user_id', 'session_id'])

# Shuffle the basket numbers with a dedicated, seeded generator instead of the
# global NumPy RNG: the draw is repeatable and the global random state that
# later cells may rely on is left untouched.
baskets_idx = np.arange(groupby_baskets.ngroups)
np.random.RandomState(random_seed).shuffle(baskets_idx)

# ngroup() labels every row with the number of its basket; keep the rows whose
# basket number is among the first `sample_size` shuffled numbers. Slicing
# simply keeps every basket when fewer than `sample_size` exist.
X_train_limited = (
    X_train.loc[groupby_baskets.ngroup().isin(baskets_idx[:sample_size])]
           .sort_values(['session_id', 'user_id'])
           .reset_index(drop=True)
)
X_train_limited

Unnamed: 0,user_id,item_id,session_id,quantity,price
0,55,100,10,1,1.0
1,55,300,10,1,1.0
2,55,301,10,1,1.0
3,200,400,23,1,1.0
4,200,401,23,1,1.0
...,...,...,...,...,...
307,134,301,400,1,1.0
308,134,200,400,1,1.0
309,223,300,400,1,1.0
310,223,301,400,1,1.0


## 2. Instantiate and fit model

In [4]:
# Create a Shopper model instance from the sampled baskets.
# NOTE(review): the output saved with this cell shows
# `TypeError: index must be integers`, raised right after the
# "Building the Shopper model..." log line. The cause is not visible from this
# notebook; the cells below need `model` and cannot run until it is resolved.

model = shopper.Shopper(X_train_limited)

INFO:root:Building the Shopper model...


TypeError: index must be integers

In [None]:
# # Fit model with MCMC sampling (optional; uncomment this cell to run)

# mcmc_res = model.fit(N=10000, method='MCMC')

In [None]:
# # Results summary of the MCMC fit:
# # common posterior statistics and sampling diagnostics
# # (needs `mcmc_res` from the MCMC fit cell; uncomment to run)

# mcmc_res.summary()

In [None]:
# Fit the model using the ADVI approximation; the fit result is kept in
# `advi_res` for the summary and diagnostics cells.

advi_res = model.fit(
    method='ADVI',
    N=1000,
)

In [None]:
# Results summary of the ADVI fit: common posterior statistics.
# Note: the number of draws to take from the approximated posterior
# distribution must be given explicitly (here 100).

advi_res.summary(draws=100)

## 3. Diagnostics

In [None]:
# # Sampling trace plot (MCMC only; needs `mcmc_res`, uncomment to run)

# mcmc_res.trace_plot()

In [None]:
# ELBO plot for the ADVI fit.
# The value returned by elbo_plot() is kept in `fig`
# (presumably a figure object — TODO confirm against pyshopper).

fig = advi_res.elbo_plot()