In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from spotlight.interactions import Interactions
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.cross_validation import random_train_test_split
from spotlight import evaluation

In [2]:
df = pd.read_csv('data/beer.csv')

In [3]:
user = 'name'
item = 'beer'
rating = 'rating'

user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

eu = user_encoder.fit_transform(df[user])
ei = item_encoder.fit_transform(df[item])

explicit_interactions = Interactions(eu, ei, np.array(df['rating']).astype('float32'))

In [4]:
train, test = random_train_test_split(explicit_interactions, random_state=np.random.RandomState(42))

In [24]:
explicit_model = ExplicitFactorizationModel(
    loss='regression',
    embedding_dim=32,
    n_iter=10,
    batch_size=250,
    learning_rate=0.01
)

explicit_model.fit(train)

In [25]:
pk, rk = evaluation.precision_recall_score(explicit_model, test, train=None, k=10)
np.mean(pk)

0.132967032967033

In [26]:
coo = pd.DataFrame(explicit_interactions.tocoo().todense())
coo.index = [user_encoder.inverse_transform([i])[0] for i in coo.index]
coo.columns = [item_encoder.inverse_transform([c])[0] for c in coo.columns]

In [28]:
coo.head()

Unnamed: 0,A l'abri de la Tempête: Corne de Brume,Ace Hill Brewing Company: Ace Hill Pilsner,Alexander Keiths: Red Amber Ale,Amsterdam Brewing Company: (416) Local Lager,Amsterdam Brewing Company: 3 Speed,Amsterdam Brewing Company: All Natural Blonde,Amsterdam Brewing Company: Autumn Hop,Amsterdam Brewing Company: Big Wheel Deluxe Amber,Amsterdam Brewing Company: Boneshaker,Amsterdam Brewing Company: Cruiser All Day Pale Ale,...,Unibroue: Éphémère Cherry,Unibroue: Éphémère Elderberry,Unibroue: Éphémère Pear,Wellington Brewery: Bewitched Belgian,Wellington Brewery: Helles Bock,Wellington Brewery: Iron Duke,Woodhouse Brewing Co.: Woodhouse Lager,À la Fût: Cuvée Houblonée I,À la Fût: Hopfenweisse,À la Fût: À La Belge Triple
Aaron Clark,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Adam Johnson,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Amber Chapman,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Amber Mullins,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Andrea Washington,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
person = 'Adam Johnson'
user_id = user_encoder.transform([person])[0]
explicit_model.predict(np.array([user_id]))
item_encoder.classes_[:10]

array(["A l'abri de la Tempête: Corne de Brume",
       'Ace Hill Brewing Company: Ace Hill Pilsner',
       'Alexander Keiths: Red Amber Ale',
       'Amsterdam Brewing Company: (416) Local Lager',
       'Amsterdam Brewing Company: 3 Speed',
       'Amsterdam Brewing Company: All Natural Blonde',
       'Amsterdam Brewing Company: Autumn Hop',
       'Amsterdam Brewing Company: Big Wheel Deluxe Amber',
       'Amsterdam Brewing Company: Boneshaker',
       'Amsterdam Brewing Company: Cruiser All Day Pale Ale'],
      dtype=object)

In [46]:
pred = pd.DataFrame({
    'p-rating': explicit_model.predict(np.array([user_id])),
    'beer': item_encoder.classes_
})

pred = pred.sort_values('p-rating', ascending=False)
ordered_predictions = pred['beer'].tolist()

In [55]:
tried_beers = df[df['name'] == person]['beer'].tolist()
tried_beers

['Heineken: Heineken Lager',
 'Guiness: Guinness',
 "Brasserie D'Achouffe: La Chouffe",
 'Brasserie Dieu De Ciel!: Peche Mortel',
 'Bellwoods Brewery: Jelly King',
 'Brouwerij Rodenbach: Rodenbach Grand Cru,',
 'Mill Street Brewery: 100th Meridian',
 'Creemore Spring Brewery: Creemore Springs Premium Lager',
 'Sawdust City Brewing Co.: Juicin',
 'Grupo Modelo: Corona',
 'Coors Brewing Company: Coors Light',
 'Blue Moon Brewing Company: Blue Moon',
 'Great Lakes Brewery: Great Lakes Octopus Wants To Fight IPA',
 "Beau's All Natural Brewing Company: Beau's Lugtread",
 'Side Launch Brewing Company: Side Launch Wheat',
 'Clifford Brewing Co.: Clifford Porter',
 'Sawdust City Brewing Co.: Little Norway',
 'Alexander Keiths: Red Amber Ale',
 'Sierra Nevada Brewing Co.: Pale Ale',
 'Redline Brewhouse: Clutch']

In [None]:
ordered_predictions

In [52]:
[i for i in ordered_predictions if i not in tried_beers][:10]

['High Road Brewing: Bronan',
 'High Road Brewing: Cloud Piercer',
 'Brewery Ommegang: Ommegang Pale Sour',
 'Sawdust City Brewing: Twin Pine',
 'Bellwoods Brewery: Jelly King - Pink Guava',
 'Bellwoods Brewery: Witchshark',
 'Nickel Brook Brewing: Über - Raspberry',
 'Bellwoods Brewery: Jutsu',
 'Les Trois Mousquetaires: Gose',
 'Bellwoods Brewery: Boogie Monster']

In [54]:
from spotlight.evaluation import rmse_score
rmse_score(explicit_model, test)

2.3869462

### WIP

In [68]:
class InteractionMachine:
    
    def __init__(self, df, ratings, users, items):
        self.user_encoder = LabelEncoder()
        self.item_encoder = LabelEncoder()
        user_ids = self.user_encoder.fit_transform(df[user])
        item_ids = self.item_encoder.fit_transform(df[item])
        ratings = np.array(df[ratings]).astype('float32')
        self.interactions = Interactions(user_ids, item_ids, ratings)
        
im = InteractionMachine(df, 'rating', 'user', 'item')
im.interactions

<Interactions dataset (98 users x 337 items x 2660 interactions)>

In [70]:
train, test = random_train_test_split(im.interactions)

In [71]:
class RecommendationMachine(ExplicitFactorizationModel):
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        
    def fit(self, interaction_machine):
        super().fit(interaction_machine.interactions)
        self.user_encoder = interaction_machine.user_encoder
        self.item_encoder = interaction_machine.item_encoder


model = RecommendationMachine(n_iter=100)
model.fit(im)

In [74]:
person = 'Adam Johnson'
user_id = user_encoder.transform([person])[0]

pred = pd.DataFrame({
    'p-rating': explicit_model.predict(np.array([user_id])),
    'beer': item_encoder.classes_
})
pred = pred.sort_values('p-rating', ascending=False)
ordered_predictions = pred['beer'].tolist()
tried_beers = df[df['name'] == person]['beer'].tolist()
[i for i in ordered_predictions if i not in tried_beers][:10]

['High Road Brewing: Bronan',
 'High Road Brewing: Cloud Piercer',
 'Brewery Ommegang: Ommegang Pale Sour',
 'Sawdust City Brewing: Twin Pine',
 'Bellwoods Brewery: Jelly King - Pink Guava',
 'Bellwoods Brewery: Witchshark',
 'Nickel Brook Brewing: Über - Raspberry',
 'Bellwoods Brewery: Jutsu',
 'Les Trois Mousquetaires: Gose',
 'Bellwoods Brewery: Boogie Monster']