In [204]:
import numpy as np
import pandas as pd
from IPython.display import display
import time
import os
from dotenv import load_dotenv
from evaluation_and_testing_helpers.testing import evaluate_train_test_split_explicit
from evaluation_and_testing_helpers.testing import evaluate_leave_one_out_explicit

load_dotenv()

DB_URL = os.getenv('DB_URL')

pd.set_option('display.max_columns', 50)

In [205]:
from pymongo import MongoClient

client = MongoClient(DB_URL)
db = client['alkoholove']

### Load alcohol items data

In [206]:
ALCOHOL_COLUMNS = ['_id', 'name', 'kind', 'type', 'alcohol_by_volume', 'color', 'manufacturer', 'country', 'region']

alcohols_collection = db['alcohols']
items_df = pd.DataFrame(
    list(alcohols_collection.find({}, {field_name: 1 for field_name in ALCOHOL_COLUMNS}))
).rename(columns={'_id': 'item_id'})

items_df['item_id'] = items_df['item_id'].apply(lambda x: str(x))
items_df.replace(r'^\s*$', np.nan, regex=True, inplace=True)

display(items_df.head(10))

Unnamed: 0,item_id,name,kind,type,alcohol_by_volume,color,manufacturer,country,region
0,62aa2b37e33ccae4961a4daa,Jägermeister,likier,Ziołowy,35.0,ciemny,Mast-Jägermeister SE,Niemcy,
1,62ab28e88a757f60cc3f31db,Biały Bocian Słony Karmel,likier,mleczny,16.0,karmelowy,Polmos,Polska,Bielsko-Biała
2,62ab2a32fd2e7fbd58da41d9,Biały Bocian Advocat,likier,jajeczny,16.0,żółty,Polmos,Polska,Bielsko-Biała
3,62ab611c4a2fcedd4ce86a79,James cook white oversea,rum,biały,37.5,biały,Eckerts Wacholder Brennerei GmbH,Niemcy,
4,62ab638288a7811f65839221,Krupnik Słony Karmel,likier,mleczny,16.0,karmelowy,Sobieski,Polska,
5,62ab661388a7811f65839222,Havana Club Anejo Especial,rum,złoty,37.5,złoty,Havana Club,Kuba,
6,62ab681f88a7811f65839223,Sheridans Coffee Layered Liqueur,likier,kawowy,15.5,czarny i biały,Thomas Sheridan & Sons,Irlandia,Dublin
7,62ab6afe4a2fcedd4ce86a7a,Captain Morgan Original Spiced Gold,rum,złoty,35.0,bursztynowy,Diageo PLC,Anglia,Londyn
8,62ab6cb588a7811f65839224,Captain Morgan White Rum,rum,biały,35.0,biały,Diageo PLC,Anglia,Londyn
9,62ab6eca88a7811f65839225,She Słony Karmel,likier,mleczny,17.0,karmelowy,BZK Alco,Polska,


### Load users data

In [207]:
reviews_collection = db['reviews']
favourites_collection = db['user_favourites']
wishlist_collection = db['user_wishlist']
user_search_collection = db['user_search_history']

reviews_df = pd.DataFrame(
    list(reviews_collection.find({}, {'_id': 0, 'user_id': 1, 'alcohol_id': 1, 'rating': 1}))
).rename(columns={'alcohol_id': 'item_id'})
display(reviews_df.head(10))

favourites_df = pd.DataFrame(
    list(favourites_collection.find({'alcohols': {'$ne': [] }}, {'_id': 0, 'user_id': 1, 'alcohols': 1}))
).explode('alcohols').rename(columns={'alcohols': 'item_id'})
favourites_df['favourite'] = 1
display(favourites_df.head(10))

wishlist_df = pd.DataFrame(
    list(wishlist_collection.find({'alcohols': {'$ne': [] }}, {'_id': 0, 'user_id': 1, 'alcohols': 1}))
).explode('alcohols').rename(columns={'alcohols': 'item_id'})
wishlist_df['wishlist'] = 1
display(wishlist_df.head(10))

user_search = list(user_search_collection.find({'alcohols': {'$ne': [] }}, {'_id': 0, 'user_id': 1, 'alcohols': 1}))
user_search = [
    {
        'user_id': user_search_entry['user_id'],
        'alcohols': set(
            [alcohol_search_entry['alcohol_id'] for alcohol_search_entry in user_search_entry['alcohols']]
        )
    } for user_search_entry in user_search
]
user_search_df = pd.DataFrame(
    user_search
).explode('alcohols').rename(columns={'alcohols': 'item_id'})
user_search_df['search'] = 1
display(user_search_df.head(10))

# free memory and close db connection
del reviews_collection, favourites_collection, wishlist_collection, user_search_collection, user_search
db.client.close()

Unnamed: 0,user_id,item_id,rating
0,6288e2fdd5ab6070dde8db8c,6288e32dd5ab6070dde8db8a,5
1,629f5bee10456c7cbc3af712,62b4c0265aff740017de687a,3
2,629f5bee10456c7cbc3af712,62b5871901bc976edbd13a94,3
3,62ab1fb3fd2e7fbd58da41d3,62ab7d6b88a7811f6583922b,5
4,62ab1fb3fd2e7fbd58da41d3,62b4391cd7995d801996d75f,4
5,62acd4d9ba0c8633231112d9,62fa9edb2eb13eb1b8afd64c,4
6,62aed4f0d20cdefa840bac0b,62b4a1805aff740017de6866,5
7,62acd4d9ba0c8633231112d9,62d1bb5e0f95370144ad09e2,4
8,629f5bee10456c7cbc3af712,62b4407ddd971448a4beeaeb,3
9,62ab1b5cfd2e7fbd58da41cf,62d5b5fd20512ec9fa9a0dd6,4


Unnamed: 0,user_id,item_id,favourite
0,629f5bee10456c7cbc3af712,6288e32dd5ab6070dde8db8a,1
0,629f5bee10456c7cbc3af712,62ab638288a7811f65839221,1
0,629f5bee10456c7cbc3af712,62b4391cd7995d801996d75f,1
0,629f5bee10456c7cbc3af712,62b4407ddd971448a4beeaeb,1
0,629f5bee10456c7cbc3af712,6322409d86d1ba39137c5ccc,1
1,6288e2fdd5ab6070dde8db8b,6288e32dd5ab6070dde8db8a,1
2,62ab1b5cfd2e7fbd58da41cf,62aa2b37e33ccae4961a4daa,1
2,62ab1b5cfd2e7fbd58da41cf,62d5b5fd20512ec9fa9a0dd6,1
3,62ab1fb3fd2e7fbd58da41d3,62ab6afe4a2fcedd4ce86a7a,1
3,62ab1fb3fd2e7fbd58da41d3,62ab7d6b88a7811f6583922b,1


Unnamed: 0,user_id,item_id,wishlist
0,6288e2fdd5ab6070dde8db8b,6288e32dd5ab6070dde8db8a,1
1,629f5bee10456c7cbc3af712,6288e32dd5ab6070dde8db8a,1
1,629f5bee10456c7cbc3af712,62ab28e88a757f60cc3f31db,1
1,629f5bee10456c7cbc3af712,62dd9a8a5ef6265fb6823107,1
1,629f5bee10456c7cbc3af712,62b4391cd7995d801996d75f,1
2,6288e2fdd5ab6070dde8db8c,6288e32dd5ab6070dde8db8a,1
3,62ab1b5cfd2e7fbd58da41cf,62aa2b37e33ccae4961a4daa,1
4,62ab1fb3fd2e7fbd58da41d3,62ab6afe4a2fcedd4ce86a7a,1
5,62acd4d9ba0c8633231112d9,6345b22444e3d00b138cdae2,1
5,62acd4d9ba0c8633231112d9,62e668de6559e7a36a34fd59,1


Unnamed: 0,user_id,item_id,search
0,6288e2fdd5ab6070dde8db8b,6288e32dd5ab6070dde8db8a,1
1,629f5bee10456c7cbc3af712,62dd3383d1f50b95cb0c8f33,1
1,629f5bee10456c7cbc3af712,62d8515b7e5961377764ed3a,1
1,629f5bee10456c7cbc3af712,62f7f53dc48b41f538750d14,1
1,629f5bee10456c7cbc3af712,62ab638288a7811f65839221,1
1,629f5bee10456c7cbc3af712,62b4407ddd971448a4beeaeb,1
1,629f5bee10456c7cbc3af712,62b495c27c93cafdd7d0944f,1
1,629f5bee10456c7cbc3af712,62b5871901bc976edbd13a94,1
1,629f5bee10456c7cbc3af712,62ab28e88a757f60cc3f31db,1
1,629f5bee10456c7cbc3af712,6322409d86d1ba39137c5ccc,1


### Merge dataframes

In [208]:
interactions_df = pd.merge(
    reviews_df,
    pd.merge(
        user_search_df,
        pd.merge(favourites_df, wishlist_df, on=['user_id', 'item_id'], how='outer'),
        on=['user_id', 'item_id'],
        how='outer'
    ),
    on=['user_id', 'item_id'],
    how='outer'
).fillna(0)

# free memory
del reviews_df, user_search_df, favourites_df, wishlist_df

interactions_df['user_id'] = interactions_df['user_id'].apply(lambda x: str(x))
interactions_df['item_id'] = interactions_df['item_id'].apply(lambda x: str(x))

display(interactions_df.head(10))

Unnamed: 0,user_id,item_id,rating,search,favourite,wishlist
0,6288e2fdd5ab6070dde8db8c,6288e32dd5ab6070dde8db8a,5.0,0.0,0.0,1.0
1,629f5bee10456c7cbc3af712,62b4c0265aff740017de687a,3.0,0.0,0.0,0.0
2,629f5bee10456c7cbc3af712,62b5871901bc976edbd13a94,3.0,1.0,0.0,0.0
3,62ab1fb3fd2e7fbd58da41d3,62ab7d6b88a7811f6583922b,5.0,1.0,1.0,0.0
4,62ab1fb3fd2e7fbd58da41d3,62b4391cd7995d801996d75f,4.0,1.0,0.0,0.0
5,62acd4d9ba0c8633231112d9,62fa9edb2eb13eb1b8afd64c,4.0,1.0,1.0,0.0
6,62aed4f0d20cdefa840bac0b,62b4a1805aff740017de6866,5.0,0.0,1.0,0.0
7,62acd4d9ba0c8633231112d9,62d1bb5e0f95370144ad09e2,4.0,0.0,0.0,0.0
8,629f5bee10456c7cbc3af712,62b4407ddd971448a4beeaeb,3.0,1.0,1.0,0.0
9,62ab1b5cfd2e7fbd58da41cf,62d5b5fd20512ec9fa9a0dd6,4.0,1.0,1.0,0.0


### Merge interactions with items

In [209]:
ml_df = pd.merge(interactions_df, items_df, on='item_id')
display(ml_df)

Unnamed: 0,user_id,item_id,rating,search,favourite,wishlist,name,kind,type,alcohol_by_volume,color,manufacturer,country,region
0,629f5bee10456c7cbc3af712,62b4c0265aff740017de687a,3.0,0.0,0.0,0.0,SIERRA Silver Tequila,tequila,czysta,38.0,przezroczysty,Destileria Sierra S.A. de C.V.,Meksyk,
1,629f5bee10456c7cbc3af712,62b5871901bc976edbd13a94,3.0,1.0,0.0,0.0,Cydr lubelski klasyczny,cydr,musujący,4.5,przezroczysty,Ambra S.A.,Polska,Lubelszczyzna
2,62ab1fb3fd2e7fbd58da41d3,62b5871901bc976edbd13a94,3.0,1.0,0.0,0.0,Cydr lubelski klasyczny,cydr,musujący,4.5,przezroczysty,Ambra S.A.,Polska,Lubelszczyzna
3,62ab1fb3fd2e7fbd58da41d3,62ab7d6b88a7811f6583922b,5.0,1.0,1.0,0.0,Bacardi Spiced,rum,złoty,35.0,miedziany,Santiago de Cuba,Puerto Rico,
4,62acd4d9ba0c8633231112d9,62ab7d6b88a7811f6583922b,0.0,1.0,0.0,0.0,Bacardi Spiced,rum,złoty,35.0,miedziany,Santiago de Cuba,Puerto Rico,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,62aed4f0d20cdefa840bac0b,62f4065cd079a6eda7ddf043,0.0,1.0,0.0,0.0,Zlatý Bažant,piwo,pilsner,4.6,złoty,Martin Sp z o.o.,Slowacja,
75,62aed4f0d20cdefa840bac0b,6341ec675594f5b0694aaf04,0.0,1.0,0.0,0.0,AleBrowar Black Hope,piwo,Black IPA,6.2,czarny,AleBrowar,Polska,Wrocław
76,62aed4f0d20cdefa840bac0b,62b4ec475aff740017de688c,0.0,1.0,0.0,0.0,Absolwent Grejpfrutowy,likier,owocowy,32.0,różowy,Polmos Białystok,Polska,
77,62aed4f0d20cdefa840bac0b,635305991e38252337505fe2,0.0,1.0,0.0,0.0,AleBrowar & Nocny Kochanek Amarenowe,piwo,Fruit gose,4.6,ciemne,AleBrowar,Polska,Wrocław


### Linear Regression Recommender

### Quick test

In [327]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MultiLabelBinarizer


class LinearRegressionRecommender:
    def __init__(self):
        self.model = None
        self.mlb = None
        self.users_dict = None
        self.user_features = None

    def fit(self, interactions_df, items_df):
        # Prepare user features
        _items_df = items_df.copy()
        _items_df['alcohol_type'] = _items_df['kind'] + '_' + _items_df['type']
        _items_df['alcohol_type'] = _items_df['alcohol_type'].apply(lambda x: [x.lower()])

        _interactions_df = interactions_df.copy()
        users_df = interactions_df[['user_id', 'kind', 'type', 'rating', 'favourite', 'search', 'wishlist']].copy()
        users_df['rating'] = users_df['rating'].apply(lambda x: 1 if x > 0 else x)
        users_df['num_of_interactions'] = users_df['rating'] + \
                                                  users_df['favourite'] + \
                                                  users_df['search'] + \
                                                  users_df['wishlist']
        users_df['alcohol_type'] = users_df['kind'].str.lower() + '_' + users_df['type'].str.lower()
        users_df.drop(columns=['kind', 'type', 'rating', 'favourite', 'search', 'wishlist'], inplace=True)
        users_df = users_df.pivot_table(index='user_id', columns='alcohol_type', values='num_of_interactions', aggfunc='sum')
        users_df = users_df / users_df.sum(axis=1).values.reshape(-1, 1)
        users_df = users_df.rename_axis(None, axis=1).fillna(0)

        self.users_dict = users_df.to_dict('index')

        self.user_features = users_df.columns.tolist()

        _interactions_df['alcohol_type'] = _interactions_df['kind'] + '_' + _interactions_df['type']
        _interactions_df['alcohol_type'] = _interactions_df['alcohol_type'].apply(lambda x: [x.lower()])
        _interactions_df['num_of_interactions'] = _interactions_df['rating'] + \
                                                  _interactions_df['favourite'] + \
                                                  _interactions_df['search'] + \
                                                  _interactions_df['wishlist']
        # Prepare item features

        # Transform alcohol types into binary values

        self.mlb = MultiLabelBinarizer()
        _interactions_df = _interactions_df.join(
            pd.DataFrame(
                self.mlb.fit_transform(_interactions_df.pop('alcohol_type')),
                columns=self.mlb.classes_,
                index=_interactions_df.index
            )
        )
        users_df = users_df.add_prefix('user_')
        _interactions_df = _interactions_df.merge(users_df, on='user_id')
        # Prepare input data and fit the model
        _interactions_df[self.mlb.classes_] = _interactions_df[self.mlb.classes_] \
            / _interactions_df[self.mlb.classes_].sum(axis=1).values.reshape(-1, 1)

        _interactions_df[self.mlb.classes_] = _interactions_df[self.mlb.classes_] \
            * _interactions_df[self.user_features].values

        x = _interactions_df.loc[:, list(self.mlb.classes_)].values
        y = _interactions_df['num_of_interactions'].values
        display(x)
        display(_interactions_df.loc[:, list(self.mlb.classes_)])
        display(_interactions_df['num_of_interactions'])
        display(_interactions_df[_interactions_df['whisky_blended'] == 1])
        display(_interactions_df[_interactions_df['piwo_specialty beer'] == 1])
        display(y)

        self.model = LinearRegression().fit(x, y)

    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns
        top n_recommendations for each user.

        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
            for each user.
        :rtype: pd.DataFrame
        """

        # Transform the item to be scored into proper features
        _items_df = items_df.copy()
        _items_df['alcohol_type'] = _items_df['kind'] + '_' + _items_df['type']
        _items_df['alcohol_type'] = _items_df['alcohol_type'].apply(lambda x: [x.lower()])

        _items_df = _items_df.join(
            pd.DataFrame(
                self.mlb.transform(_items_df.pop('alcohol_type')),
                columns=self.mlb.classes_,
                index=items_df.index
            )
        )

        # Score the item
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])

        for ix, user in users_df.iterrows():
            if user['user_id'] in self.users_dict:
                user_df = pd.DataFrame.from_dict({user['user_id']: self.users_dict[user['user_id']]}, orient='index')
            else:
                user_df = pd.DataFrame.from_dict(
                    {user['user_id']: [1 / len(self.user_features)]*len(self.user_features)},
                    orient='index'
                )
            input_df = _items_df.copy()
            input_df[self.mlb.classes_] = _items_df[self.mlb.classes_] * user_df.values
            scores = self.model.predict(input_df.loc[:, self.mlb.classes_].values)
            display(scores[499])
            display(scores[42])
            chosen_pos = np.argsort(-scores)[:n_recommendations]
            display(chosen_pos)
            user_recommendations = []
            for item_pos in chosen_pos:
                user_recommendations.append(
                    {
                        'user_id': user['user_id'],
                        'item_id': input_df.iloc[item_pos]['item_id'],
                        'score': scores[item_pos]
                    }
                )

            user_recommendations = pd.DataFrame(user_recommendations)

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

In [328]:
# Quick test of the recommender

lr_recommender = LinearRegressionRecommender()
lr_recommender.fit(ml_df, items_df)
recommendations = lr_recommender.recommend(pd.DataFrame(['629f5bee10456c7cbc3af712'], columns=['user_id']), items_df, 40)

recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')
display(recommendations)

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

Unnamed: 0,cydr_musujący,likier_mleczny,likier_owocowy,likier_ziołowy,piwo_aipa,piwo_american ipa,piwo_american pale ale,piwo_apa,piwo_baltic porter,piwo_black ipa,piwo_bohemian pilsener,piwo_ciemny lager,piwo_english pale ale,piwo_fruit & honey ale,piwo_fruit gose,piwo_india pale ale,piwo_lager,piwo_milk stout,piwo_non-alcoholic witbier,piwo_pale lager,piwo_pils,piwo_pilsner,piwo_porter,piwo_smoked imperial stout,piwo_smoked white ipa,piwo_sour ale,piwo_specialty beer,piwo_stout,piwo_strong lager,piwo_světlý ležák,piwo_weissbier,piwo_witbier,rum_biały,rum_czarny,rum_złoty,tequila_czysta,whisky_blended,whisky_single malt,wino_deserowe,wino_słodkie,wino_wytrawne,wódka_czysta
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
77,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


0     3.0
1     4.0
2     3.0
3     5.0
4     2.0
     ... 
74    1.0
75    1.0
76    1.0
77    1.0
78    1.0
Name: num_of_interactions, Length: 79, dtype: float64

Unnamed: 0,user_id,item_id,rating,search,favourite,wishlist,name,kind,type,alcohol_by_volume,color,manufacturer,country,region,num_of_interactions,cydr_musujący,likier_mleczny,likier_owocowy,likier_ziołowy,piwo_aipa,piwo_american ipa,piwo_american pale ale,piwo_apa,piwo_baltic porter,piwo_black ipa,...,user_piwo_milk stout,user_piwo_non-alcoholic witbier,user_piwo_pale lager,user_piwo_pils,user_piwo_pilsner,user_piwo_porter,user_piwo_smoked imperial stout,user_piwo_smoked white ipa,user_piwo_sour ale,user_piwo_specialty beer,user_piwo_stout,user_piwo_strong lager,user_piwo_světlý ležák,user_piwo_weissbier,user_piwo_witbier,user_rum_biały,user_rum_czarny,user_rum_złoty,user_tequila_czysta,user_whisky_blended,user_whisky_single malt,user_wino_deserowe,user_wino_słodkie,user_wino_wytrawne,user_wódka_czysta
2,629f5bee10456c7cbc3af712,62b4391cd7995d801996d75f,0.0,1.0,1.0,1.0,Jameson,whisky,blended,40.0,bursztyn,Irish Distillers,Irlandia,Cork,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.285714,0.047619,0.0,0.0,0.0,0.0
3,629f5bee10456c7cbc3af712,62b4407ddd971448a4beeaeb,3.0,1.0,1.0,0.0,Jameson Caskmates Stout Edition,whisky,blended,40.0,karmel,Irish Distillers,Irlandia,Cork,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.285714,0.047619,0.0,0.0,0.0,0.0
14,62ab1fb3fd2e7fbd58da41d3,62b4391cd7995d801996d75f,4.0,1.0,0.0,0.0,Jameson,whisky,blended,40.0,bursztyn,Irish Distillers,Irlandia,Cork,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068182,0.045455,0.022727,0.0,0.045455,0.068182,0.113636,0.0,0.045455,0.022727,0.0,0.0,0.0,0.068182
50,62ab1b5cfd2e7fbd58da41cf,62b4407ddd971448a4beeaeb,0.0,1.0,0.0,0.0,Jameson Caskmates Stout Edition,whisky,blended,40.0,karmel,Irish Distillers,Irlandia,Cork,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.2,0.0,0.133333,0.0


Unnamed: 0,user_id,item_id,rating,search,favourite,wishlist,name,kind,type,alcohol_by_volume,color,manufacturer,country,region,num_of_interactions,cydr_musujący,likier_mleczny,likier_owocowy,likier_ziołowy,piwo_aipa,piwo_american ipa,piwo_american pale ale,piwo_apa,piwo_baltic porter,piwo_black ipa,...,user_piwo_milk stout,user_piwo_non-alcoholic witbier,user_piwo_pale lager,user_piwo_pils,user_piwo_pilsner,user_piwo_porter,user_piwo_smoked imperial stout,user_piwo_smoked white ipa,user_piwo_sour ale,user_piwo_specialty beer,user_piwo_stout,user_piwo_strong lager,user_piwo_světlý ležák,user_piwo_weissbier,user_piwo_witbier,user_rum_biały,user_rum_czarny,user_rum_złoty,user_tequila_czysta,user_whisky_blended,user_whisky_single malt,user_wino_deserowe,user_wino_słodkie,user_wino_wytrawne,user_wódka_czysta
5,629f5bee10456c7cbc3af712,6322409d86d1ba39137c5ccc,3.0,1.0,1.0,0.0,Fortuna Miodowe Ciemne,piwo,specialty beer,5.6,karmelowy,Browar FORTUNA Sp. z o.o.,Polska,,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.285714,0.047619,0.0,0.0,0.0,0.0


array([3., 4., 3., 5., 2., 5., 1., 1., 1., 1., 2., 1., 4., 7., 5., 1., 6.,
       7., 7., 4., 4., 6., 5., 7., 2., 5., 6., 7., 1., 1., 1., 2., 1., 6.,
       4., 5., 5., 7., 7., 2., 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1.,
       6., 8., 5., 6., 1., 1., 6., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])



3.193051506316812

3.0656381600259155

array([499, 518, 485, 473, 558, 516, 455, 584, 440, 484, 442, 445, 560,
       561, 441, 444, 443, 567, 123, 111,  42, 106,  43,  54,  55,  57,
        58, 137,  65,  66,  79,  85,  56,  92,  64, 130, 458, 417, 509,
       459], dtype=int64)

Unnamed: 0,user_id,item_id,score,name,kind,type,alcohol_by_volume,color,manufacturer,country,region
0,629f5bee10456c7cbc3af712,6322409d86d1ba39137c5ccc,3.193052,Fortuna Miodowe Ciemne,piwo,specialty beer,5.6,karmelowy,Browar FORTUNA Sp. z o.o.,Polska,
1,629f5bee10456c7cbc3af712,6328e13cf840cb9bd8185149,3.193052,Trybunalskie Miodowe,piwo,specialty beer,6.0,złoty,Perła - Browary Lubelskie S.A.,Polska,
2,629f5bee10456c7cbc3af712,631bb23c000c24db49dd5b55,3.193052,Cortes Rosado,piwo,specialty beer,4.5,różowy,Van Pur SA.,Polska,
3,629f5bee10456c7cbc3af712,6318cce3c702798f8682375d,3.193052,Desperados Lime,piwo,specialty beer,3.0,jasne,Grupa Żywiec S.A.,Polska,
4,629f5bee10456c7cbc3af712,634be860d64e597caf8e395d,3.193052,Lubuskie i Tequila,piwo,specialty beer,4.9,jasne,Browar Witnica,Polska,Lubuskie
5,629f5bee10456c7cbc3af712,6328df56f840cb9bd8185147,3.193052,Perła Miodowa,piwo,specialty beer,5.4,bursztynowy,Perła - Browary Lubelskie S.A.,Polska,
6,629f5bee10456c7cbc3af712,630d14899b0fa74888cb9eb8,3.193052,Cortes Tequila,piwo,specialty beer,6.0,słomkowy,browar Van Pur,Polska,
7,629f5bee10456c7cbc3af712,634c092fd53de905ed3963ae,3.193052,Lubuskie Zielone,piwo,specialty beer,4.2,zielony,Browar Witnica,Polska,Lubuskie
8,629f5bee10456c7cbc3af712,630286922c8204566bc5bfda,3.193052,Desperados,piwo,specialty beer,6.0,jasne,Grupa Żywiec S.A.,Polska,
9,629f5bee10456c7cbc3af712,631bb13c000c24db49dd5b54,3.193052,Cortes Muscat,piwo,specialty beer,4.5,jasny,Van Pur S.A.,Polska,


### Train-test split test

In [213]:
lr_recommender = LinearRegressionRecommender()

t0 = time.time()
seed = 6789

results = [
    ['LinearRegressionRecommender'] + list(
        evaluate_train_test_split_explicit(lr_recommender, ml_df, items_df, seed=seed))
]

results = pd.DataFrame(
    results,
    columns=['Recommender', 'RMSE', 'MRE', 'TRE']
)

display(results)

print('Total evaluation time: {}'.format(time.time() - t0))

                    user_id                   item_id     score
0  62aed4f0d20cdefa840bac0b  62b3564b568ac44060a46d68  2.443922
                    user_id                   item_id     score
0  629f5bee10456c7cbc3af712  62b4407ddd971448a4beeaeb  0.472055
                    user_id                   item_id     score
0  629f5bee10456c7cbc3af712  62b495c27c93cafdd7d0944f  1.875034
                    user_id                   item_id         score
0  62acd4d9ba0c8633231112d9  62e66c516ec5f8379ef62223  4.440892e-16
                    user_id                   item_id         score
0  62acd4d9ba0c8633231112d9  62e6896c4b51ceb70532f8b0  4.440892e-16
                    user_id                   item_id     score
0  62ab1fb3fd2e7fbd58da41d3  62f01bab19bf63dfdd05b3a9  1.875034
                    user_id                   item_id     score
0  62aed4f0d20cdefa840bac0b  6341e86f5594f5b0694aaf03  1.875034
                    user_id                   item_id     score
0  62acd4d9ba0c863323111



                    user_id                   item_id     score
0  62aed4f0d20cdefa840bac0b  63530c56dbdcc7392d6f1d3f  1.875034
                    user_id                   item_id     score
0  62ab1fb3fd2e7fbd58da41d3  62d1bf360f95370144ad09e3  1.875034
                    user_id                   item_id     score
0  62acd4d9ba0c8633231112d9  62b494eb5aff740017de6858  1.875034
                    user_id                   item_id     score
0  62aed4f0d20cdefa840bac0b  6341ec675594f5b0694aaf04  1.875034
                    user_id                   item_id     score
0  629f5bee10456c7cbc3af712  62dd9a8a5ef6265fb6823107  1.875034
                    user_id                   item_id     score
0  629f5bee10456c7cbc3af712  62b4c0265aff740017de687a  1.875034
                    user_id                   item_id     score
0  62aed4f0d20cdefa840bac0b  62dd2e7ef12326d8b3e521de  1.875034
                    user_id                   item_id     score
0  629f5bee10456c7cbc3af712  6322409d86d

  return 1 / len(r_pred) * np.sum(np.abs(r_pred - r_real) / np.abs(r_real))


Unnamed: 0,Recommender,RMSE,MRE,TRE
0,LinearRegressionRecommender,1.820904,inf,1.272473


Total evaluation time: 0.5731775760650635


In [240]:
display(ml_df[ml_df['user_id'] == '629f5bee10456c7cbc3af712'])

Unnamed: 0,user_id,item_id,rating,search,favourite,wishlist,name,kind,type,alcohol_by_volume,color,manufacturer,country,region
0,629f5bee10456c7cbc3af712,62b4c0265aff740017de687a,3.0,0.0,0.0,0.0,SIERRA Silver Tequila,tequila,czysta,38.0,przezroczysty,Destileria Sierra S.A. de C.V.,Meksyk,
1,629f5bee10456c7cbc3af712,62b5871901bc976edbd13a94,3.0,1.0,0.0,0.0,Cydr lubelski klasyczny,cydr,musujący,4.5,przezroczysty,Ambra S.A.,Polska,Lubelszczyzna
6,629f5bee10456c7cbc3af712,62b4391cd7995d801996d75f,0.0,1.0,1.0,1.0,Jameson,whisky,blended,40.0,bursztyn,Irish Distillers,Irlandia,Cork
13,629f5bee10456c7cbc3af712,62b4407ddd971448a4beeaeb,3.0,1.0,1.0,0.0,Jameson Caskmates Stout Edition,whisky,blended,40.0,karmel,Irish Distillers,Irlandia,Cork
26,629f5bee10456c7cbc3af712,62ab638288a7811f65839221,0.0,1.0,1.0,0.0,Krupnik Słony Karmel,likier,mleczny,16.0,karmelowy,Sobieski,Polska,
31,629f5bee10456c7cbc3af712,6322409d86d1ba39137c5ccc,3.0,1.0,1.0,0.0,Fortuna Miodowe Ciemne,piwo,specialty beer,5.6,karmelowy,Browar FORTUNA Sp. z o.o.,Polska,
39,629f5bee10456c7cbc3af712,62dd3383d1f50b95cb0c8f33,0.0,1.0,0.0,0.0,Irish Beer,piwo,lager,6.5,ciemne,Browar Kormoran,Polska,Warmińsko-mazurskie
42,629f5bee10456c7cbc3af712,62d8515b7e5961377764ed3a,0.0,1.0,0.0,0.0,Żubr,piwo,Pale Lager,6.0,jasne,Kampania Piwowarska,Polska,
43,629f5bee10456c7cbc3af712,62f7f53dc48b41f538750d14,0.0,1.0,0.0,0.0,Sadowski Cydr,cydr,musujący,4.5,,Bartex Bartol,Polska,
44,629f5bee10456c7cbc3af712,62b495c27c93cafdd7d0944f,0.0,1.0,0.0,0.0,Glenfiddich 12yo,whisky,single malt,40.0,bursztyn,William Grant and Sons,Szkocja,Moray
