In [1]:
import os
import implicit
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix

In [2]:
class Config:
    """Central knobs for the notebook: data location, hold-out size and ALS settings."""

    # ---- data ----
    # Folder that holds the BX-Book-Ratings / BX-Users / BX_Books CSV files.
    base_path = '../input/bookcrossing-dataset/Book reviews/Book reviews'
    # Hold-out specification forwarded to prepare_dataset(): with typ 'percent'
    # val_data_size is read as a fraction, with any other typ as a row count.
    typ = 'count'
    val_data_size = 5
    # Placeholders. NOTE: despite the names, these are later filled with the
    # lists of unique user ids / ISBNs; consumers take len() of them.
    users_count = items_count = None

    # ---- model ----
    # Hyper-parameters handed to train().
    factors, iterations = 200, 20
    regularization = 0.01
    show_progress = True

    # ---- inference ----
    # Number of items to recommend per user.
    N = 1

In [3]:
# Read the three ';'-separated, CP1252-encoded tables from Config.base_path.
# A backslash is declared as the escape character for all of them.
actions_df = pd.read_csv(
    '{}/BX-Book-Ratings.csv'.format(Config.base_path),
    sep=";",
    encoding='CP1252',
    escapechar='\\',
)
users_df = pd.read_csv(
    '{}/BX-Users.csv'.format(Config.base_path),
    sep=";",
    encoding='CP1252',
    escapechar='\\',
)
items_df = pd.read_csv(
    '{}/BX_Books.csv'.format(Config.base_path),
    sep=";",
    encoding='CP1252',
    escapechar='\\',
)

# Kept as the last expression so the cell displays the three shapes.
(actions_df.shape, users_df.shape, items_df.shape)

In [4]:
# Collect every known user id and ISBN.
# NOTE: despite the "_count" suffix these Config attributes hold the full
# lists; df_to_coo() takes len() of them to size the rating matrix.
Config.users_count = users_df['User-ID'].unique().tolist()
Config.items_count = items_df['ISBN'].unique().tolist()

# Map each ISBN / user id to its 0-based position in the lists above.
items_ids = {isbn: idx for idx, isbn in enumerate(Config.items_count)}
users_ids = {user_id: idx for idx, user_id in enumerate(Config.users_count)}

# Attach the dense item ids to the catalogue and to the ratings.
items_df['Item-ID'] = items_df['ISBN'].map(items_ids)
actions_df['Item-ID'] = actions_df['ISBN'].map(items_ids)

# Re-index users through a lookup instead of subtracting 1 in place.
# For ids that are exactly 1..N in file order this yields the same values as
# `User-ID - 1`, but it does not depend on that layout, and re-running the
# cell no longer shifts the ids a second time (the mapping becomes identity).
users_df['User-ID'] = users_df['User-ID'].map(users_ids)
actions_df['User-ID'] = actions_df['User-ID'].map(users_ids)

# map() leaves NaN wherever a rating refers to an unknown ISBN or user;
# drop those rows (and any other row with a missing value).
actions_df = actions_df.dropna(axis=0).reset_index(drop=True)
# A column that contained NaN was upcast to float; restore integer ids so
# they can be used directly as matrix indices.
actions_df = actions_df.astype({'User-ID': 'int64', 'Item-ID': 'int64'})

In [5]:
def split_dataset(dataset, val_split, typ):
    """Split ``dataset`` into a leading training part and a trailing validation part.

    The split is purely positional: the last rows of ``dataset`` become the
    validation set and everything before them the training set. Nothing is
    shuffled.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows to split, in the order in which they should be cut.
    val_split : float or int
        Fraction of rows to hold out when ``typ == 'percent'``; otherwise the
        absolute number of rows to hold out.
    typ : str
        ``'percent'`` interprets ``val_split`` as a fraction. Any other value
        (e.g. ``'count'``) interprets it as a row count.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(data_train, data_val)``. ``data_val`` gets a fresh 0-based index;
        ``data_train`` keeps the index of ``dataset``.

    Raises
    ------
    ValueError
        If a percentage split yields no validation rows, or if the requested
        validation size is negative or larger than the dataset.
    """
    ex_count = dataset.shape[0]

    if typ == 'percent':
        val_data_size = int(ex_count * val_split)
        if val_data_size < 1:
            # Raise a real exception: raising a bare string is itself a TypeError.
            raise ValueError('Invalid validation split parameter value.')
    else:
        val_data_size = val_split

    # A size outside [0, ex_count] would push the cut point below zero, and a
    # negative slice bound counts from the end, silently giving wrong parts.
    if not 0 <= val_data_size <= ex_count:
        raise ValueError(
            'Validation size {} is outside the valid range 0..{}.'.format(val_data_size, ex_count)
        )

    cut = ex_count - val_data_size
    data_train = dataset.iloc[:cut]
    # reset_index returns a new frame, so its result has to be kept.
    data_val = dataset.iloc[cut:].reset_index(drop=True)

    return data_train, data_val

def df_to_coo(dataset):
    """Build a sparse item x user rating matrix from an interactions frame.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide the columns ``'User-ID'``, ``'Item-ID'`` and
        ``'Book-Rating'``. The two id columns are used directly as column and
        row indices of the matrix.

    Returns
    -------
    scipy.sparse.coo_matrix
        Matrix of shape ``(len(Config.items_count), len(Config.users_count))``
        with items on the rows, users on the columns and ratings as values.

    Raises
    ------
    RuntimeError
        If ``Config.users_count`` or ``Config.items_count`` has not been set.
    """
    # Identity test: both attributes become lists once configured, and
    # `== None` is not a reliable check for arbitrary objects.
    if Config.users_count is None or Config.items_count is None:
        # Raise a real exception: raising a bare string is itself a TypeError.
        raise RuntimeError('Not configured properly. Checkout config class.')

    user_index = dataset['User-ID'].values
    item_index = dataset['Item-ID'].values
    values = dataset['Book-Rating'].values

    return coo_matrix(
        (values, (item_index, user_index)),
        shape=(len(Config.items_count), len(Config.users_count))
    )


def prepare_dataset(dataset, val_split=0.05, typ='percent'):
    """Split the interactions and bundle the frames with their sparse matrices.

    ``val_split`` and ``typ`` ('percent' or 'count') are forwarded unchanged
    to split_dataset(); each resulting part is converted with df_to_coo().

    Returns a dict with the keys ``'train_coo'``, ``'train_csr'`` (the
    transposed training matrix in CSR form), ``'val_coo'``, ``'data_val'``
    and ``'data_train'``.
    """
    train_rows, val_rows = split_dataset(dataset, val_split, typ)
    train_matrix, val_matrix = (df_to_coo(part) for part in (train_rows, val_rows))
    return dict(
        train_coo=train_matrix,
        train_csr=train_matrix.T.tocsr(),
        val_coo=val_matrix,
        data_val=val_rows,
        data_train=train_rows,
    )

def train(matrices, factors=100, iterations=10, regularization=0.01, show_progress=True):
    """Fit an ALS model on ``matrices['train_coo']`` and return it.

    ``factors``, ``iterations`` and ``regularization`` are passed to the
    ``AlternatingLeastSquares`` constructor together with a fixed
    ``random_state`` of 42; ``show_progress`` is passed to ``fit``.
    """
    interactions = matrices['train_coo']

    hyperparams = dict(
        factors=factors,
        iterations=iterations,
        regularization=regularization,
        random_state=42,
    )
    als_model = implicit.als.AlternatingLeastSquares(**hyperparams)
    als_model.fit(interactions, show_progress=show_progress)
    return als_model

def predict(model, matrices):
    """Recommend ``Config.N`` items for every user listed in ``users_df``.

    Parameters
    ----------
    model : object
        Fitted model exposing ``recommend_all``.
    matrices : dict
        Must contain ``'train_csr'``, the matrix passed to ``recommend_all``.

    Returns
    -------
    dict
        Maps each value of ``users_df['User-ID']`` to that user's row of the
        array returned by ``recommend_all``.
    """
    csr_train = matrices['train_csr']
    items = model.recommend_all(csr_train, N=Config.N, filter_already_liked_items=True)
    # The rating matrix is indexed by User-ID, so a user's recommendations sit
    # in the row given by the id itself. Looking rows up by position in
    # users_df would only be right when that frame happens to be sorted by id.
    # NOTE(review): assumes recommend_all returns one row per matrix row, in
    # matrix order - confirm against the installed implicit version.
    return {user_id: items[user_id] for user_id in users_df['User-ID'].values}

In [6]:
# Build the train/validation matrices using the hold-out settings from Config.
matrices = prepare_dataset(
    actions_df,
    val_split=Config.val_data_size,
    typ=Config.typ,
)

# Hyper-parameters forwarded verbatim to train().
model_params = dict(
    factors=Config.factors,
    iterations=Config.iterations,
    regularization=Config.regularization,
    show_progress=Config.show_progress,
)

# Fit the model, then produce recommendations for every user.
model = train(matrices, **model_params)
preds = predict(model, matrices)

In [13]:
# Interactive inspection only: prints the help text for the fitted model
# object. Nothing later in the visible notebook depends on this cell.
help(model)