# Scikit-surprise familiarization

### Import libraries

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import difflib
import random
%matplotlib inline

### Import relevant datasets

In [36]:
# Read the user-book ratings table and the book metadata table from the local datasets folder.
csv_paths = ('datasets/ratings.csv', 'datasets/books.csv')
ratings_data, books_metadata = map(pd.read_csv, csv_paths)
ratings_data.head()  # preview the first rows of the ratings table

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4


### Create a surprise dataset  
A Surprise dataset is built from three columns, in this order:  
1. User IDs  
2. Item IDs (here, book IDs)  
3. Ratings (here on a 1–5 scale)

In [3]:
from surprise import Dataset, Reader

# load_from_df reads the columns positionally as (user, item, rating),
# so the frame is sliced in exactly that order.
rating_columns = ['user_id', 'book_id', 'rating']

reader = Reader(rating_scale=(1,5))  # ratings range from 1 to 5
data = Dataset.load_from_df(ratings_data[rating_columns], reader)  # wrap the ratings as a Surprise dataset


### Training and cross-validating a simple SVD Model

In [4]:
from surprise import SVD
from surprise.model_selection import KFold, cross_validate

# Fixed seed: without it both the SVD factor initialisation and the fold
# assignment are random, so the scores below change on every run.
SEED = 42

# SVD matrix-factorisation model trained for 10 epochs; verbose=True logs each epoch.
svd = SVD(verbose=True, n_epochs=10, random_state=SEED)

# 3-fold cross-validation reporting RMSE and MAE; the seeded KFold makes the splits repeatable.
cross_validate(svd, data, measures=['RMSE','MAE'], cv=KFold(n_splits=3, random_state=SEED), verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8574  0.8548  0.8566  0.8563  0.0011  
MAE (testset)     0.6760  0.6740  0.6760  0.6753  0.0009  
Fit time          21.10   21.40   20.47   20.99   0.39    
Test time         3.79    3.59    3.82    3.73    0.10    


{'test_rmse': array([0.85739693, 0.85479502, 0.85663335]),
 'test_mae': array([0.67598865, 0.67402016, 0.67599815]),
 'fit_time': (21.104050159454346, 21.402579307556152, 20.470815658569336),
 'test_time': (3.794731378555298, 3.590160369873047, 3.816460609436035)}

### Train model on the entire dataset

In [5]:
# Cross-validation above only scored the model on held-out folds; here the same
# `svd` instance is refit on a trainset built from every rating in `data`.
trainset = data.build_full_trainset() # convert the whole dataset into a Surprise trainset object
svd.fit(trainset) # train; fit returns the fitted model, which is what the cell displays

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1dfc9baa5b0>

### Generating Rating Predictions

In [6]:
user = 20   # id of the user to predict for
item = 100  # id of the book to predict for

# The returned Prediction carries the estimated rating in `est`;
# `r_ui` stays None because no known rating is supplied to the call.
svd.predict(uid=user,iid=item)

Prediction(uid=20, iid=100, r_ui=None, est=4.182675296656429, details={'was_impossible': False})

### Utility functions for current use case

In [15]:
def get_book_id(title, data):
    """Return the id of the book whose title best matches ``title``.

    An exact title match is returned immediately. Otherwise the closest
    title according to ``difflib.get_close_matches`` (default cutoff) is used.

    Parameters
    ----------
    title : str
        Title, or approximate title, to look up.
    data : pandas.DataFrame
        Book metadata with at least the columns ``'title'`` and ``'id'``.

    Returns
    -------
    The ``'id'`` value of the first row carrying the matched title.

    Raises
    ------
    IndexError
        If no title in ``data`` is similar enough to ``title``.
    """
    # Fast path: an exact title needs no fuzzy search over the whole catalogue.
    exact_ids = data.loc[data['title'] == title, 'id'].values
    if len(exact_ids) > 0:
        return exact_ids[0]

    # difflib can only compare strings, so missing or non-string titles are
    # skipped; de-duplicating cannot change which title scores best.
    existing_titles = [t for t in data['title'].unique() if isinstance(t, str)]
    closest_titles = difflib.get_close_matches(title, existing_titles)
    if not closest_titles:
        # Same exception type the bare `closest_titles[0]` used to raise, but with context.
        raise IndexError(f"no book title in data resembles {title!r}")

    return data.loc[data['title'] == closest_titles[0], 'id'].values[0]

def get_book_info(id, data):
    """Return the descriptive fields of every row in ``data`` whose ``'id'`` equals ``id``.

    Parameters
    ----------
    id
        Book id to look up (compared against the ``'id'`` column).
    data : pandas.DataFrame
        Book metadata containing the columns ``'id'``, ``'isbn'``,
        ``'authors'``, ``'title'`` and ``'original_title'``.

    Returns
    -------
    list of dict
        One record per matching row, restricted to the five columns above;
        an empty list when no row matches.
    """
    info_columns = ['id', 'isbn', 'authors', 'title', 'original_title']
    matching_rows = data.loc[data['id'] == id, info_columns]
    return matching_rows.to_dict(orient='records')

def predict_review(user_id, title, model, data):
    """Estimate the rating ``user_id`` would give the book best matching ``title``.

    The title is resolved to a book id with ``get_book_id`` and that id is
    passed to ``model.predict``.

    Parameters
    ----------
    user_id
        Id of the user, passed to the model as ``uid``.
    title : str
        Title (or approximate title) of the book.
    model
        Fitted model exposing ``predict(uid=..., iid=...)`` whose result has an ``est`` attribute.
    data : pandas.DataFrame
        Book metadata used to resolve the title to an id.

    Returns
    -------
    The ``est`` attribute of the model's prediction.
    """
    matched_id = get_book_id(title, data)
    return model.predict(uid=user_id, iid=matched_id).est

def generate_recommendation(user_id, model, data, thresh=4, seed=None):
    """Return a randomly chosen book whose predicted rating for ``user_id`` is at least ``thresh``.

    Books are visited in random order and the first one whose estimated
    rating meets the threshold is returned.

    Parameters
    ----------
    user_id
        Id of the user, passed to the model as ``uid``.
    model
        Fitted model exposing ``predict(uid=..., iid=...)`` whose result has an ``est`` attribute.
    data : pandas.DataFrame
        Book metadata; must contain an ``'id'`` column plus the columns read by ``get_book_info``.
    thresh : float, default 4
        Minimum predicted rating a book needs to be recommended.
    seed : int or None, default None
        When given, the visiting order is drawn from a private generator seeded
        with this value, making the result repeatable. When None, the
        module-level ``random`` state is used, as before.

    Returns
    -------
    list of dict or None
        The ``get_book_info`` records of the recommended book, or None when
        no book reaches ``thresh``.
    """
    # A private generator keeps a seeded call from disturbing the global random state.
    rng = random if seed is None else random.Random(seed)
    book_ids = data['id'].tolist()
    rng.shuffle(book_ids)

    for book_id in book_ids:
        # Score each catalogue entry by its id directly: resolving the id back
        # from the title needed a fuzzy search over every title, twice per book.
        if model.predict(uid=user_id, iid=book_id).est >= thresh:
            return get_book_info(book_id, data)

    # Nothing met the threshold; callers must be ready for None.
    return None





In [20]:
# Pick a random book whose predicted rating for user 109 meets the default threshold (thresh=4).
# The result is a list of record dicts, or None when no book reaches the threshold.
rec_dict = generate_recommendation(109, svd, books_metadata)

In [34]:
# generate_recommendation returns None when no book reaches the threshold,
# so guard before indexing into the result.
if rec_dict:
    recommended_book = rec_dict[0]
    print("Book title recommended:", recommended_book['title'])
    print("Written by:", recommended_book['authors'])
else:
    print("No book met the rating threshold for this user.")

Book title recommended: Hammered (The Iron Druid Chronicles, #3)
Written by: Kevin Hearne
