In [1]:
import pandas as pd
import numpy as np
from surprise import Reader, Dataset
from surprise import SVD, model_selection, accuracy
import pickle
import copy

In [2]:
with open("ratings_not_zero",'rb') as f:
    cleaned_dataset = pickle.load(f)

In [3]:
with open("books_details",'rb') as fp:
    books = pickle.load(fp)

In [4]:
cleaned_dataset

Unnamed: 0,user_id,isbn,book_rating
1,276726,0155061224,2
3,276729,052165615X,1
4,276729,0521795028,3
8,276744,038550120X,3
16,276747,0060517794,4
...,...,...,...
1149771,276704,0743211383,3
1149773,276704,0806917695,2
1149775,276704,1563526298,4
1149777,276709,0515107662,5


In [5]:
books

Unnamed: 0,isbn,book_title,book_author,year_of_publication,publisher,Publisher
0,0195153448,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press,
1,0002005018,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,
2,0060973129,Decision in Normandy,Carlo D'Este,1991.0,HarperPerennial,
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999.0,Farrar Straus Giroux,
4,0399135782,The Kitchen God's Wife,Amy Tan,1991.0,Putnam Pub Group,
...,...,...,...,...,...,...
250007,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988.0,Random House Childrens Pub (Mm),
250008,0525447644,From One to One Hundred,Teri Sloat,1991.0,Dutton Books,
250009,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004.0,HarperSanFrancisco,
250010,0192126040,Republic (World's Classics),Plato,1996.0,Oxford University Press,


In [6]:
# userid, bookid, rating
new_data = [
    (1001,'0425176428', 3),
    (1001,'1552041778', 4),
    (1001,'0771074670', 2),
    (1001,'0971880107', 1),
    (1001,'0375406328', 5),
    (1001,'3442410665', 3),
    (1001,'3404921038', 2),
    (1001,'038078243X', 4),
    (1001,'0375410538', 3),
    (1001,'0966986105', 4)
]

In [7]:
#changed above info to dataframe
new_df = pd.DataFrame(new_data,columns=["user_id","isbn","book_rating"])
new_df

Unnamed: 0,user_id,isbn,book_rating
0,1001,0425176428,3
1,1001,1552041778,4
2,1001,0771074670,2
3,1001,0971880107,1
4,1001,0375406328,5
5,1001,3442410665,3
6,1001,3404921038,2
7,1001,038078243X,4
8,1001,0375410538,3
9,1001,0966986105,4


In [8]:
#just to get book name
def res_to_book(res,books):
    res_book = list(res['isbn'].values)
    books_names = []
    for i in res_book:
        books_names.append(books[books.isbn == i].book_title.values[0])
    return books_names

In [9]:
#sending above info to res_to_book function
res_to_book(new_df,books)

["What If?: The World's Foremost Military Historians Imagine What Might Have Been",
 'Jane Doe',
 'Nights Below Station Street',
 'Wild Animus',
 'Lying Awake',
 'Sturmzeit. Roman.',
 'Wie Barney es sieht.',
 "Miss Zukas and the Raven's Dance",
 "Anil's Ghost",
 'Prescription for Terror']

In [10]:
#merged user '1001' ko data with existing rating df for predicting
merged_df = pd.concat([cleaned_dataset,new_df], axis=0)
merged_df

Unnamed: 0,user_id,isbn,book_rating
1,276726,0155061224,2
3,276729,052165615X,1
4,276729,0521795028,3
8,276744,038550120X,3
16,276747,0060517794,4
...,...,...,...
5,1001,3442410665,3
6,1001,3404921038,2
7,1001,038078243X,4
8,1001,0375410538,3


In [11]:
# training the model with 
def trainData(df, model):
    '''
        df should contain user id, book id and rating column
    '''
    model2 = copy.deepcopy(model)
    reader = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(df, reader)
    data = dataset.build_full_trainset()
    model = model2.fit(data)
    return model

In [12]:
def recommend(user,df,model,output_limit=10):
    user_rated_books = df.loc[df.user_id==1001, 'isbn']
    unique_ids = df.isbn.unique()

    # remove the rated movies for the recommendations
    book_ids_topredict = np.setdiff1d(unique_ids,user_rated_books)
    
    pred = []
    for iid in book_ids_topredict:
        pred.append((iid, model.predict(uid=user,iid=iid).est))
        
    pred_df = pd.DataFrame(pred,columns=['isbn','pred_rating'])
    pred_df.sort_values('pred_rating',ascending=False, inplace=True)
    
    return pred_df.head(output_limit)

In [21]:
#Model initialization
model = SVD()

#Calling trainData function
model = trainData(cleaned_dataset,model)

In [22]:
#Calling predict function
result = recommend(1001,cleaned_dataset,model)

In [23]:
#Predicted output
res_to_book(result,books)

['Harry Potter and the Chamber of Secrets Postcard Book',
 'The Two Towers (The Lord of the Rings, Part 2)',
 'Dilbert: A Book of Postcards',
 '52 Deck Series: 52 Ways to Celebrate Friendship',
 "My Sister's Keeper : A Novel (Picoult, Jodi)",
 'Calvin and Hobbes',
 '84 Charing Cross Road',
 'The Return of the King (The Lord of the Rings, Part 3)',
 'Lonesome Dove',
 'The Giving Tree']

In [16]:
cleaned_dataset

Unnamed: 0,user_id,isbn,book_rating
1,276726,0155061224,2
3,276729,052165615X,1
4,276729,0521795028,3
8,276744,038550120X,3
16,276747,0060517794,4
...,...,...,...
1149771,276704,0743211383,3
1149773,276704,0806917695,2
1149775,276704,1563526298,4
1149777,276709,0515107662,5


In [17]:
merged_df

Unnamed: 0,user_id,isbn,book_rating
1,276726,0155061224,2
3,276729,052165615X,1
4,276729,0521795028,3
8,276744,038550120X,3
16,276747,0060517794,4
...,...,...,...
5,1001,3442410665,3
6,1001,3404921038,2
7,1001,038078243X,4
8,1001,0375410538,3
