In [1]:
import pandas as pd
import numpy as np
import csv

In [2]:
books_df = pd.read_csv('books.csv',usecols=['bookId','title'],dtype={'bookId': 'int32', 'title': 'str'})
rating_df=pd.read_csv('ratings.csv',usecols=['userId', 'bookId', 'rating'],
    dtype={'userId': 'int32', 'bookId': 'int32', 'rating': 'float32'})

In [3]:
print(books_df.head())

   bookId                               title
0       1                    Toy Story (1995)
1       2                      Jumanji (1995)
2       3             Grumpier Old Men (1995)
3       4            Waiting to Exhale (1995)
4       5  Father of the Bride Part II (1995)


In [4]:
print(rating_df.head())

   userId  bookId  rating
0       1       1     4.0
1       1       3     4.0
2       1       6     4.0
3       1      47     5.0
4       1      50     5.0


In [5]:
df = pd.merge(rating_df,books_df,on='bookId')
print(df.head())

   userId  bookId  rating             title
0       1       1     4.0  Toy Story (1995)
1       1       1     4.0              ravi
2       5       1     4.0  Toy Story (1995)
3       5       1     4.0              ravi
4       7       1     4.5  Toy Story (1995)


In [6]:
combine_book_rating = df.dropna(axis = 0, subset = ['title'])
book_ratingCount = (combine_book_rating.
     groupby(by = ['title'])['rating'].
     count().
     reset_index().
     rename(columns = {'rating': 'totalRatingCount'})
     [['title', 'totalRatingCount']]
    )

In [7]:
print(book_ratingCount.head())

                                     title  totalRatingCount
0                               '71 (2014)                 1
1  'Hellboy': The Seeds of Creation (2004)                 1
2                   'Round Midnight (1986)                 2
3                      'Salem's Lot (2004)                 1
4                'Til There Was You (1997)                 2


In [8]:
rating_with_totalRatingCount = combine_book_rating.merge(book_ratingCount, left_on = 'title', right_on = 'title', how = 'left')
print(rating_with_totalRatingCount.head())

   userId  bookId  rating             title  totalRatingCount
0       1       1     4.0  Toy Story (1995)               216
1       1       1     4.0              ravi               216
2       5       1     4.0  Toy Story (1995)               216
3       5       1     4.0              ravi               216
4       7       1     4.5  Toy Story (1995)               216


In [9]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(book_ratingCount['totalRatingCount'].describe())

count   9722.000
mean      10.407
std       22.524
min        1.000
25%        1.000
50%        3.000
75%        9.000
max      329.000
Name: totalRatingCount, dtype: float64


In [10]:
popularity_threshold = 50
rating_popular_book= rating_with_totalRatingCount.query('totalRatingCount >= @popularity_threshold')
print(rating_popular_book.head())

   userId  bookId  rating             title  totalRatingCount
0       1       1   4.000  Toy Story (1995)               216
1       1       1   4.000              ravi               216
2       5       1   4.000  Toy Story (1995)               216
3       5       1   4.000              ravi               216
4       7       1   4.500  Toy Story (1995)               216


In [11]:
print(rating_popular_book.shape)

(41691, 5)


In [12]:
book_features_df=rating_popular_book.pivot_table(index='title',columns='userId',values='rating').fillna(0)
print(book_features_df.head())

userId                              1     2     3     4     5     6     7    \
title                                                                         
10 Things I Hate About You (1999) 0.000 0.000 0.000 0.000 0.000 0.000 0.000   
12 Angry Men (1957)               0.000 0.000 0.000 5.000 0.000 0.000 0.000   
2001: A Space Odyssey (1968)      0.000 0.000 0.000 0.000 0.000 0.000 4.000   
28 Days Later (2002)              0.000 0.000 0.000 0.000 0.000 0.000 0.000   
300 (2007)                        0.000 0.000 0.000 0.000 0.000 0.000 0.000   

userId                              8     9     10   ...   601   602   603  \
title                                                ...                     
10 Things I Hate About You (1999) 0.000 0.000 0.000  ... 0.000 0.000 3.000   
12 Angry Men (1957)               0.000 0.000 0.000  ... 5.000 0.000 0.000   
2001: A Space Odyssey (1968)      0.000 0.000 0.000  ... 0.000 0.000 5.000   
28 Days Later (2002)              0.000 0.000 0.000  ...

In [27]:
from scipy.sparse import csr_matrix

book_features_df_matrix = csr_matrix(book_features_df.values)

from sklearn.neighbors import NearestNeighbors


In [15]:
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(book_features_df_matrix)

print(book_features_df.shape)

(452, 606)


In [24]:
query_index = np.random.choice(book_features_df.shape[0])
print(query_index)

278


In [25]:
distances, indices = model_knn.kneighbors(book_features_df.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 6)

print(book_features_df.head())

userId                              1     2     3     4     5     6     7    \
title                                                                         
10 Things I Hate About You (1999) 0.000 0.000 0.000 0.000 0.000 0.000 0.000   
12 Angry Men (1957)               0.000 0.000 0.000 5.000 0.000 0.000 0.000   
2001: A Space Odyssey (1968)      0.000 0.000 0.000 0.000 0.000 0.000 4.000   
28 Days Later (2002)              0.000 0.000 0.000 0.000 0.000 0.000 0.000   
300 (2007)                        0.000 0.000 0.000 0.000 0.000 0.000 0.000   

userId                              8     9     10   ...   601   602   603  \
title                                                ...                     
10 Things I Hate About You (1999) 0.000 0.000 0.000  ... 0.000 0.000 3.000   
12 Angry Men (1957)               0.000 0.000 0.000  ... 5.000 0.000 0.000   
2001: A Space Odyssey (1968)      0.000 0.000 0.000  ... 0.000 0.000 5.000   
28 Days Later (2002)              0.000 0.000 0.000  ...

In [18]:
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(book_features_df.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, book_features_df.index[indices.flatten()[i]], distances.flatten()[i]))

Recommendations for Ed Wood (1994):

1: Get Shorty (1995), with distance of 0.510016918182373:
2: Quiz Show (1994), with distance of 0.551247239112854:
3: Four Weddings and a Funeral (1994), with distance of 0.558853268623352:
4: Clerks (1994), with distance of 0.5813702344894409:
5: Dave (1993), with distance of 0.5901564359664917:


In [19]:
print("If you want to give rating write yes otherwise no : ")
rat = input()

If you want to give rating write yes otherwise no : 
yes


In [20]:
if(rat == 'yes'):
    with open('books.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        user_id = input("Enter the id of user : ")
        book_name = input("Enter the book name : ")
        genres = input("Enter genres of that book : ")
        writer.writerow([user_id, book_name, genres])
    with open('ratings.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        book_id = input("Enter the book id : ")
        rating = input("Rating : ")
        timestamp = input("Enter timestamp : ")
        writer.writerow([book_id,user_id, rating, timestamp])
    print("Record stored in our database")

else:
    print("Thank You !")

Enter the id of user : 120
Enter the book name : harry potter
Enter genres of that book : action
Enter the book id : 3
Rating : 5
Enter timestamp : sdf
Record stored in our database
