In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
Final_Dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Book Recommendation System/Final_Dataset.csv')
Final_Dataset.head()

Unnamed: 0.1,Unnamed: 0,User-ID,Age,Country,ISBN,Book-Rating,Avg_Rating,Total_No_Of_Users_Rated,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,0,8,33.0,canada,2005018,5,7.666667,9,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
1,1,11676,28.0,,2005018,8,7.666667,9,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
2,2,67544,30.0,canada,2005018,8,7.666667,9,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
3,3,116866,32.0,other,2005018,9,7.666667,9,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
4,4,123629,33.0,canada,2005018,9,7.666667,9,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada


# <font style="color: red"> **Popularity Based Filtering**

<font color='voilet'> *As the name suggests Popularity based recommendation system works with the trend. It basically uses the items which are in trend right now. For example, if any book which is usually bought by every new user then there are chances that it may suggest that book to the user who just signed up.<br>
Book weighted avg formula:<br>
Weighted Rating(WR)=[vR/(v+m)]+[mC/(v+m)]<br>
where,<br>
v is the number of votes for the books;<br>
m is the minimum votes required to be listed in the chart;<br>
R is the average rating of the book; and<br>
C is the mean vote across the whole report.<br>
Now we find the values of v,m,R,C.*

In [None]:
C= Final_Dataset['Avg_Rating'].mean()
m= Final_Dataset['Total_No_Of_Users_Rated'].quantile(0.90)
Top_Books = Final_Dataset.loc[Final_Dataset['Total_No_Of_Users_Rated'] >= m]
print(f'C={C} , m={m}')
Top_Books.shape

C=7.626700569505161 , m=64.0


(38570, 12)

<font color='voilet'> *Here we used 90th percentile as our cutoff. In other words, for a book to feature in the charts, it must have more votes than at least 90% of the books in the list.<br>
We see that there are 38570 books which qualify to be in this list. Now, we need to calculate our metric for each qualified book. To do this, we will define a function, weighted_rating() and define a new feature score, of which we’ll calculate the value by applying this function to our DataFrame of qualified books:*


In [None]:
def weighted_rating(x, m=m, C=C):
    v = x['Total_No_Of_Users_Rated']
    R = x['Avg_Rating']
    return (v/(v+m) * R) + (m/(m+v) * C)


Top_Books['Score'] = Top_Books.apply(weighted_rating,axis=1)


#Sorting books based on score calculated above
Top_Books = Top_Books.sort_values('Score', ascending=False)

In [None]:
#Keeping only one entry of each book
Top_Books=Top_Books.sort_values('Score', ascending=False).drop_duplicates('ISBN').sort_index()
cm=sns.light_palette('yellow',as_cmap=True)
#Sorting books based on score calculated above
Top_Books = Top_Books.sort_values('Score', ascending=False)

#Printing the top 20 books
Top_Books[['Book-Title', 'Total_No_Of_Users_Rated', 'Avg_Rating', 'Score']].reset_index(drop=True).head(20).style.background_gradient(cmap=cm)

Unnamed: 0,Book-Title,Total_No_Of_Users_Rated,Avg_Rating,Score
0,Harry Potter and the Goblet of Fire (Book 4),137,9.262774,8.741835
1,Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback)),313,8.939297,8.716469
2,Harry Potter and the Order of the Phoenix (Book 5),206,9.033981,8.700403
3,To Kill a Mockingbird,214,8.943925,8.640679
4,Harry Potter and the Prisoner of Azkaban (Book 3),133,9.082707,8.60969
5,"The Return of the King (The Lord of the Rings, Part 3)",77,9.402597,8.596517
6,Harry Potter and the Prisoner of Azkaban (Book 3),141,9.035461,8.595653
7,Harry Potter and the Sorcerer's Stone (Book 1),119,8.983193,8.508791
8,Harry Potter and the Chamber of Secrets (Book 2),189,8.783069,8.490549
9,Harry Potter and the Chamber of Secrets (Book 2),126,8.920635,8.484783


<font color='red'> **The Popularity based recommender provide a general chart of recommended books to all the users. They are not sensitive to the interests and tastes of a particular user.**

In [None]:
Top_Books.head(1)

Unnamed: 0.1,Unnamed: 0,User-ID,Age,Country,ISBN,Book-Rating,Avg_Rating,Total_No_Of_Users_Rated,Book-Title,Book-Author,Year-Of-Publication,Publisher,Score
5954,5954,171011,13.0,usa,439139597,10,9.262774,137,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000.0,Scholastic,8.741835


In [None]:
Books = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Book Recommendation System/Books.csv')
Top_Books = pd.merge(Top_Books, Books, on='ISBN')
Top_Books = Top_Books[['ISBN','Book-Title_x','Book-Author_x','Year-Of-Publication_x','Publisher_x', 'Total_No_Of_Users_Rated', 'Avg_Rating','Image-URL-L']]
print(Top_Books.shape)
Top_Books.head()

(333, 8)


Unnamed: 0,ISBN,Book-Title_x,Book-Author_x,Year-Of-Publication_x,Publisher_x,Total_No_Of_Users_Rated,Avg_Rating,Image-URL-L
0,0439139597,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000.0,Scholastic,137,9.262774,http://images.amazon.com/images/P/0439139597.0...
1,059035342X,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999.0,Arthur A. Levine Books,313,8.939297,http://images.amazon.com/images/P/059035342X.0...
2,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003.0,Scholastic,206,9.033981,http://images.amazon.com/images/P/043935806X.0...
3,0446310786,To Kill a Mockingbird,Harper Lee,1988.0,Little Brown &amp; Company,214,8.943925,http://images.amazon.com/images/P/0446310786.0...
4,0439136369,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001.0,Scholastic,133,9.082707,http://images.amazon.com/images/P/0439136369.0...


In [None]:
Top_Books.to_csv('/content/drive/MyDrive/Colab Notebooks/Book Recommendation System/Top_Books.csv')

# ***Create User Book Matrix***

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
first_half, second_half = train_test_split(Final_Dataset, test_size=0.95, shuffle = False)

In [6]:
del(second_half)
del(Final_Dataset)
first_half.shape

(19192, 12)

In [7]:
first_half[first_half['User-ID']==8]

Unnamed: 0.1,Unnamed: 0,User-ID,Age,Country,ISBN,Book-Rating,Avg_Rating,Total_No_Of_Users_Rated,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,0,8,33.0,canada,0002005018,5,7.666667,9,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
9,9,8,33.0,canada,074322678X,5,5.0,1,Where You'll Find Me: And Other Stories,Ann Beattie,2002.0,Scribner
10,10,8,33.0,canada,0887841740,5,5.0,1,The Middle Stories,Sheila Heti,2004.0,House of Anansi Press
11,11,8,33.0,canada,1552041778,5,5.0,1,Jane Doe,R. J. Kaiser,1999.0,Mira Books
12,12,8,33.0,canada,1567407781,6,6.0,1,The Witchfinder (Amos Walker Mystery Series),Loren D. Estleman,1998.0,Brilliance Audio - Trade
13,13,8,33.0,canada,1575663937,6,6.0,1,More Cunning Than Man: A Social History of Rat...,Robert Hendrickson,1999.0,Kensington Publishing Corp.
14,14,8,33.0,canada,1881320189,7,7.0,2,Goodbye to the Buttermilk Sky,Julia Oliver,1994.0,River City Pub


In [9]:
first_half['User-ID'].to_csv('/content/drive/MyDrive/Colab Notebooks/Book Recommendation System/first_row.csv')

In [10]:
matrix = first_half.pivot_table(index='User-ID', columns='Book-Title', values='Book-Rating')
matrix.head()

Book-Title,1984,2061: Odyssey Three,"28 Barbary Lane : A \Tales of the City\"" Omnibus""",2nd Chance,"A Beautiful Mind : A Biography of John Forbes Nash, Jr.","A Child Called \It\"": One Child's Courage to Survive""",A Christmas Story,A Cuisine of Leftovers,A Dangerous Place: California's Unsettling Fate (Images of America),A Density of Souls,...,Women on the Case,World of Pies : A Novel,Writing Down the Bones,Wuthering Heights,Wuthering Heights (Penguin Popular Classics),"XIII, tome 13, L'enquÃ?Âªte : the XIII mystery",Yarrow,Year's Best Fantasy (Year's Best Fantasy),You Cannot Be Serious,stardust
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,
12,,,,,,,,,,,...,,,,,,,,,,
14,,,,,,,,,,,...,,,,,,,,,,
16,,,,,,,,,,,...,,,,,,,,,,


In [9]:
# all the books that are rated by the user 226745
matrix.loc[56][matrix.loc[56]>0]

Book-Title
Haveli (Laurel Leaf Books)    9.0
LONESOME DOVE                 7.0
Name: 56, dtype: float64

# Cosine Similarity

In [11]:
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
df = matrix

df_filled = df.fillna(0)
original_non_nan_counts = df.notna().sum(axis=1)

# Step 2: Calculate sum of all values per row
sum_of_all = df_filled.sum(axis=1)

# Step 3: Calculate normalization factor only where sum is not zero
normalization_factor = original_non_nan_counts.div(sum_of_all.where(sum_of_all != 0))

# Step 4: Normalize only rows that had non-zero sums initially
normalized_df = df_filled.sub(df_filled.multiply(normalization_factor, axis=0), fill_value=0)

# Step 5: Ensure completely zero rows stay zero
normalized_df[sum_of_all == 0] = 0

# free matrix datafram
# del(matrix)

# Print the resulting DataFrame
normalized_df

Book-Title,1984,2061: Odyssey Three,"28 Barbary Lane : A \Tales of the City\"" Omnibus""",2nd Chance,"A Beautiful Mind : A Biography of John Forbes Nash, Jr.","A Child Called \It\"": One Child's Courage to Survive""",A Christmas Story,A Cuisine of Leftovers,A Dangerous Place: California's Unsettling Fate (Images of America),A Density of Souls,...,Women on the Case,World of Pies : A Novel,Writing Down the Bones,Wuthering Heights,Wuthering Heights (Penguin Popular Classics),"XIII, tome 13, L'enquÃ?Âªte : the XIII mystery",Yarrow,Year's Best Fantasy (Year's Best Fantasy),You Cannot Be Serious,stardust
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278798,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
normalized_df[normalized_df.index==8]['Goodbye to the Buttermilk Sky']

User-ID
8    5.74359
Name: Goodbye to the Buttermilk Sky, dtype: float64

In [14]:
cosine_distance = cosine_similarity(normalized_df)
cosine_distance

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [15]:
cosine_distance = pd.DataFrame(cosine_distance)

In [16]:
cosine_distance

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10823,10824,10825,10826,10827,10828,10829,10830,10831,10832
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10828,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
10829,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
10830,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
10831,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [17]:
first_row = cosine_distance.iloc[0]
# Find indices and values of non-zero columns
non_zero_columns = first_row[first_row != 0]
non_zero_columns

0        1.000000
617      0.026955
2819     0.237826
4695     0.237826
4963     0.336336
7823     0.336336
8261     0.336336
8549     0.237826
8850     0.470871
10271    0.088326
Name: 0, dtype: float64

In [18]:
sorted_df = non_zero_columns.sort_values(ascending=False)
sorted_df

0        1.000000
8850     0.470871
4963     0.336336
7823     0.336336
8261     0.336336
2819     0.237826
8549     0.237826
4695     0.237826
10271    0.088326
617      0.026955
Name: 0, dtype: float64

In [19]:
first_row = cosine_distance.iloc[0]
# Find indices and values of non-zero columns
non_zero_columns = first_row[first_row != 1]
sorted_df = non_zero_columns.sort_values(ascending=False)
sorted_df

0        1.000000
8850     0.470871
7823     0.336336
8261     0.336336
4963     0.336336
           ...   
3613     0.000000
3614     0.000000
3615     0.000000
3616     0.000000
10832    0.000000
Name: 0, Length: 10833, dtype: float64

### **`We can see that User 8 and User 2168 shows a high corelation so lets check the books they are reading and what recomendation should given to both of them looking at their common interest`**

In [20]:
normalized_df.iloc[2168][normalized_df.iloc[2168]>0]

Book-Title
The Phantom Tollbooth    9.0
Name: 50650, dtype: float64

In [21]:
normalized_df.iloc[8][normalized_df.iloc[8]>0]

Book-Title
Seabiscuit: An American Legend    8.052632
To Kill a Mockingbird             8.947368
Name: 26, dtype: float64

In [37]:
first_half.index[first_half['User-ID'] == 8][0]

0

In [38]:
def userBasedCollaborativeFilter(user_id):
  # indexes to navigate in user-book rating matrix
  # indexes = normalized_df.index
  index = first_half.index[first_half['User-ID'] == user_id][0]

  # fetching the books that are already read be the user
  books_read_by_user = normalized_df.loc[user_id][normalized_df.loc[user_id]>0]

  # calculating the cosine distance of this user with all other users, and selecting user with highest cosine relation
  cos_dist = cosine_distance.iloc[index]
  non_zero_columns = cos_dist[cos_dist != 0]
  sorted_df = non_zero_columns.sort_values(ascending=False)
  users_with_similar_interest = sorted_df[sorted_df>.3].index
  books = list()
  for user in users_with_similar_interest[1:]:
     books.append(normalized_df.iloc[user][normalized_df.iloc[user]>0])

  merged_df = pd.concat(books)
  max_ratings = merged_df.groupby(level=0).max()
  max_ratings = max_ratings.sort_values(ascending=False)
  for i in books_read_by_user.index:
    try:
      max_ratings.drop(i, inplace = True)
    except:
      pass

  return max_ratings

In [44]:
recommend = userBasedCollaborativeFilter(200273)
recommend

Book-Title
Bant/Spec.Last of the Breed                                  9.000000
Piercing the Darkness                                        9.000000
The Rescue                                                   8.928571
Summer Island                                                8.052632
The Pilot's Wife : A Novel                                   8.052632
Skipping Christmas                                           8.000000
Midnight in the Garden of Good and Evil: A Savannah Story    7.941176
Prophet                                                      7.920000
Snow Falling on Cedars                                       7.142857
Death in the Clouds                                          7.058824
The Door to December                                         7.000000
From the Corner of His Eye                                   6.933333
The Clan of the Cave Bear (Earth's Children (Paperback))     6.066667
The Nanny Diaries: A Novel                                   5.280000
Slow Walt

In [22]:
normalized_df.to_csv('/content/drive/MyDrive/Colab Notebooks/Book Recommendation System/normalized_df.csv')
cosine_distance.to_csv('/content/drive/MyDrive/Colab Notebooks/Book Recommendation System/cosine_distance.csv')

In [27]:
def get_user(user_id = ''):
  if user_id:
    books_read_by_user = normalized_df.loc[user_id][normalized_df.loc[user_id]>0]
  else:
    books_read_by_user = normalized_df.head()
    books_read_by_user = first_half.sort_values(by='Book-Rating',ascending=False)
  return books_read_by_user.head(10)

In [28]:
get_user(8)

Book-Title
Clara Callan                                               4.102564
Goodbye to the Buttermilk Sky                              5.743590
Jane Doe                                                   4.102564
More Cunning Than Man: A Social History of Rats and Man    4.923077
The Middle Stories                                         4.102564
The Witchfinder (Amos Walker Mystery Series)               4.923077
Where You'll Find Me: And Other Stories                    4.102564
Name: 8, dtype: float64

In [None]:
get_user(882)

Book-Title
A room with a view                                                                                                  8.840580
Across the Bitter Sea                                                                                               7.956522
Bridge to Terabithia                                                                                                8.840580
CADDIE WOODLAWN                                                                                                     8.840580
Dancing On My Grave Gelsey Kirkland                                                                                 7.956522
Diceys Song                                                                                                         7.956522
Eva Luna                                                                                                            8.840580
Farewell to Manzanar: A True Story of Japanese American Experience During and  After the World War II Internment  

In [None]:
X = normalized_df.to_numpy()

In [None]:
X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
from sklearn.decomposition import NMF

In [None]:
model = NMF(n_components=2, init='random', random_state=0)

In [None]:
W = model.fit_transform(X)

In [None]:
H = model.components_

In [None]:
W

array([[0.00325121, 0.00138479],
       [0.01196238, 0.00985759],
       [0.        , 0.        ],
       ...,
       [0.03135685, 0.016819  ],
       [0.03232558, 0.01002755],
       [0.01869817, 0.00810086]])

In [None]:
H

array([[0.1701421 , 0.03891201, 0.00290061, ..., 0.00041601, 0.00570177,
        0.06876529],
       [0.145641  , 0.00174094, 0.02356109, ..., 0.0002287 , 0.03886875,
        0.03918858]])

In [None]:
print('original matrix')

print(X)
print('---------------------------')
print('\n Factorized Matrix')

print(np.dot(W, H))

original matrix
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
---------------------------

 Factorized Matrix
[[7.54849183e-04 1.28921777e-04 4.20576747e-05 ... 1.66922508e-06
  7.23626921e-05 2.77838063e-04]
 [3.47097337e-03 4.82641800e-04 2.66953745e-04 ... 7.23086347e-06
  4.51358718e-04 1.20890147e-03]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [7.78465648e-03 1.24943907e-03 4.87228148e-04 ... 1.68911696e-05
  8.32522898e-04 2.81537582e-03]
 [6.96036479e-03 1.27531080e-03 3.30024117e-04 ... 1.57409609e-05
  5.74071266e-04 2.61584357e-03]
 [4.36116445e-03 7.41686689e-04 2.45101402e-04 ... 9.63123485e-06
  4.21483057e-04 1.60324676e-03]]


In [None]:
factorized_array = np.dot(W,H)
factorized_array

array([[7.54849183e-04, 1.28921777e-04, 4.20576747e-05, ...,
        1.66922508e-06, 7.23626921e-05, 2.77838063e-04],
       [3.47097337e-03, 4.82641800e-04, 2.66953745e-04, ...,
        7.23086347e-06, 4.51358718e-04, 1.20890147e-03],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [7.78465648e-03, 1.24943907e-03, 4.87228148e-04, ...,
        1.68911696e-05, 8.32522898e-04, 2.81537582e-03],
       [6.96036479e-03, 1.27531080e-03, 3.30024117e-04, ...,
        1.57409609e-05, 5.74071266e-04, 2.61584357e-03],
       [4.36116445e-03, 7.41686689e-04, 2.45101402e-04, ...,
        9.63123485e-06, 4.21483057e-04, 1.60324676e-03]])

In [None]:
# Create a copy of normalized dataframe and then paste the ratings from factorized matrix data into the copied dataframe
factorized_df = normalized_df.copy()
factorized_df

Book-Title,1984,2061: Odyssey Three,"28 Barbary Lane : A \Tales of the City\"" Omnibus""",2nd Chance,"A Beautiful Mind : A Biography of John Forbes Nash, Jr.","A Child Called \It\"": One Child's Courage to Survive""",A Christmas Story,A Cuisine of Leftovers,A Dangerous Place: California's Unsettling Fate (Images of America),A Density of Souls,...,Women on the Case,World of Pies : A Novel,Writing Down the Bones,Wuthering Heights,Wuthering Heights (Penguin Popular Classics),"XIII, tome 13, L'enquÃ?Âªte : the XIII mystery",Yarrow,Year's Best Fantasy (Year's Best Fantasy),You Cannot Be Serious,stardust
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278798,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
factorized_df.iloc[:] = factorized_array
factorized_df.head(10)

Book-Title,1984,2061: Odyssey Three,"28 Barbary Lane : A \Tales of the City\"" Omnibus""",2nd Chance,"A Beautiful Mind : A Biography of John Forbes Nash, Jr.","A Child Called \It\"": One Child's Courage to Survive""",A Christmas Story,A Cuisine of Leftovers,A Dangerous Place: California's Unsettling Fate (Images of America),A Density of Souls,...,Women on the Case,World of Pies : A Novel,Writing Down the Bones,Wuthering Heights,Wuthering Heights (Penguin Popular Classics),"XIII, tome 13, L'enquÃ?Âªte : the XIII mystery",Yarrow,Year's Best Fantasy (Year's Best Fantasy),You Cannot Be Serious,stardust
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0007548492,0.0001289218,4.205767e-05,0.0004072945,1.305761e-05,0.0007566456,4.830448e-08,3.364104e-08,4.20513e-08,5.636565e-05,...,7.696121e-06,5.605027e-05,8.701779e-05,2.268062e-05,1.309133e-05,8.317515e-07,9.031868e-05,1.669225e-06,7.236269e-05,0.0002778381
9,0.003470973,0.0004826418,0.0002669537,0.002110517,9.123549e-05,0.003573463,2.761707e-07,1.264435e-07,1.580544e-07,0.0003561676,...,3.671196e-05,0.0003828158,0.0004616214,0.00010333,8.511915e-05,3.362822e-06,0.0003357444,7.230863e-06,0.0004513587,0.001208901
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,2.948904e-06,1.892515e-07,3.742303e-07,2.269417e-06,1.400286e-07,3.224827e-06,3.429659e-10,5.081688e-11,6.35211e-11,4.969671e-07,...,3.384235e-08,5.759199e-07,5.154293e-07,8.586265e-08,1.222635e-07,1.931582e-09,1.257925e-07,5.252185e-09,6.211957e-07,8.894611e-07
16,0.001588314,1.898613e-05,0.0002569496,0.001401283,9.890641e-05,0.001807874,2.254233e-07,6.104421e-09,7.630526e-09,0.0003406917,...,1.922425e-05,0.0004043717,0.000323914,4.552324e-05,8.461624e-05,6.927114e-07,7.85149e-06,2.494132e-06,0.0004238899,0.000427378
17,0.002274541,0.0004026315,0.0001172754,0.001196729,3.533666e-05,0.002267843,1.386056e-07,1.049987e-07,1.312484e-07,0.0001573785,...,2.302017e-05,0.0001528169,0.0002543022,6.846561e-05,3.624428e-05,2.565613e-06,0.0002823773,5.086919e-06,0.0002028011,0.0008460173
19,0.001049689,0.0001835909,5.560521e-05,0.0005570762,1.693663e-05,0.001048498,6.505567e-08,4.788683e-08,5.985854e-08,7.458481e-05,...,1.065037e-05,7.304623e-05,0.000118599,3.157714e-05,1.722904e-05,1.174706e-06,0.0001287114,2.338621e-06,9.598329e-05,0.000389048
22,1.0490449999999999e-237,1.253991e-239,1.697094e-238,9.255153999999999e-238,6.532543e-239,1.194059e-237,1.4888700000000001e-241,4.031831e-243,5.039789e-243,2.250191e-238,...,1.269718e-239,2.670783e-238,2.139378e-238,3.006707e-239,5.58871e-239,4.575201e-241,5.185729999999999e-240,1.647317e-240,2.7996969999999998e-238,2.8227339999999996e-238
26,0.03462385,0.005131899,0.002450983,0.02036814,0.0008202226,0.03537472,2.599129e-06,1.342689e-06,1.678361e-06,0.00327343,...,0.0003623982,0.003458295,0.004427618,0.001033512,0.0007772797,3.487545e-05,0.00357837,7.341076e-05,0.004160656,0.01225693
32,0.0006881877,8.226348e-06,0.0001113317,0.0006071507,4.285437e-05,0.0007833191,9.767188e-08,2.644936e-09,3.30617e-09,0.0001476156,...,8.329523e-06,0.000175207,0.0001403461,1.97244e-05,3.66627e-05,3.001394e-07,3.40191e-06,1.080663e-06,0.0001836639,0.0001851752


In [None]:
get_user(56)

Book-Title
Haveli (Laurel Leaf Books)    7.875
LONESOME DOVE                 6.125
Name: 56, dtype: float64

In [None]:
factorized_df.iloc[56].sort_values(ascending=False)

Book-Title
Harry Potter and the Chamber of Secrets (Book 2)       0.096203
Harry Potter and the Prisoner of Azkaban (Book 3)      0.090874
The Lovely Bones: A Novel                              0.076737
Harry Potter and the Goblet of Fire (Book 4)           0.052605
Harry Potter and the Sorcerer's Stone (Book 1)         0.041833
                                                         ...   
Going down swinging                                    0.000000
Tollivers Reisen. Stadtgeschichten IV.                 0.000000
The Vulcan Academy Murders                             0.000000
The Adventures of Drew and Ellie: The Magical Dress    0.000000
A Temporary New Wife                                   0.000000
Name: 226, Length: 817, dtype: float64

In [None]:
recommend = userBasedCollaborativeFilter(56)
recommend

Book-Title
The Scarlet Letter                                                                               9.000000
The Time Traveler's Wife                                                                         8.823529
Snow Falling on Cedars                                                                           7.058824
The Pilot's Wife : A Novel Tag: Author of the Weight of Water (Oprah's Book Club (Hardcover))    7.058824
Dragonfly in Amber                                                                               6.909091
A Painted House                                                                                  6.176471
So You Want to Be a Wizard: The First Book in the Young Wizards Series                           5.666667
The Street Lawyer                                                                                5.181818
Me Talk Pretty One Day                                                                           5.000000
Chosen Prey                        