# Importing necessary packages

In [330]:
import numpy as np
import pandas as pd

# Importing  and reading datasets

In [331]:
# Load the books dataset
books_df = pd.read_csv("./dataset/Books.csv", low_memory = False, usecols=["ISBN", "Book-Title", "Book-Author","Year-Of-Publication"])

# Load the users dataset
users_df = pd.read_csv("./dataset/Users.csv", low_memory = False)

# Load the ratings dataset
ratings_df = pd.read_csv("./dataset/Ratings.csv", low_memory = False)


In [332]:
books_df.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication
0,195153448,Classical Mythology,Mark P. O. Morford,2002
1,2005018,Clara Callan,Richard Bruce Wright,2001
2,60973129,Decision in Normandy,Carlo D'Este,1991
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999


In [333]:
ratings_df.head()


Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [334]:
users_df.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


# Printing total datasets

In [335]:
print(books_df.shape)
print(ratings_df.shape)
print(users_df.shape)

(271360, 4)
(1149780, 3)
(278858, 3)


#  The condition of this recommendation system is that the user must have rated  atleast 100 books and the book must have at least 100 ratings to be considered


In [336]:
# Filter out users who have reviewed fewer than 100 books
users_filtered = users_df[users_df['User-ID'].isin(ratings_df['User-ID'].value_counts()[ratings_df['User-ID'].value_counts() >= 100].index)]
users_filtered 


Unnamed: 0,User-ID,Location,Age
182,183,"porto, porto, portugal",27.0
253,254,"minneapolis, minnesota, usa",24.0
506,507,"dumas, arkansas, usa",
881,882,"berkeley, california, usa",23.0
1423,1424,"north avoca, new south wales, australia",
...,...,...,...
277477,277478,"schiedam, zuid-holland, netherlands",31.0
277638,277639,"forsyth, montana, usa",48.0
278136,278137,"san antonio, texas, usa",27.0
278187,278188,"lake george, new york, usa",34.0


In [337]:
# Filter out books that have fewer than 100 reviews
books_filtered = books_df[books_df['ISBN'].isin(ratings_df['ISBN'].value_counts()[ratings_df['ISBN'].value_counts() >= 100].index)]
books_filtered


Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication
18,0440234743,The Testament,John Grisham,1999
19,0452264464,Beloved (Plume Contemporary Fiction),Toni Morrison,1994
26,0971880107,Wild Animus,Rich Shapero,2004
27,0345402871,Airframe,Michael Crichton,1997
28,0345417623,Timeline,MICHAEL CRICHTON,2000
...,...,...,...,...
28071,0425178765,Easy Prey,John Sandford,2001
29214,0449223604,M Is for Malice,Sue Grafton,1998
30534,0345444884,The Talisman,STEPHEN KING,2001
30774,0060008032,Angels,Marian Keyes,2003


#  Merging the datasets

In [338]:
# Merge the filtered users and ratings dataframes
ratings_filtered = pd.merge(users_filtered, ratings_df, on='User-ID')
ratings_filtered

Unnamed: 0,User-ID,Location,Age,ISBN,Book-Rating
0,183,"porto, porto, portugal",27.0,058608195X,0
1,183,"porto, porto, portugal",27.0,100940/86,9
2,183,"porto, porto, portugal",27.0,10622/86,0
3,183,"porto, porto, portugal",27.0,10745/85,0
4,183,"porto, porto, portugal",27.0,10756/85,0
...,...,...,...,...,...
658800,278418,"omaha, nebraska, usa",,5008601364,0
658801,278418,"omaha, nebraska, usa",,5008602064,0
658802,278418,"omaha, nebraska, usa",,528826859,0
658803,278418,"omaha, nebraska, usa",,684124645,0


In [339]:
# Merge the filtered ratings and books dataframes
df = pd.merge(ratings_filtered, books_filtered, on='ISBN')
df


Unnamed: 0,User-ID,Location,Age,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication
0,254,"minneapolis, minnesota, usa",24.0,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999
1,1424,"north avoca, new south wales, australia",,0060930535,7,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999
2,2891,"beaverton, oregon, usa",,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999
3,5903,"vienna, vienna, austria",25.0,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999
4,6251,"wahiawa, hawaii, usa",32.0,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999
...,...,...,...,...,...,...,...,...
65518,263460,"tampa, florida, usa",31.0,0671867156,0,Pretend You Don't See Her,Mary Higgins Clark,1998
65519,269566,"seattle, washington, usa",9.0,0671867156,0,Pretend You Don't See Her,Mary Higgins Clark,1998
65520,271284,"alexandria, virginia, usa",50.0,0671867156,0,Pretend You Don't See Her,Mary Higgins Clark,1998
65521,273979,"bloomsburg, pennsylvania, usa",20.0,0671867156,0,Pretend You Don't See Her,Mary Higgins Clark,1998


# Create pivot table

In [340]:
# Create a pivot table of users and books, with ratings as the values
user_item_matrix = df.pivot_table(index='User-ID', columns='Book-Title', values='Book-Rating')


In [341]:

user_item_matrix.fillna(0,inplace=True)
user_item_matrix


Book-Title,1984,1st to Die: A Novel,2nd Chance,4 Blondes,A Beautiful Mind: The Life of Mathematical Genius and Nobel Laureate John Nash,A Bend in the Road,A Case of Need,"A Child Called \It\"": One Child's Courage to Survive""",A Civil Action,A Confederacy of Dunces (Evergreen Book),...,Wicked: The Life and Times of the Wicked Witch of the West,Wifey,Wild Animus,Winter Moon,Wish You Well,Without Remorse,Year of Wonders,You Belong To Me,Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,"\O\"" Is for Outlaw"""
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
507,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1424,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0
1435,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
277639,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278137,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# calculate cosine similarilty

In [342]:
#import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity



In [343]:
# Compute the cosine similarity between users
similarity = cosine_similarity(user_item_matrix)

similarity


array([[1.        , 0.22016014, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.22016014, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [344]:
similarity.shape



(1776, 1776)

# Main recommendation function

In [409]:
# Function to get the top n similar books for a given book
def recommend_book(user_id, num_books):
    #  Get the index of the user in the users list
    user_index = user_item_matrix.index.get_loc(user_id)
    # Get the ratings of the user
    user_ratings = user_item_matrix.iloc[user_index].dropna()

    # Compute the similarity of the user with all other users
    similar_users = similarity[user_index]

    # Get the indices of the most similar users
    most_similar_users = similar_users.argsort()[::-1][1:]
    
    
    # Get the ratings of the most similar users
    most_similar_ratings = user_item_matrix.iloc[most_similar_users]

    # Get the sum of the similarity of the user with the most similar users
    sum_similarities = similar_users[most_similar_users].sum()

    # Initialize a list to store the recommended books
    recommendations = []

    # Iterate through the user's ratings and add the weighted rating of each book to the list
    for rating in user_ratings.index:
        # Get the weight of the rating (the similarity of the user with the most similar users who rated the book)
        weight = similar_users[most_similar_users][most_similar_ratings[rating].notnull()].sum() / sum_similarities

        # Get the weighted rating of the book and add it to the list
        recommendations.append((rating, user_ratings[rating] * weight))

        # Sort the list of recommendations in descending order
        recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)

    # Return the top n recommendations
    return recommendations[:num_books]

# Output

In [411]:
recommend_book(254, 5)

[('American Gods', 10.0),
 ('1984', 9.0),
 ('Harry Potter and the Chamber of Secrets (Book 2)', 9.0),
 ('Harry Potter and the Goblet of Fire (Book 4)', 9.0),
 ('Harry Potter and the Prisoner of Azkaban (Book 3)', 9.0)]