<a href="https://colab.research.google.com/github/swaroopkasaraneni/DatasciencePython/blob/main/AssociationRules.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Key issues: As of now a lot of users return the booksand do not take the new rental.  The rightrecommendation will entice a usersto rent more books


```
Fields in Data
•user_id: Unique Id of the User
•isbn: International Standard Book Number is a unique numeric commercial book identifier
•rating: the rating given by the user
```

Increase in both top line and bottom line as more rentals per user means more revenue and more profit



In [3]:
import pandas as pd

# Load the CSV files into pandas DataFrames
user_info_df = pd.read_csv("/content/BX-Users.csv", encoding="ISO-8859-1")
books_info_df = pd.read_csv("/content/BX-Books.csv", encoding="ISO-8859-1")
ratings_info_df = pd.read_csv("/content/BX-Book-Ratings.csv", encoding="ISO-8859-1")

# Display the first few rows of each DataFrame to check the data
print(user_info_df.head())
print(books_info_df.head())
print(ratings_info_df.head())

# Handling missing values (e.g., fill missing age with the mean age or drop rows)
user_info_df['Age'] = user_info_df['Age'].fillna(user_info_df['Age'].mean())

# Display cleaned data
print(user_info_df.head())

# Create the user-item interaction matrix (ratings)
user_item_matrix = ratings_info_df.pivot_table(index='user_id', columns='isbn', values='rating', fill_value=0)

# Display the user-item matrix
print(user_item_matrix.head())




   user_id                            Location   Age
0        1                  nyc, new york, usa   NaN
1        2           stockton, california, usa  18.0
2        3     moscow, yukon territory, russia   NaN
3        4           porto, v.n.gaia, portugal  17.0
4        5  farnborough, hants, united kingdom   NaN
        isbn                                         book_title  \
0  195153448                                Classical Mythology   
1    2005018                                       Clara Callan   
2   60973129                               Decision in Normandy   
3  374157065  Flu: The Story of the Great Influenza Pandemic...   
4  393045218                             The Mummies of Urumchi   

            book_author  year_of_publication                   publisher  
0    Mark P. O. Morford                 2002     Oxford University Press  
1  Richard Bruce Wright                 2001       HarperFlamingo Canada  
2          Carlo D'Este                 1991          

In [4]:
from sklearn.metrics import pairwise_distances

# Compute pairwise cosine similarity between users
user_similarity = 1 - pairwise_distances(user_item_matrix, metric='cosine')

# Display user similarity matrix
print(user_similarity)


[[0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]


In [9]:
import numpy as np

def recommend_books(user_id, user_item_matrix, user_similarity, top_n=5):
    # Find the most similar users (based on the smallest distance)
    similar_users = np.argsort(user_similarity[user_id - 1])[::-1]  # Reverse for most similar

    # Get the books rented by the similar users
    recommended_books = set()
    for similar_user in similar_users[1:]:  # Skip the user itself
        similar_user_ratings = user_item_matrix.iloc[similar_user]
        rented_books = similar_user_ratings[similar_user_ratings > 0].index
        recommended_books.update(rented_books)

    # Filter out books the user has already rented
    user_rented_books = user_item_matrix.iloc[user_id - 1]
    already_rented = set(user_rented_books[user_rented_books > 0].index)
    new_recommendations = recommended_books - already_rented

    # Return top N recommendations
    return list(new_recommendations)[:top_n]

# Get top 3 book recommendations for user 1
recommended_books_for_user_1 = recommend_books(1, user_item_matrix, user_similarity, top_n=3)
print("Recommended books for User 1:", recommended_books_for_user_1)


Recommended books for User 1: ['3442443806', '743486226', '3596147700']


In [7]:
from sklearn.metrics import precision_score, recall_score

# Example: Actual rented books by user 1
actual_rented_books = ['034545104X', ]

all_book_ids = set(actual_rented_books).union(set(recommended_books_for_user_1))

actual_books_binary = [1 if book_id in actual_rented_books else 0 for book_id in all_book_ids]
recommended_books_binary = [1 if book_id in recommended_books_for_user_1 else 0 for book_id in all_book_ids]

# Calculate precision and recall
precision = precision_score(actual_books_binary, recommended_books_binary)
recall = recall_score(actual_books_binary, recommended_books_binary)

print(f"Precision: {precision}")
print(f"Recall: {recall}")


Precision: 0.0
Recall: 0.0


In [14]:
all_recommendations = {}

for user_id in user_item_matrix.index:
    if user_id in user_item_matrix.index and user_id in user_similarity:
        all_recommendations[user_id] = recommend_books(user_id, user_item_matrix, user_similarity, top_n=3)
    else:
        print(f"User {user_id} not found in user_item_matrix.")

print(all_recommendations)


User 276725 not found in user_item_matrix.
User 276726 not found in user_item_matrix.
User 276727 not found in user_item_matrix.
User 276729 not found in user_item_matrix.
User 276733 not found in user_item_matrix.
User 276736 not found in user_item_matrix.
User 276737 not found in user_item_matrix.
User 276744 not found in user_item_matrix.
User 276745 not found in user_item_matrix.
User 276746 not found in user_item_matrix.
User 276747 not found in user_item_matrix.
User 276748 not found in user_item_matrix.
User 276751 not found in user_item_matrix.
User 276754 not found in user_item_matrix.
User 276755 not found in user_item_matrix.
User 276760 not found in user_item_matrix.
User 276762 not found in user_item_matrix.
User 276765 not found in user_item_matrix.
User 276768 not found in user_item_matrix.
User 276772 not found in user_item_matrix.
User 276774 not found in user_item_matrix.
User 276780 not found in user_item_matrix.
User 276786 not found in user_item_matrix.
User 276788