In [1]:
import pandas as pd

# Load Datasets

In [2]:
ratings = pd.read_csv('ratings.csv')
books = pd.read_csv('books.csv')

# Merge Ratings with Book Info

In [3]:
merged = ratings.merge(books, on='book_id')

# Calculate average rating and rating count

In [4]:
book_stats = merged.groupby(['book_id', 'title']).agg(
    avg_rating=('rating', 'mean'),
    rating_count=('rating', 'count')
).reset_index()

# Recommend top 3 popular books

In [5]:
def get_popular_books(top_n=3):
    top_books = book_stats.sort_values(by=['rating_count', 'avg_rating'], ascending=False).head(top_n)
    return top_books[['title', 'avg_rating', 'rating_count']]
print("Top Recommended Books:")
print(get_popular_books())

Top Recommended Books:
           title  avg_rating  rating_count
1           1984         8.5             2
0  The Alchemist         8.0             2
2     The Hobbit        10.0             1


## Personalized recommendation system using Collaborative Filtering (SVD)

In [7]:
# !pip install scikit-surprise pandas


In [8]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Load and prepare ratings data
ratings = pd.read_csv("ratings.csv")
books = pd.read_csv("books.csv")

# Surprise needs data in (user, item, rating) format
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']], reader)

# Train-test split
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Train SVD model
model = SVD()
model.fit(trainset)

# Recommend top N books for a given user
def get_personalized_recommendations(user_id, n=3):
    # Get all books not yet rated by the user
    all_book_ids = books['book_id'].unique()
    rated_books = ratings[ratings['user_id'] == user_id]['book_id'].tolist()
    unrated_books = [book_id for book_id in all_book_ids if book_id not in rated_books]
    
    # Predict ratings for unrated books
    predictions = []
    for book_id in unrated_books:
        pred = model.predict(user_id, book_id)
        predictions.append((book_id, pred.est))

    # Sort by predicted rating
    top_n = sorted(predictions, key=lambda x: x[1], reverse=True)[:n]
    
    # Get book titles
    recommended_books = books[books['book_id'].isin([b[0] for b in top_n])]
    recommended_books = recommended_books.merge(
        pd.DataFrame(top_n, columns=['book_id', 'predicted_rating']),
        on='book_id'
    )
    return recommended_books[['title', 'author', 'predicted_rating']]

# Example usage
if __name__ == "__main__":
    user_id = 1
    print(f"\n Personalized Recommendations for User {user_id}:")
    print(get_personalized_recommendations(user_id))



 Personalized Recommendations for User 1:
                   title          author  predicted_rating
0             The Hobbit  J.R.R. Tolkien          8.066399
1  To Kill a Mockingbird      Harper Lee          8.020123


# Movie Data

In [9]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

In [10]:
#Load data
columnNames = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=columnNames)
print(ratings.head())

   user_id  item_id  rating  timestamp
0      196      242       3  881250949
1      186      302       3  891717742
2       22      377       1  878887116
3      244       51       2  880606923
4      166      346       1  886397596


In [11]:
# Convert pandas DataFrame to Surprise format
reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(ratings[['user_id','item_id','rating']], reader)

# Split into train and test data
trainset, testset = train_test_split(data, test_size = 0.25, random_state=42)

# Create SVD model
model = SVD()

# Train the model on training data
model.fit(trainset)

# Test how well the model predicts ratings
predictions = model.test(testset)
# rmse(predictions)
print(rmse(predictions))

RMSE: 0.9409
0.9408539252268319


In [12]:
# Get list of all unique item_ids (movies)
all_items = ratings['item_id'].unique()

# Let's pick a user
user_id = str(100)  # convert to string as required by surprise

# Find movies the user hasn't rated yet
user_rated_items = ratings[ratings['user_id'] == int(user_id)]['item_id']
unseen_items = [item for item in all_items if item not in user_rated_items.values]

# Predict ratings for those unseen movies
predictions = [model.predict(user_id, item_id) for item_id in unseen_items]

# Sort by highest predicted rating
top_5 = sorted(predictions, key=lambda x: x.est, reverse=True)[:5]

print(f"\nTop 5 Recommended Books for User {user_id}:")
for pred in top_5:
    print(f"Book ID: {pred.iid}, Predicted Rating: {round(pred.est, 2)}")



Top 5 Recommended Books for User 100:
Book ID: 483, Predicted Rating: 4.58
Book ID: 408, Predicted Rating: 4.57
Book ID: 318, Predicted Rating: 4.55
Book ID: 64, Predicted Rating: 4.51
Book ID: 169, Predicted Rating: 4.51


# Books Data [Book-Crossing Dataset (Kaggle)]

In [15]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# Step 1: Load ratings data
ratings = pd.read_csv('books/Ratings.csv')
print(ratings.columns)
ratings = ratings[['User-ID', 'ISBN', 'Book-Rating']]
ratings.columns = ['user_id', 'book_id', 'rating']

# Step 2: Remove 0 ratings (they mean "not rated")
ratings = ratings[ratings['rating'] > 0]

# # Step 3: Keep only users who have rated at least 20 books (optional)
# user_counts = ratings['user_id'].value_counts()
# ratings = ratings[ratings['user_id'].isin(user_counts[user_counts >= 20].index)]

# Step 4: Convert to Surprise dataset
reader = Reader(rating_scale=(1, 10))  # Book ratings are from 1 to 10
data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']], reader)

# Step 5: Train/test split
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Step 6: Train the SVD model
model = SVD()
model.fit(trainset)

# Build fallback popularity-based model (avg rating + count)
popular_books = (
    ratings.groupby('book_id')
    .agg(avg_rating=('rating', 'mean'), rating_count=('rating', 'count'))
    .reset_index()
)

# Recommend top 5 books by popularity
def fallback_recommendations(top_n=5):
    top_books = popular_books.sort_values(
        by=['rating_count', 'avg_rating'], ascending=False
    ).head(top_n)
    return top_books[['book_id', 'avg_rating', 'rating_count']]

def recommend_books(user_id, top_n=5):
    try:
        user_id = int(user_id)
    except:
        return "Invalid user ID"

    if user_id not in ratings['user_id'].values:
        print(f"\n New user detected. Showing fallback recommendations.")
        return fallback_recommendations(top_n).values.tolist()

    all_books = ratings['book_id'].unique()
    rated_books = ratings[ratings['user_id'] == user_id]['book_id']
    unseen_books = [book for book in all_books if book not in rated_books.values]

    predictions = [model.predict(str(user_id), book_id) for book_id in unseen_books]
    top_preds = sorted(predictions, key=lambda x: x.est, reverse=True)[:top_n]

    return [(pred.iid, round(pred.est, 2)) for pred in top_preds]

'''
# Step 7: Recommend top 5 books to a user
user_id = str(ratings['user_id'].iloc[0])  # pick any user from the dataset

# Get all book IDs
all_books = ratings['book_id'].unique()
rated_books = ratings[ratings['user_id'] == int(user_id)]['book_id']
unseen_books = [book for book in all_books if book not in rated_books.values]

# Predict ratings
predictions = [model.predict(user_id, book_id) for book_id in unseen_books]
top_5 = sorted(predictions, key=lambda x: x.est, reverse=True)[:5]

print(f"\n Top 5 Recommended Books for User {user_id}:")
for pred in top_5:
    print(f"Book ID: {pred.iid}, Predicted Rating: {round(pred.est, 2)}")'''


Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')


'\n# Step 7: Recommend top 5 books to a user\nuser_id = str(ratings[\'user_id\'].iloc[0])  # pick any user from the dataset\n\n# Get all book IDs\nall_books = ratings[\'book_id\'].unique()\nrated_books = ratings[ratings[\'user_id\'] == int(user_id)][\'book_id\']\nunseen_books = [book for book in all_books if book not in rated_books.values]\n\n# Predict ratings\npredictions = [model.predict(user_id, book_id) for book_id in unseen_books]\ntop_5 = sorted(predictions, key=lambda x: x.est, reverse=True)[:5]\n\nprint(f"\n Top 5 Recommended Books for User {user_id}:")\nfor pred in top_5:\n    print(f"Book ID: {pred.iid}, Predicted Rating: {round(pred.est, 2)}")'

In [16]:
user_id = input("Enter User ID: ")
result = recommend_books(user_id)

print(f"\nTop Recommendations for User {user_id}:")
for rec in result:
    print(rec)



 New user detected. Showing fallback recommendations.

Top Recommendations for User 1:
['0316666343', 8.185289957567186, 707]
['0971880107', 4.3907056798623065, 581]
['0385504209', 8.435318275154003, 487]
['0312195516', 8.182767624020888, 383]
['0679781587', 8.408408408408409, 333]


In [17]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse


# STEP 1: Load and clean the data


# Load the dataset
ratings = pd.read_csv('books/Ratings.csv')  # Make sure this file exists
ratings = ratings[['User-ID', 'ISBN', 'Book-Rating']]
ratings.columns = ['user_id', 'book_id', 'rating']


# Remove rows with 0 rating (0 = not rated)
ratings = ratings[ratings['rating'] > 0]

# Ensure ratings are integers
ratings['rating'] = ratings['rating'].astype(int)

# # Keep only users with 20+ ratings (optional)
# user_counts = ratings['user_id'].value_counts()
# ratings = ratings[ratings['user_id'].isin(user_counts[user_counts >= 20].index)]

print("Ratings loaded:", len(ratings))


# STEP 2: Prepare data for the Surprise model


reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']], reader)


# STEP 3: Train-test split & model


trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

model = SVD()
model.fit(trainset)

# Evaluate accuracy (optional)
predictions = model.test(testset)
rmse(predictions)


# STEP 4: Build fallback (popularity)


popular_books = (
    ratings.groupby('book_id')
    .agg(avg_rating=('rating', 'mean'), rating_count=('rating', 'count'))
    .reset_index()
)

def fallback_recommendations(top_n=5):
    top_books = popular_books.sort_values(
        by=['rating_count', 'avg_rating'], ascending=False
    ).head(top_n)
    return top_books[['book_id', 'avg_rating', 'rating_count']]


# STEP 5: Recommend books (personalized or fallback)


def recommend_books(user_id, top_n=5):
    try:
        user_id = int(user_id)
    except:
        return "Invalid user ID"

    if user_id not in ratings['user_id'].values:
        print(f"\n New user detected. Showing fallback recommendations.")
        return fallback_recommendations(top_n).values.tolist()

    all_books = ratings['book_id'].unique()
    rated_books = ratings[ratings['user_id'] == user_id]['book_id']
    unseen_books = [book for book in all_books if book not in rated_books.values]

    predictions = [model.predict(str(user_id), book_id) for book_id in unseen_books]
    top_preds = sorted(predictions, key=lambda x: x.est, reverse=True)[:top_n]

    return [(pred.iid, round(pred.est, 2)) for pred in top_preds]


# STEP 6: Run the system


user_input = input("🔎 Enter a User ID to get recommendations: ")
recommendations = recommend_books(user_input)

print(f"\nTop 5 Recommendations for User {user_input}:")
for rec in recommendations:
    print(rec)


Ratings loaded: 433671
RMSE: 1.6405

 New user detected. Showing fallback recommendations.

Top 5 Recommendations for User 1:
['0316666343', 8.185289957567186, 707]
['0971880107', 4.3907056798623065, 581]
['0385504209', 8.435318275154003, 487]
['0312195516', 8.182767624020888, 383]
['0679781587', 8.408408408408409, 333]


In [18]:
# !pip install streamlit