In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load ratings and movie data from the extracted MovieLens 100K folder.
# DATA_DIR is the only machine-specific setting: point it at your local `ml-100k` directory.
DATA_DIR = 'C:/Users/Zenab/Desktop/inn assigment/ml-100k'

# u.data: tab-separated, no header row -> user id, item id, rating, timestamp
ratings = pd.read_csv(f'{DATA_DIR}/u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])
# u.item: pipe-separated, latin-1 encoded; only the first two columns (id, title) are needed
movies = pd.read_csv(f'{DATA_DIR}/u.item', sep='|', encoding='latin-1', usecols=[0, 1], names=['item_id', 'title'])

# Create item-user matrix (transpose of user-item matrix).
# Rows are items, columns are users; a 0 marks "not rated" (real ratings are positive).
item_user_matrix = ratings.pivot_table(index='item_id', columns='user_id', values='rating').fillna(0)
print("Item-User Matrix Shape:", item_user_matrix.shape)

Item-User Matrix Shape: (1682, 943)


In [3]:
# Pairwise cosine similarity between item rating vectors (one row per item),
# wrapped in a DataFrame so both axes can be addressed by item_id.
item_ids = item_user_matrix.index
item_similarity = pd.DataFrame(cosine_similarity(item_user_matrix), index=item_ids, columns=item_ids)

print("Item Similarity Matrix Shape:", item_similarity.shape)
print("Sample (First 5 Items):")
print(item_similarity.iloc[:5, :5])

Item Similarity Matrix Shape: (1682, 1682)
Sample (First 5 Items):
item_id         1         2         3         4         5
item_id                                                  
1        1.000000  0.402382  0.330245  0.454938  0.286714
2        0.402382  1.000000  0.273069  0.502571  0.318836
3        0.330245  0.273069  1.000000  0.324866  0.212957
4        0.454938  0.502571  0.324866  1.000000  0.334239
5        0.286714  0.318836  0.212957  0.334239  1.000000


In [4]:
def predict_rating(user_id, item_id, k=5):
    """
    Predict a user's rating for an item with item-based collaborative filtering.

    The prediction is the similarity-weighted average of the user's own ratings
    on the (at most) K items they have rated that are most similar to ``item_id``.

    Parameters
    ----------
    user_id : column label of ``item_user_matrix``
        The user to predict for.
    item_id : row label of ``item_similarity``
        The item whose rating is predicted.
    k : int, default 5
        Maximum number of similar rated items used as neighbours.

    Returns
    -------
    float
        The weighted-average prediction. If none of the user's rated items has a
        positive similarity to ``item_id`` the user's mean rating is returned
        instead of dividing by zero. NaN is returned when the user has no
        ratings at all.

    Raises
    ------
    KeyError
        If ``user_id`` or ``item_id`` is not present in the matrices.
    """
    # Ratings this user actually gave (0 marks "not rated" in the matrix)
    user_ratings = item_user_matrix[user_id]
    rated = user_ratings[user_ratings > 0]
    if rated.empty:
        return np.nan

    # Similarity of the target item to each rated item; non-positive scores
    # carry no usable signal and would corrupt the normalising denominator.
    sims = item_similarity.loc[item_id, rated.index]
    neighbours = sims[sims > 0].sort_values(ascending=False).iloc[:k]

    weight_total = neighbours.sum()
    if neighbours.empty or weight_total <= 0:
        # No similar rated items: fall back to the user's average rating
        return float(rated.mean())

    weighted_sum = np.dot(neighbours.values, rated.loc[neighbours.index].values)
    return float(weighted_sum / weight_total)

In [None]:
def recommend_items(user_id, n=5, k=5):
    """
    Recommends top-N items a user hasn't rated yet.

    Parameters
    ----------
    user_id : value of ``ratings['user_id']``
        The user to build recommendations for.
    n : int, default 5
        Maximum number of recommendations returned.
    k : int, default 5
        Neighbourhood size forwarded to ``predict_rating``.

    Returns
    -------
    list of (str, float)
        ``(movie_title, predicted_rating)`` tuples sorted by predicted rating,
        highest first, at most ``n`` long. Items whose prediction is not a
        finite number are left out; an item with no entry in ``movies`` is
        labelled ``"Item <id>"``.
    """
    # Items the user has already rated (a set gives O(1) membership tests)
    rated_items = set(ratings.loc[ratings['user_id'] == user_id, 'item_id'])

    # Build the item_id -> title lookup once instead of filtering `movies`
    # for every candidate; the first title wins if an id is duplicated.
    titles = movies.drop_duplicates('item_id').set_index('item_id')['title'].to_dict()

    # Predict ratings for unrated items
    predictions = []
    for item in item_user_matrix.index:
        if item in rated_items:
            continue
        pred = predict_rating(user_id, item, k=k)
        # NaN/inf cannot be ordered meaningfully and would scramble the sort
        if not np.isfinite(pred):
            continue
        predictions.append((titles.get(item, f"Item {item}"), pred))

    # Sort by predicted rating
    return sorted(predictions, key=lambda x: x[1], reverse=True)[:n]


# Demo: show the five best-predicted unseen movies for a single user
user_id = 1
recommendations = recommend_items(user_id, n=5)

print(f"Top 5 Recommendations for User {user_id}:")
# Number the lines from 1 for display
for i, (movie, rating) in enumerate(recommendations, start=1):
    print(f"{i}. {movie} (Predicted Rating: {rating:.2f})")

Top 5 Recommendations for User 1:
1. English Patient, The (1996) (Predicted Rating: 5.00)
2. Evita (1996) (Predicted Rating: 5.00)
3. Everyone Says I Love You (1996) (Predicted Rating: 5.00)
4. Mother (1996) (Predicted Rating: 5.00)
5. Young Poisoner's Handbook, The (1995) (Predicted Rating: 5.00)
