<a href="https://colab.research.google.com/github/paul20301/Real-Time-Recommendation-Model/blob/main/Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from surprise import Dataset, Reader

url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"
columns = ['user_id', 'item_id', 'rating', 'timestamp']
data = pd.read_csv(url, sep='\t', names=columns)

print(data.head())

reader = Reader(rating_scale=(1, 5))
data_surprise = Dataset.load_from_df(data[['user_id', 'item_id', 'rating']], reader)


   user_id  item_id  rating  timestamp
0      196      242       3  881250949
1      186      302       3  891717742
2       22      377       1  878887116
3      244       51       2  880606923
4      166      346       1  886397596


In [3]:
from surprise import SVD
from surprise.model_selection import cross_validate

model = SVD()
cross_validate(model, data_surprise, cv=5, verbose=True)

trainset = data_surprise.build_full_trainset()
model.fit(trainset)

user_id, item_id = 1, 50
predicted_rating = model.predict(user_id, item_id)
print(f"Predicted rating for User {user_id} and Item {item_id}: {predicted_rating.est}")


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9387  0.9347  0.9304  0.9372  0.9372  0.9357  0.0029  
MAE (testset)     0.7381  0.7386  0.7326  0.7374  0.7394  0.7372  0.0024  
Fit time          1.47    1.31    1.30    1.62    1.29    1.40    0.13    
Test time         0.09    0.09    0.23    0.09    0.11    0.12    0.05    
Predicted rating for User 1 and Item 50: 4.5488952753675544


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

item_metadata = pd.DataFrame({
    'item_id': [1, 50, 100, 200],
    'tags': ['action, adventure', 'comedy, drama', 'horror, thriller', 'romance, comedy']
})

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(item_metadata['tags'])

cosine_sim = cosine_similarity(tfidf_matrix)

def recommend_content_based(item_id, cosine_sim, top_n=5):
    item_idx = item_metadata[item_metadata['item_id'] == item_id].index[0]
    similar_items = list(enumerate(cosine_sim[item_idx]))
    similar_items = sorted(similar_items, key=lambda x: x[1], reverse=True)
    recommended_items = [item_metadata.iloc[i[0]]['item_id'] for i in similar_items[1:top_n+1]]
    return recommended_items


recommended_items = recommend_content_based(1, cosine_sim)
print(f"Recommended items for Item 1: {recommended_items}")


Recommended items for Item 1: [50, 100, 200]


In [5]:
def hybrid_recommend(user_id, item_id, model, cosine_sim, alpha=0.5):

    collaborative_score = model.predict(user_id, item_id).est


    if item_id in item_metadata['item_id'].values:
        content_recommendations = recommend_content_based(item_id, cosine_sim, top_n=1)
        content_score = 1.0 if item_id in content_recommendations else 0.0
    else:
        content_score = 0.0


    hybrid_score = alpha * collaborative_score + (1 - alpha) * content_score
    return hybrid_score


hybrid_score = hybrid_recommend(1, 50, model, cosine_sim)
print(f"Hybrid score for User 1 and Item 50: {hybrid_score}")


Hybrid score for User 1 and Item 50: 2.2744476376837772


In [6]:
def recommend_top_n(user_id, model, cosine_sim, n=5):

    all_items = item_metadata['item_id'].tolist()
    scores = [(item, hybrid_recommend(user_id, item, model, cosine_sim)) for item in all_items]
    top_items = sorted(scores, key=lambda x: x[1], reverse=True)[:n]
    return [item[0] for item in top_items]


top_n_recommendations = recommend_top_n(1, model, cosine_sim)
print(f"Top-{len(top_n_recommendations)} recommendations for User 1: {top_n_recommendations}")


Top-4 recommendations for User 1: [100, 50, 1, 200]


In [7]:
from surprise import accuracy
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(data_surprise, test_size=0.2)
model.fit(trainset)
predictions = model.test(testset)

rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")


RMSE: 0.9386
RMSE: 0.9386316892706233


In [11]:
def recommend_for_user(user_id, model, cosine_sim, top_n=5):

    recommended_items = recommend_top_n(user_id, model, cosine_sim, n=top_n)
    return recommended_items


In [13]:

user_id = int(input("Enter User ID: "))

recommendations = recommend_for_user(user_id, model, cosine_sim, top_n=5)

print(f"Top {len(recommendations)} recommendations for User {user_id}: {recommendations}")


Enter User ID: 2
Top 4 recommendations for User 2: [100, 50, 200, 1]


In [14]:
from ipywidgets import interact

def interactive_recommendation(user_id):
    recommendations = recommend_for_user(user_id, model, cosine_sim, top_n=5)
    print(f"Top {len(recommendations)} recommendations for User {user_id}: {recommendations}")

interact(interactive_recommendation, user_id=(1, 100))


interactive(children=(IntSlider(value=50, description='user_id', min=1), Output()), _dom_classes=('widget-inte…