In [7]:
#----------code by Md Toufikzaman--------
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load datasets
movies = pd.read_csv('movies.csv')
tags = pd.read_csv('tags.csv')
ratings = pd.read_csv('ratings.csv')

# Prepare Content-Based Filtering
movies = movies.merge(tags.groupby('movieId')['tag'].apply(lambda x: ' '.join(x)).reset_index(), on='movieId', how='left')
movies['tag'] = movies['tag'].fillna('')
movies['content'] = movies['genres'] + ' ' + movies['tag']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['content'])
content_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
content_indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()

# Prepare Collaborative Filtering
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Filter movies in the user-item matrix to match those in the content matrix
user_item_matrix = user_item_matrix.loc[:, user_item_matrix.columns.isin(movies['movieId'])]
movies = movies[movies['movieId'].isin(user_item_matrix.columns)]

# Recalculate content similarity matrix with the filtered movies
tfidf_matrix = tfidf.fit_transform(movies['content'])
content_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
content_indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()

# Compute collaborative filtering similarity
item_similarity = cosine_similarity(user_item_matrix.T)
collab_sim_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

# Hybrid Recommendation Function
def get_hybrid_recommendations(movie_title, user_id, alpha=0.5):
    if movie_title not in content_indices:
        return "Movie not found in the dataset."
    
    movie_idx = content_indices[movie_title]
    content_scores = content_sim[movie_idx]
    
    if user_id not in user_item_matrix.index:
        return "User not found in the dataset."
    
    user_ratings = user_item_matrix.loc[user_id]
    collab_scores = collab_sim_df.dot(user_ratings).div(collab_sim_df.sum(axis=1))
    
    # Combine content and collaborative scores
    combined_scores = alpha * content_scores + (1 - alpha) * collab_scores.values
    top_indices = combined_scores.argsort()[-11:][::-1]
    
    return movies['title'].iloc[top_indices[1:]].tolist()

# Example usage
hybrid_recommendations = get_hybrid_recommendations('Die Hard (1988)', user_id=1)
print(hybrid_recommendations)


['Supercop 2 (Project S) (Chao ji ji hua) (1993)', 'Welcome to Woop-Woop (1997)', 'In Too Deep (1999)', 'Righteous Kill (2008)', "Gulliver's Travels (1939)", 'Bangkok Dangerous (2008)', 'Venom (1982)', 'Assignment, The (1997)', 'K-9 (1989)', 'Assassination (1987)']


In [8]:
import pandas as pd

# Assuming we already have the hybrid recommendation scores, let's simulate some data
# This is a representation, not actual results from the recommendation system

# Sample data for the top 10 recommended movies
data = {
    'Movie Title': [
        'Lethal Weapon (1987)', 'Die Hard 2 (1990)', 'Predator (1987)', 
        'Speed (1994)', 'True Lies (1994)', 'Terminator 2: Judgment Day (1991)',
        'RoboCop (1987)', 'The Rock (1996)', 'Commando (1985)', 'Mad Max 2: The Road Warrior (1981)'
    ],
    'Content-Based Score': [0.85, 0.83, 0.82, 0.80, 0.78, 0.77, 0.76, 0.75, 0.74, 0.73],
    'Collaborative Filtering Score': [4.5, 4.4, 4.3, 4.2, 4.1, 4.0, 3.9, 3.8, 3.7, 3.6],
    'Hybrid Score': [4.67, 4.61, 4.56, 4.50, 4.44, 4.39, 4.33, 4.28, 4.22, 4.16]
}

# Create a DataFrame to display the results
df = pd.DataFrame(data)

# Display the DataFrame
df


Unnamed: 0,Movie Title,Content-Based Score,Collaborative Filtering Score,Hybrid Score
0,Lethal Weapon (1987),0.85,4.5,4.67
1,Die Hard 2 (1990),0.83,4.4,4.61
2,Predator (1987),0.82,4.3,4.56
3,Speed (1994),0.8,4.2,4.5
4,True Lies (1994),0.78,4.1,4.44
5,Terminator 2: Judgment Day (1991),0.77,4.0,4.39
6,RoboCop (1987),0.76,3.9,4.33
7,The Rock (1996),0.75,3.8,4.28
8,Commando (1985),0.74,3.7,4.22
9,Mad Max 2: The Road Warrior (1981),0.73,3.6,4.16


In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Sample data for testing
ratings = pd.DataFrame({
    'userId': [1, 1, 2, 2],
    'movieId': [1, 2, 1, 2],
    'rating': [4, 3, 5, 2]
})

def get_hybrid_recommendations(movie_id, user_id):
    # Placeholder function: replace with actual recommendation logic
    # Example recommendations might be based on movie_id
    return [movie_id + 1, movie_id + 2]

def get_prediction_for_movie(movie_id):
    # Placeholder function: replace with actual prediction logic
    # Example predictions could be static for demonstration
    return 4.0

def get_predictions(user_id, movie_id):
    recommendations = get_hybrid_recommendations(movie_id, user_id)
    true_ratings = ratings[ratings['userId'] == user_id]
    
    # Check if there are true ratings available
    if true_ratings.empty:
        raise ValueError("No true ratings available for the given user_id.")
    
    true_ratings = true_ratings['rating'].values
    print("True Ratings:", true_ratings)
    
    # Ensure we only compare predictions for recommended movies that have true ratings
    predicted_ratings = np.array([get_prediction_for_movie(m) for m in recommendations])
    
    if len(true_ratings) != len(predicted_ratings):
        raise ValueError("Length of true_ratings does not match length of predicted_ratings.")
    
    mae = mean_absolute_error(true_ratings, predicted_ratings[:len(true_ratings)])
    rmse = np.sqrt(mean_squared_error(true_ratings, predicted_ratings[:len(true_ratings)]))
    
    return mae, rmse

user_id = 1
movie_id = 1

try:
    mae, rmse = get_predictions(user_id, movie_id)
    print(f"MAE: {mae}, RMSE: {rmse}")
except Exception as e:
    print(f"Error: {e}")


True Ratings: [4 3]
MAE: 0.5, RMSE: 0.7071067811865476
