movie

In [9]:
pip install sklearn 

Collecting sklearn
  Downloading sklearn-0.0.post12.tar.gz (2.6 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [15 lines of output]
      The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
      rather than 'sklearn' for pip commands.
      
      Here is how to fix this error in the main use cases:
      - use 'pip install scikit-learn' rather than 'pip install sklearn'
      - replace 'sklearn' by 'scikit-learn' in your pip requirements files
        (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
      - if the 'sklearn' package is used by one of your dependencies,
        it would be great if you take some time to track which package uses
        'sklearn' instead of 'scikit-learn' and report it to their issue tracker
      - as a last resort, set the environment variable
        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
      
      More information is available at
      https://github.com/scikit-learn/sklearn-

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
import seaborn as sns
import matplotlib.pyplot as plt

# Step 1: Load the datasets
# Single place to repoint the notebook at another copy of the CSV files.
DATA_DIR = '/mnt/data'

# low_memory=False: parse the file in one pass instead of in chunks, so a column
# is not inferred with different types in different chunks.
movies_metadata = pd.read_csv(f'{DATA_DIR}/movies_metadata.csv', low_memory=False)
ratings_small = pd.read_csv(f'{DATA_DIR}/ratings_small.csv')
keywords = pd.read_csv(f'{DATA_DIR}/keywords.csv')
links = pd.read_csv(f'{DATA_DIR}/links.csv')
credits = pd.read_csv(f'{DATA_DIR}/credits.csv')
# You can load the larger ratings dataset later if needed

# Step 2: Data Preprocessing
# A movie must have strictly more votes than this to be kept.
MIN_VOTE_COUNT = 50


def _with_int_id(frame):
    """Return a copy of `frame` whose 'id' column is integer-typed.

    Rows whose id is missing or cannot be parsed as a number are dropped,
    because they cannot take part in an id-based join.
    """
    numeric_id = pd.to_numeric(frame['id'], errors='coerce')
    parsed = numeric_id.notna()
    # .assign builds a new frame, so the caller's frame is left untouched
    return frame.loc[parsed].assign(id=numeric_id[parsed].astype(int))


# links is not merged below; its tmdbId is only made integer-typed (missing -> 0)
links['tmdbId'] = links['tmdbId'].fillna(0).astype(int)

# Use one integer dtype for the join key in all three frames. Casting only
# movies_metadata['id'] to str (as before) makes the merge fail whenever pandas
# has parsed the credits/keywords ids as integers, since object and int keys
# cannot be merged.
# NOTE(review): rows with a non-numeric id are now dropped instead of kept as '0'.
movies_metadata = _with_int_id(movies_metadata)
# An id repeated on the right-hand side would duplicate movie rows in a left merge.
credits_by_id = _with_int_id(credits).drop_duplicates(subset='id')
keywords_by_id = _with_int_id(keywords).drop_duplicates(subset='id')

# Merge movies_metadata with credits and keywords (left joins keep every movie)
movies_metadata = movies_metadata.merge(credits_by_id, on='id', how='left')
movies_metadata = movies_metadata.merge(keywords_by_id, on='id', how='left')

# Keep relevant columns for recommendation
movies = movies_metadata[['id', 'title', 'genres', 'cast', 'crew', 'keywords', 'vote_average', 'vote_count']]

# Filter movies with sufficient votes for quality; .copy() makes `movies` an
# independent frame rather than a slice of movies_metadata.
movies = movies[movies['vote_count'] > MIN_VOTE_COUNT].copy()

# Step 3: Exploratory Data Analysis (EDA)
# Histogram of the rating column (10 bins, KDE overlay) on an explicit 10x5 figure
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(ratings_small['rating'], bins=10, kde=True, color='blue', ax=ax)
ax.set_title('Distribution of Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Frequency')
plt.show()

# Step 4: Build Recommender System
# Wrap the (userId, movieId, rating) columns as a Surprise dataset on the 0.5-5.0 scale
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings_small[['userId', 'movieId', 'rating']], reader)

# Collaborative Filtering model (SVD), seeded so the factorisation is repeatable.
# NOTE(review): the cross-validation fold assignment below is not seeded.
model = SVD(random_state=42)

# Held-out accuracy estimate: 3-fold cross-validation reporting RMSE and MAE
cross_validate(model, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

# Fit the final model on every available rating before it is used for prediction;
# cross_validate only trains on cross-validation folds.
# The earlier sklearn train_test_split call was removed: it split a list of
# rating tuples into two plain lists that no later step consumed.
trainset = data.build_full_trainset()
model.fit(trainset)

# Step 5: Make Predictions
# Illustrative (user, movie) pair used for a single point prediction
user_id, movie_id = 1, 31
prediction = model.predict(user_id, movie_id)
# .est is the estimated rating carried by the returned prediction
print(f"Predicted rating for User {user_id} on Movie {movie_id}: {prediction.est}")

# Step 6: Recommend Movies
# Function to recommend movies
def recommend_movies(user_id, model, data, top_n=10, ratings=None):
    """Return the highest-predicted movies that a user has not rated yet.

    Args:
        user_id: User id as it appears in the ``userId`` column.
        model: Object exposing ``predict(uid, iid)`` whose result has ``iid``
            and ``est`` attributes (presumably an already-fitted model).
        data: Unused; kept so existing callers that pass it keep working.
        top_n: Maximum number of recommendations to return.
        ratings: DataFrame with ``userId`` and ``movieId`` columns. Defaults
            to the notebook-level ``ratings_small`` frame.

    Returns:
        List of ``(movie_id, estimated_rating)`` tuples, best estimate first.
    """
    if ratings is None:
        ratings = ratings_small

    # Collect the rated movie ids as a set of *values*. `x in some_series`
    # checks the Series index (row labels), so testing membership directly on
    # the Series compared movie ids with row numbers and let rated movies through.
    seen_movie_ids = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])
    candidate_ids = [
        candidate for candidate in ratings['movieId'].unique()
        if candidate not in seen_movie_ids
    ]

    # Predict ratings for all unrated movies and rank by estimate, best first
    predictions = [model.predict(user_id, movie_id) for movie_id in candidate_ids]
    predictions.sort(key=lambda pred: pred.est, reverse=True)

    # Return top N recommendations
    return [(pred.iid, pred.est) for pred in predictions[:top_n]]

# Produce recommendations for user 1 (default top_n) and print the (movie id, estimate) pairs
recommended_movies = recommend_movies(1, model, data)
print("Recommended Movies:", recommended_movies)

# Step 7: Deployment
# Save the model or create a Flask application for deployment (not covered here)


ModuleNotFoundError: No module named 'sklearn'