In [7]:
# Initialise Django for this kernel before any project models are imported.
# NOTE(review): presumably this loads the project's settings; confirm against
# the django_jupyter package documentation.
import django_jupyter
django_jupyter.init()

In [12]:
from movies.models import Movie
from django.contrib.auth import get_user_model

# get_user_model must be *called*: the bare function object has neither an
# `.objects` manager nor a `DoesNotExist` attribute, both of which the cells
# below rely on.
User = get_user_model()

In [17]:
def print_movie_genres(movie_id):
    """
    Look up a movie and report the names of its genres.

    Prints a one-line summary and returns the same names so callers can
    reuse them.

    Args:
        movie_id: Primary key of the ``Movie`` to look up.

    Returns:
        list: Genre names of the movie, or an empty list when no movie with
        ``movie_id`` exists.
    """
    try:
        found = Movie.objects.get(id=movie_id)
        # Keep only the display name of every related genre.
        movie_genres = []
        for entry in found.genres.all():
            movie_genres.append(entry.name)
        print(f"Movie ID: {movie_id} | Movie's Genres: {movie_genres}")
    except Movie.DoesNotExist:
        print(f"Movie with ID {movie_id} does not exist.")
        return []
    return movie_genres

# Smoke test: look up one sample movie and keep the returned genre names.
movie_genres = print_movie_genres(movie_id=36120)  # Replace with an actual movie_id


Movie ID: 36120 | Movie's Genres: ['Drama', 'Thriller', 'Horror', 'Mystery']


In [19]:
import json

def print_user_genres(user_id):
    """
    Print and return the genres a user has marked as preferred.

    The user's ``preferences`` attribute is accepted either as a JSON-encoded
    string or as an already-decoded dictionary.

    Args:
        user_id: Primary key of the user to look up.

    Returns:
        list: The value stored under the ``'genres'`` key of the preferences.
        An empty list is returned when the user does not exist, the stored
        JSON cannot be decoded, the preferences are missing or not a
        dictionary, or no genres are recorded.
    """
    try:
        user = User.objects.get(id=user_id)
        preferences = user.preferences
        # Decode only while the value is still serialized: json.loads() raises
        # TypeError for a dict, which none of the handlers below would catch.
        if isinstance(preferences, (str, bytes, bytearray)):
            preferences = json.loads(preferences)
        # Missing preferences (None), non-dictionary payloads and a null
        # 'genres' entry all mean "no preferred genres".
        if isinstance(preferences, dict):
            user_genres = preferences.get('genres') or []
        else:
            user_genres = []
        print(f"User ID: {user_id} | User's Preferred Genres: {user_genres}")
        return user_genres
    except User.DoesNotExist:
        print(f"User with ID {user_id} does not exist.")
        return []
    except json.JSONDecodeError:
        print(f"Error decoding JSON for user ID {user_id}.")
        return []

# Smoke test: look up one sample user and keep the returned preferred genres.
user_genres = print_user_genres(user_id=2653)  # Replace with an actual user_id



User ID: 2653 | User's Preferred Genres: ['Horror', 'Comedy', 'Romance', 'Thriller', 'Drama']


In [20]:
def calculate_genre_matching_score(user_id, movie_id):
    """
    Count the genres that a user's preferences and a movie have in common.

    Args:
        user_id: Primary key of the user whose preferred genres are used.
        movie_id: Primary key of the movie whose genres are used.

    Returns:
        int: Number of distinct genres present on both sides.
    """
    # Fetch both genre lists up front (user first, then movie); each helper
    # prints its own summary line as a side effect.
    user_genres = print_user_genres(user_id)
    movie_genres = print_movie_genres(movie_id)

    # Order and duplicates are irrelevant for the score, so compare as sets.
    shared = set(user_genres) & set(movie_genres)
    genre_matching_score = len(shared)

    print(f"User's Preferred Genres: {user_genres}")
    print(f"Movie's Genres: {movie_genres}")
    print(f"Genre Matching Score: {genre_matching_score}")

    return genre_matching_score

# Smoke test with one sample user/movie pair; as the cell's last expression,
# the returned score is also echoed as the cell output.
calculate_genre_matching_score(user_id=2653, movie_id=36120)  # Replace with actual IDs


User ID: 2653 | User's Preferred Genres: ['Horror', 'Comedy', 'Romance', 'Thriller', 'Drama']
Movie ID: 36120 | Movie's Genres: ['Drama', 'Thriller', 'Horror', 'Mystery']
User's Preferred Genres: ['Horror', 'Comedy', 'Romance', 'Thriller', 'Drama']
Movie's Genres: ['Drama', 'Thriller', 'Horror', 'Mystery']
Genre Matching Score: 3


3

In [29]:
from django.contrib.auth import get_user_model

# Resolve the active user model class and bind it to the notebook-level `User` name.
User = get_user_model()

# Sanity check that Django models are accessible: count the rows of the user table.
print(User.objects.count(), "users found in the database.")


5630 users found in the database.


In [30]:
def test_database_access():
    print("Testing database access...")
    ratings_query = Rating.objects.all()[:5]  # Fetch only 5 records for testing
    for rating in ratings_query:
        print(rating.user_id, rating.movie_id, rating.score)

# Run the check; it prints a header followed by at most five ratings.
test_database_access()


Testing database access...
4 2114 10.0
4 3060 10.0
4 3071 8.0
4 3169 8.0
4 3208 4.0


In [None]:
from surprise import Dataset, SVD, Reader
from surprise.model_selection import train_test_split
from django.contrib.auth import get_user_model
from ratings.models import Rating
from movies.models import Movie
import pandas as pd
import pickle
import json

# Resolve the active user model class; the functions below query it through `User.objects`.
User = get_user_model()

def get_ratings_dataset_with_genre_matching(verbose=True):
    """
    Build a Surprise dataset whose ratings include a Genre Matching Score.

    Every rating's score is increased by the number of genres shared between
    the rating user's preferred genres and the rated movie's genres.

    Args:
        verbose: When True (the default) print each rating's user and movie
            genres while it is processed. Pass False to keep only the
            progress messages.

    Returns:
        The object returned by ``Dataset.load_from_df`` for the columns
        ``user``, ``item`` and ``rating`` (the adjusted score).

    Raises:
        ValueError: If there are no ratings to build a dataset from.
    """
    print("Fetching ratings data from the database...")  # Debugging statement
    ratings_query = Rating.objects.all().values('user_id', 'movie_id', 'score')

    print(f"Total ratings fetched: {len(ratings_query)}")  # Debugging statement

    # Users and movies repeat across ratings, so remember their genres after
    # the first lookup instead of querying the database once per rating.
    user_genres_by_id = {}
    movie_genres_by_id = {}
    records = []

    for rating in ratings_query:
        user_id = rating['user_id']
        movie_id = rating['movie_id']
        score = rating['score']

        if user_id not in user_genres_by_id:
            preferences = User.objects.get(id=user_id).preferences
            # Preferences may still be a JSON string; decode it first.
            if isinstance(preferences, str):
                preferences = json.loads(preferences)
            # Missing or malformed preferences count as "no preferred genres".
            stored = preferences.get('genres') if isinstance(preferences, dict) else None
            user_genres_by_id[user_id] = list(stored or [])
        if movie_id not in movie_genres_by_id:
            movie = Movie.objects.get(id=movie_id)
            movie_genres_by_id[movie_id] = list(movie.genres.values_list('name', flat=True))

        user_genres = user_genres_by_id[user_id]
        movie_genres = movie_genres_by_id[movie_id]

        if verbose:
            print(f"User ID: {user_id} | User's Preferred Genres: {user_genres}")
            print(f"Movie ID: {movie_id} | Movie's Genres: {movie_genres}")

        genre_matching_score = len(set(user_genres) & set(movie_genres))

        # Adding genre matching score to the rating score
        adjusted_score = float(score) + genre_matching_score

        records.append((user_id, movie_id, adjusted_score))

    if not records:
        raise ValueError("No ratings found; cannot build a dataset.")

    # Creating DataFrame
    ratings_df = pd.DataFrame(records, columns=['user', 'item', 'rating'])

    # Debugging prints
    print("Ratings DataFrame with Genre Matching Scores:")
    print(ratings_df.head())

    # The adjusted ratings already contain the base score, so their maximum
    # is the top of the scale; adding another 10 would stretch the range
    # beyond any value that can actually occur.
    print("Defining Reader with rating scale...")
    reader = Reader(rating_scale=(0, float(ratings_df['rating'].max())))

    print("Loading dataset into Surprise format...")
    dataset = Dataset.load_from_df(ratings_df[['user', 'item', 'rating']], reader)

    return dataset

def train_model_with_genre_matching(model_path=None, random_state=None):
    """
    Train an SVD model on the genre-adjusted ratings and pickle it to disk.

    Progress messages are printed at every stage.

    Args:
        model_path: Where to write the pickled model. Defaults to the
            original hard-coded location, a machine-specific absolute Windows
            path; pass your own path when running elsewhere.
        random_state: Seed forwarded to the train/test split and to SVD so a
            run can be reproduced. ``None`` (the default) leaves both unseeded.

    Returns:
        None. The trained model is written to ``model_path``.
    """
    print("Starting the training process...")

    if model_path is None:
        model_path = r'C:\Users\Melarc.py\Documents\GitHub\FlixFinder\recommendation\models\trained_model_with_genre_matching.pkl'

    # Load the dataset with genre matching score
    data = get_ratings_dataset_with_genre_matching()

    # Train-test split; the held-out 20% is not evaluated by this function.
    print("Splitting data into training and test sets...")
    trainset, _ = train_test_split(data, test_size=0.2, random_state=random_state)

    print("Training the model with SVD algorithm...")
    algo = SVD(n_epochs=20, lr_all=0.01, reg_all=0.2, random_state=random_state)

    algo.fit(trainset)

    print("Model trained successfully.")

    # Save the trained model to a file
    with open(model_path, 'wb') as file:
        pickle.dump(algo, file)

    print(f"Model trained with Genre Matching Score and saved successfully at {model_path}!")

# Run the full pipeline: build the dataset, train the model and save it to disk.
train_model_with_genre_matching()


Starting the training process...
Fetching ratings data from the database...
Total ratings fetched: 37727
User ID: 4 | User's Preferred Genres: ['Drama', 'Action']
Movie ID: 2114 | Movie's Genres: ['Drama']
User ID: 4 | User's Preferred Genres: ['Drama', 'Action']
Movie ID: 3060 | Movie's Genres: ['Comedy']
User ID: 4 | User's Preferred Genres: ['Drama', 'Action']
Movie ID: 3071 | Movie's Genres: ['Family', 'Documentary']
User ID: 4 | User's Preferred Genres: ['Drama', 'Action']
Movie ID: 3169 | Movie's Genres: ['Animation', 'Family']
User ID: 4 | User's Preferred Genres: ['Drama', 'Action']
Movie ID: 3208 | Movie's Genres: ['Comedy', 'Romance', 'Drama', 'Science Fiction']
User ID: 8 | User's Preferred Genres: ['Fantasy', 'Romance', 'Thriller', 'Comedy', 'Drama']
Movie ID: 5650 | Movie's Genres: ['Comedy', 'Drama']
User ID: 10 | User's Preferred Genres: ['Romance', 'Horror', 'Action']
Movie ID: 735 | Movie's Genres: ['Unknown']
User ID: 11 | User's Preferred Genres: ['Action', 'Comedy',

In [35]:
from surprise import Dataset, SVD, Reader
from surprise.model_selection import train_test_split
from django.contrib.auth import get_user_model
from ratings.models import Rating
from movies.models import Movie
import pandas as pd
import pickle
import json

# Resolve the active user model class; the functions below query it through `User.objects`.
User = get_user_model()

def get_ratings_dataset_with_genre_matching():
    """
    Build a Surprise dataset whose ratings include a Genre Matching Score.

    Every rating's score is increased by the number of genres shared between
    the rating user's preferred genres and the rated movie's genres.

    Returns:
        The object returned by ``Dataset.load_from_df`` for the columns
        ``user``, ``item`` and ``rating`` (the adjusted score).

    Raises:
        ValueError: If there are no ratings to build a dataset from.
    """
    ratings_query = Rating.objects.all().values('user_id', 'movie_id', 'score')

    # Users and movies repeat across ratings, so remember their genre sets
    # after the first lookup instead of querying the database once per rating.
    user_genres_by_id = {}
    movie_genres_by_id = {}
    records = []

    for rating in ratings_query:
        user_id = rating['user_id']
        movie_id = rating['movie_id']
        score = rating['score']

        if user_id not in user_genres_by_id:
            preferences = User.objects.get(id=user_id).preferences
            # Preferences may still be a JSON string; decode it first.
            if isinstance(preferences, str):
                preferences = json.loads(preferences)
            # Missing or malformed preferences count as "no preferred genres".
            stored = preferences.get('genres') if isinstance(preferences, dict) else None
            user_genres_by_id[user_id] = set(stored or [])
        if movie_id not in movie_genres_by_id:
            movie = Movie.objects.get(id=movie_id)
            movie_genres_by_id[movie_id] = set(movie.genres.values_list('name', flat=True))

        genre_matching_score = len(user_genres_by_id[user_id] & movie_genres_by_id[movie_id])

        # Adding genre matching score to the rating score
        adjusted_score = float(score) + genre_matching_score

        records.append((user_id, movie_id, adjusted_score))

    if not records:
        raise ValueError("No ratings found; cannot build a dataset.")

    # Creating DataFrame
    ratings_df = pd.DataFrame(records, columns=['user', 'item', 'rating'])

    # The adjusted ratings already contain the base score, so their maximum
    # is the top of the scale; adding another 10 would stretch the range
    # beyond any value that can actually occur.
    reader = Reader(rating_scale=(0, float(ratings_df['rating'].max())))

    # Load the dataset into Surprise format
    dataset = Dataset.load_from_df(ratings_df[['user', 'item', 'rating']], reader)

    return dataset

def train_model_with_genre_matching(model_path=None, random_state=None):
    """
    Train an SVD model on the genre-adjusted ratings and pickle it to disk.

    Args:
        model_path: Where to write the pickled model. Defaults to the
            original hard-coded location, a machine-specific absolute Windows
            path; pass your own path when running elsewhere.
        random_state: Seed forwarded to the train/test split and to SVD so a
            run can be reproduced. ``None`` (the default) leaves both unseeded.

    Returns:
        None. The trained model is written to ``model_path`` and a completion
        message is printed.
    """
    if model_path is None:
        model_path = r'C:\Users\Melarc.py\Documents\GitHub\Backend\FlixFinder\recommendation\models\trained_model_with_genre_matching.pkl'

    # Load the dataset with genre matching score
    data = get_ratings_dataset_with_genre_matching()

    # Train-test split; the held-out 20% is not evaluated by this function.
    trainset, _ = train_test_split(data, test_size=0.2, random_state=random_state)

    # Train the SVD algorithm with optimized parameters
    algo = SVD(n_epochs=20, lr_all=0.01, reg_all=0.2, random_state=random_state)
    algo.fit(trainset)

    # Save the trained model to a file
    with open(model_path, 'wb') as file:
        pickle.dump(algo, file)

    # Print a completion message
    print(f"Model training completed and saved successfully at {model_path}!")

# Run the full pipeline: build the dataset, train the model and save it to disk.
train_model_with_genre_matching()


Model training completed and saved successfully at C:\Users\Melarc.py\Documents\GitHub\Backend\FlixFinder\recommendation\models\trained_model_with_genre_matching.pkl!


#### Hyperparameter Tuning with GridSearchCV

In [37]:
from surprise.model_selection import GridSearchCV
from surprise import SVD

def perform_hyperparameter_tuning():
    """
    Grid-search SVD hyperparameters on the genre-adjusted ratings.

    Runs a 5-fold cross-validated search over epochs, learning rate and
    regularisation, scoring each combination by RMSE and MAE, and prints the
    best RMSE together with the parameters that produced it.

    Returns:
        dict: The parameter combination with the best RMSE.
    """
    # Candidate values for each hyperparameter; every combination is tried.
    search_space = dict(
        n_epochs=[10, 20, 30],
        lr_all=[0.005, 0.01, 0.02],
        reg_all=[0.1, 0.2, 0.4],
    )

    # Genre-adjusted ratings to search over.
    ratings_data = get_ratings_dataset_with_genre_matching()

    grid_search = GridSearchCV(SVD, search_space, measures=['rmse', 'mae'], cv=5)
    grid_search.fit(ratings_data)

    # Report the winner by RMSE.
    print(f"Best RMSE score attained: {grid_search.best_score['rmse']}")
    print(f"Parameters that gave the best RMSE score: {grid_search.best_params['rmse']}")

    best_rmse_params = grid_search.best_params['rmse']
    return best_rmse_params

# Run the grid search and keep the best-RMSE parameter set in `best_params`.
best_params = perform_hyperparameter_tuning()


Best RMSE score attained: 1.8172603956125588
Parameters that gave the best RMSE score: {'n_epochs': 10, 'lr_all': 0.02, 'reg_all': 0.2}


In [39]:
from surprise import SVD, accuracy
from surprise.model_selection import train_test_split
import pickle

def train_optimized_model(params=None, model_path=None, random_state=None):
    """
    Train SVD with tuned hyperparameters, save it, and evaluate it on a hold-out set.

    Args:
        params: Optional dict of SVD keyword arguments (for example the result
            of a grid search). Given keys override the built-in defaults
            ``n_epochs=10``, ``lr_all=0.02`` and ``reg_all=0.2``.
        model_path: Where to write the pickled model. Defaults to the
            original hard-coded location, a machine-specific absolute Windows
            path; pass your own path when running elsewhere.
        random_state: Seed forwarded to the train/test split and, unless
            ``params`` already sets one, to SVD. ``None`` (the default)
            leaves both unseeded.

    Returns:
        None. The model is written to ``model_path``; RMSE and MAE on the
        20% hold-out set are printed.
    """
    # Start from the tuned defaults and let the caller override any of them.
    hyperparams = {'n_epochs': 10, 'lr_all': 0.02, 'reg_all': 0.2}
    if params:
        hyperparams.update(params)
    if random_state is not None:
        hyperparams.setdefault('random_state', random_state)

    if model_path is None:
        model_path = r'C:\Users\Melarc.py\Documents\GitHub\Backend\FlixFinder\recommendation\models\optimized_trained_model_with_genre_matching.pkl'

    # Load the dataset with genre matching score
    data = get_ratings_dataset_with_genre_matching()

    # Train-test split
    trainset, testset = train_test_split(data, test_size=0.2, random_state=random_state)

    # Initialize the SVD algorithm with the chosen parameters
    algo = SVD(**hyperparams)

    # Train the algorithm on the trainset
    algo.fit(trainset)

    # Save the trained model
    with open(model_path, 'wb') as file:
        pickle.dump(algo, file)

    print(f"Optimized model trained and saved successfully at {model_path}!")

    # Evaluate the model on the test set
    predictions = algo.test(testset)

    # Calculate RMSE and MAE
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)

    print(f"Evaluation completed. RMSE: {rmse}, MAE: {mae}")

# Train with the tuned hyperparameters, save the model, and print RMSE/MAE on the hold-out set.
train_optimized_model()


Optimized model trained and saved successfully at C:\Users\Melarc.py\Documents\GitHub\Backend\FlixFinder\recommendation\models\optimized_trained_model_with_genre_matching.pkl!
RMSE: 1.8175
MAE:  1.3989
Evaluation completed. RMSE: 1.8175353534541847, MAE: 1.3988781552113536
