## 1. Loading the Data

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned movie and TV-series tables from disk (movies first, then series).
movies_data, tv_data = map(
    pd.read_csv,
    ('data/cleaned_movies_data.csv', 'data/cleaned_series_data.csv'),
)

## 2. TF-IDF Vectorization for Text Features

In [9]:
# Replace missing overviews with empty strings before they are vectorized.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the column selection: that form operates on an
# intermediate object, triggers a FutureWarning on pandas 2.x and leaves the
# DataFrame unchanged once Copy-on-Write is enabled. Re-running this cell is
# harmless because the assignment is idempotent.
movies_data['overview'] = movies_data['overview'].fillna('')
tv_data['overview'] = tv_data['overview'].fillna('')

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

# One TF-IDF vectorizer per catalogue. Re-fitting a single vectorizer on the TV
# overviews would overwrite the vocabulary and IDF weights learned from the
# movie overviews, so the fitted object could no longer be used to transform
# new text into the column space of tfidf_matrix_movies.
tfidf_vectorizer_movies = TfidfVectorizer(stop_words='english')
tfidf_vectorizer_tv = TfidfVectorizer(stop_words='english')

# Fit and transform the overview column for movies
tfidf_matrix_movies = tfidf_vectorizer_movies.fit_transform(movies_data['overview'])

# Fit and transform the overview column for TV series
tfidf_matrix_tv = tfidf_vectorizer_tv.fit_transform(tv_data['overview'])

# Backward-compatible alias: code that still refers to `tfidf_vectorizer` sees
# a vectorizer fitted on the TV overviews, which is the state the single shared
# vectorizer used to end up in.
tfidf_vectorizer = tfidf_vectorizer_tv

## 3. Calculate Cosine Similarity


In [11]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity of every movie overview against every other one.
# With the second argument omitted, cosine_similarity compares the rows of the
# matrix with themselves.
cosine_sim_movies = cosine_similarity(tfidf_matrix_movies)

# Same pairwise comparison for the TV-series overviews.
cosine_sim_tv = cosine_similarity(tfidf_matrix_tv)

## 4. Create a Function to Get Recommendations


In [12]:
def _recommend_similar(data, label_column, label, cosine_sim, top_n=10):
    """Return the labels of the rows most similar to the row named ``label``.

    Parameters
    ----------
    data : pandas.DataFrame
        Catalogue to search. Row ``i`` of ``data`` is expected to correspond to
        row/column ``i`` of ``cosine_sim``.
    label_column : str
        Column of ``data`` holding the item names.
    label : str
        Name of the item to find neighbours for. If several rows carry the same
        name, the first one is used.
    cosine_sim : 2-D indexable
        Pairwise similarity scores; ``cosine_sim[i]`` must yield the scores of
        item ``i`` against every item.
    top_n : int, default 10
        Maximum number of neighbours to return.

    Returns
    -------
    pandas.Series
        Names of the ``top_n`` most similar items, most similar first. Ties keep
        their original row order.

    Raises
    ------
    IndexError
        If no row of ``data`` has ``label`` in ``label_column``.
    """
    # Locate the item by *position*, not by index label, because the position
    # is what addresses a row of the similarity matrix. The two only coincide
    # while the frame still carries its default RangeIndex.
    query_pos = next(
        (pos for pos, value in enumerate(data[label_column]) if value == label),
        None,
    )
    if query_pos is None:
        raise IndexError(f"{label!r} not found in column {label_column!r}")

    # Rank every item by its similarity to the query, highest first.
    ranked = sorted(
        enumerate(cosine_sim[query_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Drop the query item explicitly instead of assuming it is ranked first:
    # an item with an identical overview can tie with it and sort ahead, and an
    # item with an empty overview has zero similarity even to itself.
    neighbour_positions = [pos for pos, _ in ranked if pos != query_pos][:top_n]

    return data[label_column].iloc[neighbour_positions]


# Function to get movie recommendations
def get_movie_recommendations(title, cosine_sim=cosine_sim_movies, top_n=10):
    """Return the titles of the movies whose overviews are closest to ``title``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``movies_data['title']``.
    cosine_sim : 2-D indexable, optional
        Pairwise similarity scores aligned with the rows of ``movies_data``.
        Defaults to ``cosine_sim_movies`` as it existed when this function was
        defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` movie titles, most similar first; never includes the
        queried movie's own row.

    Raises
    ------
    IndexError
        If ``title`` is not present in ``movies_data['title']``.
    """
    return _recommend_similar(movies_data, 'title', title, cosine_sim, top_n)


# Function to get TV series recommendations
def get_tv_recommendations(name, cosine_sim=cosine_sim_tv, top_n=10):
    """Return the names of the TV series whose overviews are closest to ``name``.

    Parameters
    ----------
    name : str
        Exact value to look up in ``tv_data['name']``.
    cosine_sim : 2-D indexable, optional
        Pairwise similarity scores aligned with the rows of ``tv_data``.
        Defaults to ``cosine_sim_tv`` as it existed when this function was
        defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` series names, most similar first; never includes the
        queried series' own row.

    Raises
    ------
    IndexError
        If ``name`` is not present in ``tv_data['name']``.
    """
    return _recommend_similar(tv_data, 'name', name, cosine_sim, top_n)

## 5. Test the Recommendation Functions

In [13]:
# Smoke-test the movie recommender with a sample title.
movie_recs = get_movie_recommendations('Inside Out 2')
print(movie_recs)

# Smoke-test the TV-series recommender with a sample show.
tv_recs = get_tv_recommendations('House of the Dragon')
print(tv_recs)

19                     Inside Out
387           Riley's First Date?
5119          Secret Headquarters
11436                       Awake
7073     My Teacher, My Obsession
7230                 Suzhou River
12277                      Equals
10309              Arlington Road
11862                   Baby Boom
7734                     The Maid
Name: title, dtype: object
8204       Requiem of the Rose King
2142    Magical Girl Lyrical Nanoha
8729             Dungeons & Dragons
1885               Ranking of Kings
1008           Suicide Squad Isekai
6129                The Grand Canal
6861              The Crowned Clown
8251                   Bloody Heart
5989                    Love Family
946                        Mirzapur
Name: name, dtype: object
