# Movie Recommender System

### Data Preparation

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Sample movie data
movies = pd.DataFrame({
    'movie_id': list(range(1, 51)),
    'title': [
        'Movie A', 'Movie B', 'Movie C', 'Movie D', 'Movie E', 'Movie F', 'Movie G', 'Movie H', 'Movie I', 'Movie J',
        'Movie K', 'Movie L', 'Movie M', 'Movie N', 'Movie O', 'Movie P', 'Movie Q', 'Movie R', 'Movie S', 'Movie T',
        'Movie U', 'Movie V', 'Movie W', 'Movie X', 'Movie Y', 'Movie Z', 'Movie AA', 'Movie AB', 'Movie AC', 'Movie AD',
        'Movie AE', 'Movie AF', 'Movie AG', 'Movie AH', 'Movie AI', 'Movie AJ', 'Movie AK', 'Movie AL', 'Movie AM', 'Movie AN',
        'Movie AO', 'Movie AP', 'Movie AQ', 'Movie AR', 'Movie AS', 'Movie AT', 'Movie AU', 'Movie AV', 'Movie AW', 'Movie AX'
    ],
    'genres': [
        'Action|Adventure', 'Action|Sci-Fi', 'Drama', 'Comedy|Romance', 'Horror', 'Adventure|Fantasy', 'Thriller', 'Documentary', 'Animation|Family', 'Biography|Drama',
        'Action|Comedy', 'Crime|Drama', 'Sci-Fi|Thriller', 'Mystery|Thriller', 'Romance|Drama', 'Fantasy|Adventure', 'Musical|Romance', 'Horror|Thriller', 'Drama|War', 'Animation|Adventure',
        'Comedy|Drama', 'Action|Thriller', 'Fantasy|Romance', 'Documentary|Biography', 'Comedy|Musical', 'Action|Fantasy', 'Biography|History', 'Drama|Mystery', 'Crime|Thriller', 'Adventure|Sci-Fi',
        'Drama|Romance', 'Comedy|Fantasy', 'Action|Crime', 'Horror|Comedy', 'Thriller|Mystery', 'Adventure|Animation', 'Drama|Biography', 'Romance|Fantasy', 'Musical|Comedy', 'Action|Adventure|Fantasy',
        'Crime|Comedy', 'Horror|Sci-Fi', 'Drama|Family', 'Comedy|Adventure', 'Thriller|Action', 'Adventure|Drama', 'Fantasy|Comedy', 'Biography|Documentary', 'Family|Animation', 'Romance|Comedy'
    ],
    'plot_keywords': [
        'hero, villain, fight', 'space, future, war', 'love, life, struggle', 'laughter, love, relationships', 'fear, night, scream', 'journey, magic, creature', 'suspense, mystery, chase', 'real, story, truth', 'kids, fun, animated', 'true, inspiring, history',
        'explosion, fun, partner', 'crime, justice, police', 'alien, future, battle', 'detective, crime, mystery', 'love, heartbreak, relationship', 'magic, epic, quest', 'song, dance, love', 'ghost, fear, dark', 'soldier, battle, sacrifice', 'animated, hero, journey',
        'life, funny, poignant', 'spy, chase, danger', 'magic, love, dream', 'true, life, inspiring', 'dance, sing, fun', 'hero, epic, fantasy', 'true, life, struggle', 'secrets, mystery, drama', 'law, crime, detective', 'future, adventure, space',
        'passion, romance, tears', 'imagination, fun, magic', 'heist, crime, plan', 'zombie, humor, survive', 'puzzle, suspense, thriller', 'fun, kids, adventure', 'history, true, drama', 'love, fantasy, magic', 'fun, laughter, dance', 'hero, power, epic',
        'heist, funny, crime', 'alien, horror, space', 'family, drama, life', 'fun, adventure, comedy', 'spy, thriller, danger', 'adventure, quest, hero', 'magic, funny, journey', 'true, real, story', 'animated, family, fun', 'romance, funny, love'
    ]
})

In [3]:
movies.head()

Unnamed: 0,movie_id,title,genres,plot_keywords
0,1,Movie A,Action|Adventure,"hero, villain, fight"
1,2,Movie B,Action|Sci-Fi,"space, future, war"
2,3,Movie C,Drama,"love, life, struggle"
3,4,Movie D,Comedy|Romance,"laughter, love, relationships"
4,5,Movie E,Horror,"fear, night, scream"


In [4]:
# Convert genres to a format suitable for vectorization
movies['genres'] = movies['genres'].str.replace('|', ' ')

In [5]:
movies.head()

Unnamed: 0,movie_id,title,genres,plot_keywords
0,1,Movie A,Action Adventure,"hero, villain, fight"
1,2,Movie B,Action Sci-Fi,"space, future, war"
2,3,Movie C,Drama,"love, life, struggle"
3,4,Movie D,Comedy Romance,"laughter, love, relationships"
4,5,Movie E,Horror,"fear, night, scream"


### Feature Represenatation

In [6]:
# Create TF-IDF vectorizers for genres and plot_keywords
tfidf_genres = TfidfVectorizer()
tfidf_plot_keywords = TfidfVectorizer()

# Fit and transform the data
genres_matrix = tfidf_genres.fit_transform(movies['genres'])
keywords_matrix = tfidf_plot_keywords.fit_transform(movies['plot_keywords'])

### Combine Features & Calculate Similarity

In [7]:
# Combine the features
combined_features = np.hstack([genres_matrix.toarray(), keywords_matrix.toarray()])

# Compute cosine similarity
cosine_sim = cosine_similarity(combined_features, combined_features)

### Recommender Based on Similiarities

In [8]:
def recommend_movies(movie_title, movies, cosine_sim):
    # Get the index of the movie that matches the title
    idx = movies[movies['title'] == movie_title].index[0]

    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar movies
    return movies['title'].iloc[movie_indices]

# Example usage
recommendationsS = recommend_movies('Movie A', movies, cosine_sim)
print(recommendationsS)

39    Movie AN
45    Movie AT
25     Movie Z
19     Movie T
10     Movie K
21     Movie V
44    Movie AS
43    Movie AR
15     Movie P
5      Movie F
Name: title, dtype: object


In [9]:
# Selecting "Movie A" from the movies DataFrame
movieA = movies.loc[movies['title'] == 'Movie A']

movieA

Unnamed: 0,movie_id,title,genres,plot_keywords
0,1,Movie A,Action Adventure,"hero, villain, fight"


In [10]:
# Select specific rows from the original DataFrame using their indices
indices = [39, 45, 25, 19, 10, 21, 44, 43, 15, 5]
selected_movies_from_original = movies.iloc[indices]

# Selected DataFrame
selected_movies_from_original

Unnamed: 0,movie_id,title,genres,plot_keywords
39,40,Movie AN,Action Adventure Fantasy,"hero, power, epic"
45,46,Movie AT,Adventure Drama,"adventure, quest, hero"
25,26,Movie Z,Action Fantasy,"hero, epic, fantasy"
19,20,Movie T,Animation Adventure,"animated, hero, journey"
10,11,Movie K,Action Comedy,"explosion, fun, partner"
21,22,Movie V,Action Thriller,"spy, chase, danger"
44,45,Movie AS,Thriller Action,"spy, thriller, danger"
43,44,Movie AR,Comedy Adventure,"fun, adventure, comedy"
15,16,Movie P,Fantasy Adventure,"magic, epic, quest"
5,6,Movie F,Adventure Fantasy,"journey, magic, creature"


### Recommender Based on Key-words

In [11]:
# Recommender function
def recommend_movies_by_keywords(input_keywords, movies, cosine_sim):
    # Transform the input keywords to match the vectorizer's format
    input_keywords_vec = tfidf_plot_keywords.transform([input_keywords]).toarray()
    
    # Use the same feature matrix as before for genres
    input_combined_features = np.hstack([np.zeros(genres_matrix.shape[1]), input_keywords_vec.flatten()])

    # Compute cosine similarity between input keywords and all movies
    sim_scores = cosine_similarity([input_combined_features], combined_features).flatten()

    # Get the indices of the most similar movies
    sim_scores_indices = sim_scores.argsort()[-11:-1][::-1]  # Top 10 excluding the input itself

    # Return the titles of the most similar movies
    return movies['title'].iloc[sim_scores_indices]

# Example usage
input_keywords = "secrets, history, true"
recommendationsk = recommend_movies_by_keywords(input_keywords, movies, cosine_sim)
print(recommendationsk)

9      Movie J
27    Movie AB
26    Movie AA
23     Movie X
47    Movie AV
3      Movie D
4      Movie E
20     Movie U
19     Movie T
1      Movie B
Name: title, dtype: object


In [12]:
# Selecting "Movie J" from the movies DataFrame
selected_movie = movies.loc[movies['title'] == 'Movie J']

selected_movie

Unnamed: 0,movie_id,title,genres,plot_keywords
9,10,Movie J,Biography Drama,"true, inspiring, history"
