In [1]:
# Import relevant packages.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

In [4]:
# Load the IMDB data set from the course's GitHub repository and preview the first rows.

MOVIES_CSV_URL = "https://github.com/ArinB/MSBA-CA-Data/raw/main/CA05/movies_recommendation_data.csv"
movies = pd.read_csv(MOVIES_CSV_URL)
movies.head()

Unnamed: 0,Movie ID,Movie Name,IMDB Rating,Biography,Drama,Thriller,Comedy,Crime,Mystery,History,Label
0,58,The Imitation Game,8.0,1,1,1,0,0,0,0,0
1,8,Ex Machina,7.7,0,1,0,0,0,1,0,0
2,46,A Beautiful Mind,8.2,1,1,0,0,0,0,0,0
3,62,Good Will Hunting,8.3,0,1,0,0,0,0,0,0
4,97,Forrest Gump,8.8,0,1,0,0,0,0,0,0


In [5]:
# Inspect the column names, non-null counts and dtypes of the loaded table.
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie ID     30 non-null     int64  
 1   Movie Name   30 non-null     object 
 2   IMDB Rating  30 non-null     float64
 3   Biography    30 non-null     int64  
 4   Drama        30 non-null     int64  
 5   Thriller     30 non-null     int64  
 6   Comedy       30 non-null     int64  
 7   Crime        30 non-null     int64  
 8   Mystery      30 non-null     int64  
 9   History      30 non-null     int64  
 10  Label        30 non-null     int64  
dtypes: float64(1), int64(9), object(1)
memory usage: 2.7+ KB


In [9]:
# Average IMDB rating across all movies in the data set.
movies['IMDB Rating'].mean()

7.696666666666667

In [12]:
# Extract the features from the dataset. According to the instructions, we use every column except
# the two identifier columns ('Movie ID' and 'Movie Name'). Dropping them by name rather than by
# position keeps the identifiers out of the feature matrix even if the column order ever changes,
# and fails loudly (KeyError) if either column is missing.
features = movies.drop(columns=['Movie ID', 'Movie Name'])

In [13]:
# Initialize the KNN model with 5 neighbors and the distance metric set to 'euclidean'.
# NOTE(review): a movie that is part of the fitted data is presumably returned as one of its own
# 5 neighbors, which would leave 4 recommendations once it is filtered out — confirm this is intended.

knn_model = NearestNeighbors(n_neighbors=5, metric='euclidean')


In [14]:
# Fit the KNN model on the feature table so that neighbor queries can be run against it.

knn_model.fit(features)


In [15]:
# Create a function that takes a movie index and returns the indices of its nearest neighbours:

def recommend_movie_indices(movie_index, knn_model, features):
    """Return the indices of the movies nearest to the given movie.

    Parameters
    ----------
    movie_index : label
        Index label of the query movie's row in ``features``.
    knn_model : fitted neighbours model
        Object exposing ``kneighbors(X)`` that returns ``(distances, indices)``.
    features : pandas.DataFrame
        Feature table from which the query row is taken.

    Returns
    -------
    numpy.ndarray
        Flattened array of neighbour indices exactly as reported by
        ``knn_model.kneighbors``. How many are returned is decided by the
        model's own configuration, not by this function.

    Notes
    -----
    If the query row belongs to the data the model was fitted on, it is
    presumably reported as one of its own neighbours; callers that want
    recommendations should filter it out.
    """
    # Double brackets keep the row as a one-row DataFrame, so the column names
    # travel with the query. Handing a bare ndarray to a model that was fitted
    # on a DataFrame makes scikit-learn warn about missing feature names.
    query_row = features.loc[[movie_index]]

    # Only the neighbour indices are needed; the distances are discarded.
    _, neighbor_indices = knn_model.kneighbors(query_row)
    return neighbor_indices.flatten()


In [20]:
# Create a function to recommend movies based on a given movie name:

def recommend_movies(movie_name, knn_model, movies, features):
    """Print the titles of the movies most similar to ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title to look up; it must equal an entry of ``movies['Movie Name']``.
        When several rows share the title, the first one is used.
    knn_model : fitted neighbours model
        Passed through to ``recommend_movie_indices``.
    movies : pandas.DataFrame
        Table holding a ``'Movie Name'`` column.
    features : pandas.DataFrame
        Feature table passed through to ``recommend_movie_indices``.
        Assumed to have the same rows, in the same order, as ``movies`` and to
        be the data ``knn_model`` was fitted on.

    Returns
    -------
    None
        The header line and one ``- <title>`` line per neighbour are printed.

    Raises
    ------
    IndexError
        If no row of ``movies`` carries the requested name.
    """
    # Row positions (not index labels) of every row whose title matches.
    name_matches = np.flatnonzero((movies['Movie Name'] == movie_name).to_numpy())
    if name_matches.size == 0:
        # Same exception type as the bare ``.index[0]`` lookup would raise,
        # but with a message that names the title that was not found.
        raise IndexError(f"Movie '{movie_name}' was not found in the 'Movie Name' column.")

    movie_position = name_matches[0]
    # The helper looks the row up by label, so translate position -> label.
    movie_index = movies.index[movie_position]
    neighbor_positions = recommend_movie_indices(movie_index, knn_model, features)

    titles = movies['Movie Name']
    print(f"Movies similar to '{titles.iloc[movie_position]}':")

    # The neighbour search reports row positions, so read the titles
    # positionally and skip the query movie itself.
    for position in neighbor_positions:
        if position != movie_position:
            print(f"- {titles.iloc[position]}")


In [21]:
# Test the recommender system by providing a movie name.
# The name has to match an entry of the 'Movie Name' column exactly (the lookup uses ==).

movie_name = "Ex Machina"  # Change this value to test with other movie names
recommend_movies(movie_name, knn_model, movies, features)


Movies similar to 'Ex Machina':
- Gifted
- Stand and Deliver
- Finding Forrester
- A Brilliant Young Mind


