In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors


In [34]:

# Read the movie feature table from disk (edit file_path to point at your own copy)
file_path = 'movie_dataset.csv'
data = pd.read_csv(file_path)

# Preview the leading rows to confirm the expected columns were loaded
preview_rows = data.head()
print(preview_rows)




                       Movie Title  TMDBId  Comedy  Romance  Drama  Action  \
0  The Shawshank Redemption (1994)     278       2        1     10       3   
1             The Godfather (1972)     238       2        3     10       4   
2    The Godfather: Part II (1974)     240       2        3     10       4   
3           The Dark Knight (2008)     155       3        2      8       9   
4              12 Angry Men (1957)     389       2        1     10       1   

   Acting Performance  Engagingness  
0                  10            10  
1                  10             9  
2                  10             9  
3                  10            10  
4                  10             9  


In [40]:
# Per-feature multipliers used to emphasise some characteristics over others.
# The keys must match the CSV column names exactly.
weights = {
    'Comedy': 1.5,
    'Romance': 1,
    'Drama': 2,
    'Action': 1,
    'Acting Performance': 1.5,
    'Engagingness': 1,
}

# Build a separate frame holding each feature column scaled by its multiplier;
# `data` itself keeps the original, unweighted values.
weighted_features = pd.DataFrame(
    {feature: data[feature] * factor for feature, factor in weights.items()}
)

In [41]:
# Build the nearest-neighbour index over the weighted movie features.
# Five neighbours per query is the model's default; 'auto' lets scikit-learn pick the search algorithm.
knn_settings = {'n_neighbors': 5, 'algorithm': 'auto'}
knn_model = NearestNeighbors(**knn_settings)
knn_model.fit(weighted_features)

In [45]:
def _prompt_int(prompt, minimum=None, maximum=None):
    """Ask the user for a whole number, re-prompting until the answer is valid.

    Parameters
    ----------
    prompt : str
        Text shown to the user by ``input``.
    minimum, maximum : int or None
        Inclusive bounds; ``None`` disables the corresponding check.

    Returns
    -------
    int
        The first entered value that parses as an integer and lies within the bounds.
    """
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            # Previously a typo here aborted the whole cell with a traceback.
            print(f"'{raw}' is not a whole number, please try again.")
            continue
        if minimum is not None and value < minimum:
            print(f"Please enter a value of at least {minimum}.")
            continue
        if maximum is not None and value > maximum:
            print(f"Please enter a value of at most {maximum}.")
            continue
        return value


# Number of recommendations to display; at least one is required so the
# later slice over the neighbour indices is never empty or negative.
num_movies = _prompt_int("Enter the number of movies you want to find: ", minimum=1)

# Collect one rating per weighted category, enforcing the advertised 1-10 range,
# and scale each by the same multiplier that was applied to the movie features.
print("Enter your ratings (from 1 to 10) for each category:")
user_ratings = {
    category: _prompt_int(f"Enter rating for {category}: ", minimum=1, maximum=10) * weight
    for category, weight in weights.items()
}

# Single-row DataFrame whose columns follow the order of `weights`,
# matching the columns the model was fitted on.
user_ratings_df = pd.DataFrame([user_ratings])

Enter the number of movies you want to find: 3
Enter your ratings (from 1 to 10) for each category:
Enter rating for Comedy: 2
Enter rating for Romance: 1
Enter rating for Drama: 10
Enter rating for Action: 3
Enter rating for Acting Performance: 10
Enter rating for Engagingness: 10


In [46]:
# Find the movies nearest to the weighted user ratings.
# The model's default neighbour count is fixed at construction time, so request
# enough neighbours to satisfy num_movies explicitly; otherwise asking for more
# movies than that default silently returns fewer results. Never request fewer
# than the model default, and never more than the number of fitted rows
# (kneighbors raises ValueError in that case).
n_query = min(max(num_movies, knn_model.n_neighbors), len(weighted_features))
distances, indices = knn_model.kneighbors(user_ratings_df, n_neighbors=n_query)

In [47]:
# Report the closest matches, capped at the number of movies the user asked for.
feature_names = list(weighted_features.columns)
print("\nMovies closest to your criteria:")
for row_position in indices[0][:num_movies]:
    match = data.iloc[row_position]
    # Values are read from `data`, so they are the original, unweighted scores.
    report = [f"Movie Name: {match['Movie Title']}", "Characteristics:"]
    report.extend(f"{name}: {match[name]}" for name in feature_names)
    report.append("------------------------------------------")
    print("\n".join(report))


Movies closest to your criteria:
Movie Name: The Shawshank Redemption (1994)
Characteristics:
Comedy: 2
Romance: 1
Drama: 10
Action: 3
Acting Performance: 10
Engagingness: 10
------------------------------------------
Movie Name: 12 Angry Men (1957)
Characteristics:
Comedy: 2
Romance: 1
Drama: 10
Action: 1
Acting Performance: 10
Engagingness: 9
------------------------------------------
Movie Name: Schindler's List (1993)
Characteristics:
Comedy: 1
Romance: 2
Drama: 10
Action: 2
Acting Performance: 10
Engagingness: 9
------------------------------------------
