In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the movie table. As the preview below shows, each row carries movie
# metadata (movie_id, title, overview, actors), a stringified
# `overview_embedding`, and a wide run of PC* component columns.
df=pd.read_csv('reduced_movies.csv')
df.head()

Unnamed: 0,movie_id,overview,actors,title,overview_embedding,PC1,PC2,PC3,PC4,PC5,...,PC2726,PC2727,PC2728,PC2729,PC2730,PC2731,PC2732,PC2733,PC2734,PC2735
0,19995,"In the 22nd century, a paraplegic Marine is di...","['Sam Worthington', 'Zoe Saldana', 'Sigourney ...",Avatar,[ 0.1523016 -0.22653992 -0.00164659 0.246993...,-0.071153,0.177764,-0.49596,0.61942,2.315312,...,-0.549049,0.416125,0.583816,-1.235424,-0.217905,1.392769,0.868042,0.672337,-0.55691,-0.856309
1,285,"Captain Barbossa, long believed to be dead, ha...","['Johnny Depp', 'Orlando Bloom', 'Keira Knight...",Pirates of the Caribbean: At World's End,[-0.03810595 -0.4891934 0.18380359 0.366908...,-0.08469,-0.08495,-0.229105,0.13851,-0.060964,...,0.893432,2.389504,-0.449903,1.119013,-1.458596,-1.093995,0.679739,-0.575387,0.505809,-0.845537
2,206647,A cryptic message from Bond’s past sends him o...,"['Daniel Craig', 'Christoph Waltz', 'Léa Seydo...",Spectre,[-0.01723344 -0.1734568 -0.15426032 0.169259...,-0.155873,0.091269,-0.245384,-0.003114,-0.14712,...,-0.705979,-0.118014,0.797119,0.11219,0.5418,-1.587667,2.604494,-2.320952,0.74606,0.302421
3,49026,Following the death of District Attorney Harve...,"['Christian Bale', 'Michael Caine', 'Gary Oldm...",The Dark Knight Rises,[ 1.17363729e-01 -3.72660667e-01 1.31389931e-...,-0.217352,-0.093446,0.030532,0.064242,-0.066258,...,0.612776,0.977,-0.166704,0.246222,-1.200927,0.215816,-0.202876,-1.985444,0.294265,1.107277
4,49529,"John Carter is a war-weary, former military ca...","['Taylor Kitsch', 'Lynn Collins', 'Samantha Mo...",John Carter,[-7.65719190e-02 -1.36003941e-01 2.19224036e-...,-0.244321,-0.038469,-0.58549,0.247836,0.61206,...,0.726909,0.230572,-0.137563,-0.698721,-0.134045,-0.333373,-0.008612,-0.462235,-0.313791,0.454579


In [3]:
def convert_to_array(embedding_str):
    """Parse a stringified embedding such as ``"[ 0.15 -0.22 1.1e-01]"``.

    Parameters
    ----------
    embedding_str : str or list or tuple or numpy.ndarray
        Text representation of a 1-D vector: numbers separated by whitespace
        and/or commas, optionally wrapped in square brackets. Values that are
        already numeric sequences are accepted and passed through, so the
        conversion can be re-applied to an already-converted column without
        failing.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per number found in the input.

    Raises
    ------
    ValueError
        If a token in the string cannot be parsed as a float.
    AttributeError
        If the input is neither a string nor a list/tuple/ndarray
        (for example a missing value read as NaN).
    """
    # Already parsed: normalise to a float array instead of calling str methods.
    if isinstance(embedding_str, (list, tuple, np.ndarray)):
        return np.asarray(embedding_str, dtype=float)

    # Drop surrounding whitespace first so the bracket strip actually reaches
    # the brackets, then treat commas as plain separators.
    tokens = embedding_str.strip().strip('[]').replace(',', ' ').split()
    return np.array([float(token) for token in tokens])

# Replace the stringified embeddings with numeric arrays, in place on `df`.
# The column stays dtype=object (see the output below) because every cell
# now holds a NumPy array rather than a scalar.
df['overview_embedding'] = df['overview_embedding'].apply(convert_to_array)

# Preview the first few parsed embeddings as a sanity check.
print(df['overview_embedding'].head())


0    [0.1523016, -0.22653992, -0.00164659, 0.246993...
1    [-0.03810595, -0.4891934, 0.18380359, 0.366908...
2    [-0.01723344, -0.1734568, -0.15426032, 0.16925...
3    [0.117363729, -0.372660667, 0.131389931, 0.434...
4    [-0.076571919, -0.136003941, 0.219224036, 0.27...
Name: overview_embedding, dtype: object


In [4]:
from sklearn.metrics.pairwise import manhattan_distances

In [5]:
import numpy as np

def get_recommendations(df, title, top_n=10):
    """Recommend the movies closest to ``title`` by Manhattan (L1) distance.

    Each movie is represented by its ``overview_embedding`` vector together
    with every column whose name starts with ``PC``. The distance between two
    movies is the L1 distance over that combined feature vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``movie_id``, ``title`` and ``overview_embedding``
        (a list or NumPy array per row, all of equal length), plus the
        ``PC*`` columns. The frame is not modified.
    title : str
        Title to look up; matched case-insensitively against ``df['title']``.
        If several rows share the title, the first one is used as the query.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        The ``movie_id`` and ``title`` columns of the ``top_n`` nearest movies,
        nearest first, keeping the original index labels. Rows whose title
        equals the query title are excluded. If the title is not found, an
        error message string is returned instead of a frame.
    """
    title_lower = title.lower()
    # Rows with a missing title compare as "not the query".
    is_query = (
        (df['title'].str.lower() == title_lower)
        .fillna(False)
        .to_numpy(dtype=bool)
    )

    if not is_query.any():
        return f"Error: The movie '{title}' was not found in the dataset."

    # Position of the first matching row; it supplies the query vector.
    query_pos = np.flatnonzero(is_query)[0]

    # Build both feature matrices once instead of re-filtering the PC columns
    # for every row. np.vstack accepts a mix of lists and arrays.
    embeddings = np.asarray(np.vstack(list(df['overview_embedding'])), dtype=float)
    pca_components = df.filter(regex='^PC').to_numpy(dtype=float)

    # The L1 distance over the concatenated vector equals the sum of the L1
    # distances over its two parts, so no concatenated matrix is needed.
    distances = (
        np.abs(embeddings - embeddings[query_pos]).sum(axis=1)
        + np.abs(pca_components - pca_components[query_pos]).sum(axis=1)
    )

    # Rank on a small two-column frame so the caller's DataFrame never gains
    # a 'manhattan_distance' column or has its embeddings rewritten.
    candidates = df.loc[~is_query, ['movie_id', 'title']].assign(
        manhattan_distance=distances[~is_query]
    )
    # Stable sort: ties keep the original row order.
    ranked = candidates.sort_values(by='manhattan_distance', kind='stable')
    return ranked.head(top_n)[['movie_id', 'title']]


In [6]:
# Interactive driver: read a title from the user and look up its neighbours.
# NOTE(review): input() blocks a non-interactive "Run All"; consider a
# constant in a config cell if the notebook must run unattended.
movie_title_input = input("Enter the movie title: ")
recommended_movies = get_recommendations(df, movie_title_input, top_n=10)

# get_recommendations returns either a DataFrame of (movie_id, title) rows or
# an error-message string when the title is unknown; print() handles both.
print(recommended_movies)

      movie_id                            title
48       81005            Jack the Giant Slayer
4617     73981           Ayurveda: Art of Being
187     417859                    Puss in Boots
1426     14175                          Valiant
3038     17710            Hey Arnold! The Movie
1306    257932      Dragon Nest: Warriors' Dawn
2182     13682           Pooh's Heffalump Movie
2681     15173       Jonah: A VeggieTales Movie
605      59981  Legends of Oz: Dorothy's Return
2292     16110    Thomas and the Magic Railroad
