In [8]:
# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [9]:
# Read data
file_path = Path("movies_merged.csv")
df = pd.read_csv(file_path)

# Display sample data
df.head()

Unnamed: 0,title,startYear,primaryName,runtimeMinutes,averageRating,numVotes,genre_Action,genre_Adventure,genre_Animation,genre_Biography,...,category_archive_footage,category_archive_sound,category_cinematographer,category_composer,category_director,category_editor,category_producer,category_production_designer,category_self,category_writer
0,Criminal Network,2023,Breand�n O'Connor,75,4.3,57,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
1,Reminiscing Shadows,2023,Toy Cook,75,3.2,38,1,0,0,0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
2,Killer Kites,2023,Zach Lee,72,4.6,15,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0.0,1.0,0.0,0.0,0.0
3,Njaan Ippo Entha Cheyya?,2023,Geetha Kailasam,109,6.5,840,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
4,Motion Detected,2023,Bob Clendenin,80,3.2,201,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0


In [10]:
# filter the data so we only use movies starting at 1970
df = df[df['startYear'] >= 1970]

In [11]:
df.isnull().sum()

title                           0
startYear                       0
primaryName                     0
runtimeMinutes                  0
averageRating                   0
numVotes                        0
genre_Action                    0
genre_Adventure                 0
genre_Animation                 0
genre_Biography                 0
genre_Comedy                    0
genre_Crime                     0
genre_Documentary               0
genre_Drama                     0
genre_Family                    0
genre_Fantasy                   0
genre_Film-Noir                 0
genre_History                   0
genre_Horror                    0
genre_Music                     0
genre_Musical                   0
genre_Mystery                   0
genre_News                      0
genre_Reality-TV                0
genre_Romance                   0
genre_Sci-Fi                    0
genre_Sport                     0
genre_Talk-Show                 0
genre_Thriller                  0
genre_War     

In [12]:
column_names = df.columns.tolist()
print("Column names in the DataFrame:")
print(column_names)

Column names in the DataFrame:
['title', 'startYear', 'primaryName', 'runtimeMinutes', 'averageRating', 'numVotes', 'genre_Action', 'genre_Adventure', 'genre_Animation', 'genre_Biography', 'genre_Comedy', 'genre_Crime', 'genre_Documentary', 'genre_Drama', 'genre_Family', 'genre_Fantasy', 'genre_Film-Noir', 'genre_History', 'genre_Horror', 'genre_Music', 'genre_Musical', 'genre_Mystery', 'genre_News', 'genre_Reality-TV', 'genre_Romance', 'genre_Sci-Fi', 'genre_Sport', 'genre_Talk-Show', 'genre_Thriller', 'genre_War', 'genre_Western', 'category_actor', 'category_actress', 'category_archive_footage', 'category_archive_sound', 'category_cinematographer', 'category_composer', 'category_director', 'category_editor', 'category_producer', 'category_production_designer', 'category_self', 'category_writer']


In [13]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder

# Label Encoding 'primaryName'
encoder = LabelEncoder()
df['encoded_primaryName'] = encoder.fit_transform(df['primaryName'])

# Selecting the features for the KNN model
knn_columns = ['startYear', 'encoded_primaryName', 'runtimeMinutes', 'averageRating'] \
              + [col for col in df.columns if 'genre_' in col] \
              + ['category_actor', 'category_actress', 'category_director']

knn_df = df[knn_columns].astype(float)  # Ensure all values are float

# Initialize the model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=11)
model_knn.fit(knn_df)

# Function for getting movie recommendations
def get_recommendations(title):
    idx = df[df['title'] == title].index[0]
    distances, indices = model_knn.kneighbors(knn_df.iloc[idx].values.reshape(1, -1))
    movie_indices = indices.flatten()[1:]
    return df['title'].iloc[movie_indices]

# Test the function
print(get_recommendations('Oppenheimer'))


5187                          The Plains
36651                       Interstellar
44051              The Dark Knight Rises
67021                 The Gospel of John
57703                    The Dark Knight
50527                          Inception
63024                      Batman Begins
13867    They Say Nothing Stays the Same
8074                  Capitol Punishment
59094                             2 Epic
Name: title, dtype: object




In [14]:
# Import required libraries
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
import requests

# Read data
file_path = Path("movies_merged.csv")
df = pd.read_csv(file_path)

# Filter the data for movies starting from 1970
df = df[df['startYear'] >= 1970]

# Label Encoding 'primaryName'
encoder = LabelEncoder()
df['encoded_primaryName'] = encoder.fit_transform(df['primaryName'])

# Selecting the features for the KNN model
knn_columns = ['startYear', 'encoded_primaryName', 'runtimeMinutes', 'averageRating'] \
              + [col for col in df.columns if 'genre_' in col] \
              + ['category_actor', 'category_actress', 'category_director']
              
knn_df = df[knn_columns].astype(float)  # Ensure all values are float

# Initialize and fit the model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=11)
model_knn.fit(knn_df)

# Function for getting movie recommendations
def get_recommendations(title):
    matching_movies = df[df['title'] == title]
    if matching_movies.empty:
        return f"No similar movies found for {title}"
    idx = matching_movies.index[0]
    distances, indices = model_knn.kneighbors(knn_df.iloc[idx].values.reshape(1, -1))
    movie_indices = indices.flatten()[1:]
    return df['title'].iloc[movie_indices]

# Make recommendations based on current box office movies
for index, row in df_box_office.iterrows():
    title = row['title']
    print(f"Movies similar to {title}:")
    recommendations = get_recommendations(title)
    print(recommendations)


NameError: name 'df_box_office' is not defined

In [None]:
import requests
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

# Function to fetch box office data from IMDb API
def fetch_box_office(api_key):
    url = f"https://imdb-api.com/en/API/BoxOffice/{api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()['items']
    else:
        return None

# Function to preprocess data
def preprocess_data(df):
    return df  # Replace with actual preprocessing steps

# Fetch current box office movies
api_key = 'k_ubtu0kka'
box_office_movies = fetch_box_office(api_key)
if box_office_movies:
    df_box_office = pd.DataFrame(box_office_movies)

# Preprocess the fetched data
df_box_office_preprocessed = preprocess_data(df_box_office)

# Make recommendations based on current box office movies
for index, row in df_box_office.iterrows():
    title = row['title']
    print(f"Movies similar to {title}:")
    print(get_recommendations(title))


Movies similar to The Nun II:
No similar movies found for The Nun II
Movies similar to A Haunting in Venice:
No similar movies found for A Haunting in Venice
Movies similar to The Equalizer 3:
21544        I Can Only Imagine
9652        Realm of Terracotta
6461            Born a Champion
63646            Self Medicated
3131                  Home2Home
9159     The Tragedy of Macbeth
63227         Magic Carpet Ride
70487           Passion of Mind
60838                Half Light
23402            More Than Only
Name: title, dtype: object
Movies similar to My Big Fat Greek Wedding 3:
No similar movies found for My Big Fat Greek Wedding 3
Movies similar to Barbie:
58790                  The Golden Compass
12393                               Vivos
84754                An Accomplished Fact
67683                         Adaptation.
67635                      Matchstick Men
21422                    The Border Fence
44070                            Hold Up!
72651                    The Piano Play



In [None]:
# Function to fetch box office data from IMDb API
def fetch_box_office(api_key):
    url = f"https://imdb-api.com/en/API/BoxOffice/{api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()['items']
    else:
        return None

# Label Encoding 'primaryName'
encoder = LabelEncoder()
df['encoded_primaryName'] = encoder.fit_transform(df['primaryName'])

# Selecting the features for the KNN model
knn_columns = ['startYear', 'encoded_primaryName', 'runtimeMinutes', 'averageRating'] \
              + [col for col in df.columns if 'genre_' in col] \
              + ['category_actor', 'category_actress', 'category_director']

knn_df = df[knn_columns].astype(float)  # Ensure all values are float

# Initialize the model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=11)
model_knn.fit(knn_df)

# Function for getting movie recommendations
def get_recommendations(title):
    matching_movies = df[df['title'] == title]
    
    if matching_movies.empty:
        return f"No similar movies found for {title}"

    idx = matching_movies.index[0]
    distances, indices = model_knn.kneighbors(knn_df.iloc[idx].values.reshape(1, -1))
    movie_indices = indices.flatten()[1:]
    
    all_recommendations = df.iloc[movie_indices]
    all_recommendations = all_recommendations[all_recommendations['averageRating'] >= 7]  # Filter by rating
    
    # Filter out recommendations based on box office movies
    box_office_titles = set(df_box_office['title'])
    filtered_recommendations = all_recommendations[all_recommendations['title'].isin(box_office_titles)]
    
    if filtered_recommendations.empty:
        other_recommendations = all_recommendations['title']
        return f"No box office movies found similar to {title}. Other recommended movies: {other_recommendations.to_list()}"
    
    return filtered_recommendations['title']

# Read data
file_path = Path("movies_merged.csv")
df = pd.read_csv(file_path)

# Filter the data for movies starting from 1970
df = df[df['startYear'] >= 1970]

# Fetch current box office movies
api_key = 'k_ubtu0kka'
box_office_movies = fetch_box_office(api_key)
if box_office_movies:
    df_box_office = pd.DataFrame(box_office_movies)

# Test function by providing a movie title
title = 'Tron Legacy: An IMAX 3D Experience'
print(f"Movies similar to {title}:")
print(get_recommendations(title))





Movies similar to Tron Legacy: An IMAX 3D Experience:
No box office movies found similar to Tron Legacy: An IMAX 3D Experience. Other recommended movies: ['The Trial of the King Killers', 'The Falls: Covenant of Grace', 'Hands That Bind', 'Legend Quest: The Origin']


