# Neon Films Analysis of Audience Movie Preferences

## Step 1: Data Collection

#### A. Load list of movies

In [8]:
import os

import pandas as pd

try:
    # Read the list of movie titles from the Excel workbook.
    movies_list = pd.read_excel("data/movies.xlsx")
    print("File loaded successfully.")

except Exception as e:
    # Report the failure, then fall back to an empty frame that still has the
    # "title" column so later cells see a defined (if empty) movies_list
    # instead of failing with NameError.
    print(f"Error loading file: {e}")
    movies_list = pd.DataFrame(columns=["title"])

File loaded successfully.


In [16]:
# Preview the first five rows of the loaded movie list.
print(movies_list.head(5))

                title
0  The Age of Adaline
1     Black Christmas
2              London
3             Twisted
4   Friends with Kids


In [10]:
# len() counts rows (one per movie). DataFrame.size counts rows x columns, so
# it would over-count as soon as the frame has more than one column.
print('Number of movies:', len(movies_list))

Number of movies: 150


#### B. Fetch data from OMDB API

In [11]:
import requests
import time

# OMDb API key. Prefer the OMDB_API_KEY environment variable so the credential
# does not have to live in the notebook; the literal is kept only as a
# backward-compatible fallback when the variable is unset or empty.
# NOTE(review): the fallback key is visible in plain text here; rotate it and
# then remove the literal.
OMDB_API_KEY = os.environ.get("OMDB_API_KEY") or "c0c5b16c"

In [27]:
# Function: get_omdb_data_title
# Description: looks up one movie by title in the OMDb API and returns a
#              selection of its fields under lower-case keys
# Input: movie title (str)
# Output: dict containing movie data if successful, otherwise None
#         (the API did not answer Response == "True", or the request/JSON
#         decoding raised, in which case the error is printed)
# Example Usage: movie_data = get_omdb_data_title("Inception")
def get_omdb_data_title(title):
    # output key -> field name in the OMDb JSON response
    field_map = {
        "title": "Title",
        "year": "Year",
        "rated": "Rated",
        "runtime": "Runtime",
        "imdb_rating": "imdbRating",
        "imdb_votes": "imdbVotes",
        "box_office": "BoxOffice",
        "released": "Released",
        "genre": "Genre",
        "director": "Director",
        "writer": "Writer",
        "actors": "Actors",
        "plot": "Plot",
        "language": "Language",
        "country": "Country",
        "awards": "Awards",
        "poster": "Poster",
        "metascore": "Metascore",
        "ratings": "Ratings",
    }
    try:
        # Let requests build the query string so that titles containing '&',
        # '#', '+' or spaces (e.g. "Me, Myself & Irene") are URL-encoded
        # instead of being split into extra query parameters.
        # The timeout keeps one stalled request from hanging the whole run;
        # a timeout is reported by the except branch like any other error.
        response = requests.get(
            "http://www.omdbapi.com/",
            params={"t": title, "apikey": OMDB_API_KEY},
            timeout=10,
        )
        data = response.json()
        # anything other than Response == "True" is treated as "no data"
        if data.get("Response") != "True":
            return None
        return {key: data.get(field) for key, field in field_map.items()}
    # handle errors
    except Exception as e:
        print(f"Error: {e}")
        return None

In [28]:
# get_omdb_data_title smoke test
movie_data = get_omdb_data_title("Inception")
# a falsy result (None) means the helper returned no data
print(movie_data if movie_data else "Movie data not found.")

{'title': 'Inception', 'year': '2010', 'rated': 'PG-13', 'runtime': '148 min', 'imdb_rating': '8.8', 'imdb_votes': '2,645,456', 'box_office': '$292,587,330', 'released': '16 Jul 2010', 'genre': 'Action, Adventure, Sci-Fi', 'director': 'Christopher Nolan', 'writer': 'Christopher Nolan', 'actors': 'Leonardo DiCaprio, Joseph Gordon-Levitt, Elliot Page', 'plot': 'A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a C.E.O., but his tragic past may doom the project and his team to disaster.', 'language': 'English, Japanese, French', 'country': 'United States, United Kingdom', 'awards': 'Won 4 Oscars. 159 wins & 220 nominations total', 'poster': 'https://m.media-amazon.com/images/M/MV5BMjAxMzY3NjcxNF5BMl5BanBnXkFtZTcwNTI5OTM0Mw@@._V1_SX300.jpg', 'metascore': '74', 'ratings': [{'Source': 'Internet Movie Database', 'Value': '8.8/10'}, {'Source': 'Rotten Tomatoes', 'Value': '87%'}, {'Source': 'Metacritic

In [29]:
# Function: fetch_omdb_data_movies
# Description: gets data from the OMDb API for every title in a dataframe
# Input: list of movies (dataframe with a "title" column),
#        delay in seconds between API requests (int or float),
#        fetch: optional callable taking a title and returning a dict or None;
#               defaults to get_omdb_data_title
# Output: dataframe with one row per movie whose lookup returned data;
#         an empty dataframe if none did
# Example Usage: movies_df = fetch_omdb_data_movies(movies_list, delay=1)
def fetch_omdb_data_movies(df, delay=1, fetch=None):
    if fetch is None:
        # Resolved at call time, so this cell can be defined before the
        # helper's cell has been run.
        fetch = get_omdb_data_title

    # store data for each movie
    omdb_data = []
    total = len(df)
    # go through movie list
    for position, title in enumerate(df["title"], start=1):
        print(f"Getting data of movie {position}/{total}: {title}")
        try:
            # get data for each movie; falsy results (None) are skipped
            data = fetch(title)
            if data:
                omdb_data.append(data)
        except Exception as e:
            # one failing title must not abort the whole batch
            print(f"Error fetching data for '{title}': {e}")
        finally:
            # set delay to avoid API rate limits (also applied after failures)
            time.sleep(delay)

    # convert list of dicts to dataframe in one step
    return pd.DataFrame(omdb_data)

In [30]:
import os

# define excel file path to store omdb data
omdb_path = "data/omdp_data.xlsx"

# If the Excel file does not exist, fetch data and save it to the file
if not os.path.exists(omdb_path):
    print("Fetching data from OMDB API...")
    movies_df = fetch_omdb_data_movies(movies_list, delay=1)
    if movies_df.empty:
        # Do not cache an empty result: once the file exists, the check above
        # skips the fetch on every later run.
        print(f"No data fetched; not saving {omdb_path}.")
    else:
        print(f"Saving data to {omdb_path}...")
        try:
            movies_df.to_excel(omdb_path, index=False)
            print(f"Data saved successfully to {omdb_path}.")
        except Exception as e:
            print(f"Error saving {omdb_path}: {e}")

Fetching data from OMDB API...
Getting data of movie 1/150: The Age Of Adaline
Getting data of movie 2/150: Black Christmas
Getting data of movie 3/150: London
Getting data of movie 4/150: Twisted
Getting data of movie 5/150: Friends With Kids
Getting data of movie 6/150: Ken Park
Getting data of movie 7/150: Girls Trip
Getting data of movie 8/150: The Art Of The Steal
Getting data of movie 9/150: Macbeth
Getting data of movie 10/150: Fatherhood
Getting data of movie 11/150: London Has Fallen
Getting data of movie 12/150: Anthropoid
Getting data of movie 13/150: Me, Myself & Irene
Getting data of movie 14/150: The Last Face
Getting data of movie 15/150: The Fly
Getting data of movie 16/150: A Quiet Place
Getting data of movie 17/150: God'S Not Dead 2
Getting data of movie 18/150: Marvel One-Shot: Agent Carter
Getting data of movie 19/150: Hamlet 2
Getting data of movie 20/150: Where The Truth Lies
Getting data of movie 21/150: Ashby
Getting data of movie 22/150: Jaws
Getting data of mo

In [33]:
# Read the stored OMDb data back from the Excel file into movies_df
omdb_path = "data/omdp_data.xlsx"
print(f"Loading data from {omdb_path}...")
try:
    movies_df = pd.read_excel(omdb_path)
except Exception as e:
    # on any failure, report it and continue with an empty frame
    print(f"Error loading {omdb_path}: {e}")
    movies_df = pd.DataFrame()
else:
    print("Successfully loaded.")

Loading data from data/omdp_data.xlsx...
Successfully loaded.


In [34]:
# Inspect the first five rows of the loaded dataframe.
# NOTE(review): the recorded output shows capitalised column names and an
# "Unnamed: 0" column, which the fetch/save cells visible in this notebook
# (lower-case keys, index=False) would not produce. Confirm which code wrote
# the cached file.
movies_df.head(5)

Unnamed: 0,Title,Year,Rated,Runtime,IMDB_Rating,IMDB_Votes,Box_Office,Released,Genre,Director,Writer,Actors,Plot,Language,Country,Awards,Poster,Metascore,Ratings
0,The Age of Adaline,2015,PG-13,112 min,7.2,212877,"$42,629,776",24 Apr 2015,"Drama, Fantasy, Romance",Lee Toland Krieger,"J. Mills Goodloe, Salvador Paskowitz","Blake Lively, Michiel Huisman, Harrison Ford","A young woman, born at the turn of the 20th ce...","English, Portuguese, Italian","United States, Canada",1 win & 10 nominations,https://m.media-amazon.com/images/M/MV5BMTAzMT...,51.0,"[{'Source': 'Internet Movie Database', 'Value'..."
1,Black Christmas,1974,R,98 min,7.1,50352,,20 Dec 1974,"Horror, Mystery, Thriller",Bob Clark,Roy Moore,"Olivia Hussey, Keir Dullea, Margot Kidder","During their Christmas break, a group of soror...","English, Latin",Canada,3 wins & 2 nominations,https://m.media-amazon.com/images/M/MV5BZjdiMz...,65.0,"[{'Source': 'Internet Movie Database', 'Value'..."
2,London,2005,R,92 min,6.3,25119,"$20,361",03 Jul 2021,"Drama, Romance",Hunter Richards,Hunter Richards,"Jessica Biel, Chris Evans, Jason Statham","At a drug laden party in a New York loft, a yo...",English,"United Kingdom, United States",1 win & 1 nomination total,https://m.media-amazon.com/images/M/MV5BMTc5Mz...,24.0,"[{'Source': 'Internet Movie Database', 'Value'..."
3,Twisted,2004,R,97 min,5.3,23599,"$25,198,598",27 Feb 2004,"Crime, Drama, Mystery",Philip Kaufman,Sarah Thorp,"Ashley Judd, Samuel L. Jackson, Andy Garcia","Jessica, whose father killed her mother and co...","English, Italian, Spanish","United States, Germany",1 win,https://m.media-amazon.com/images/M/MV5BMGJhNj...,26.0,"[{'Source': 'Internet Movie Database', 'Value'..."
4,Friends with Kids,2011,R,107 min,6.1,43369,"$7,251,073",16 Mar 2012,"Comedy, Drama, Romance",Jennifer Westfeldt,Jennifer Westfeldt,"Jennifer Westfeldt, Adam Scott, Maya Rudolph",Two best friends decide to have a child togeth...,English,United States,2 nominations,https://m.media-amazon.com/images/M/MV5BMjIyMD...,55.0,"[{'Source': 'Internet Movie Database', 'Value'..."
