### Exploratory Data Analysis

In [2]:
import pandas as pd
import ast
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import matplotlib.pyplot
import seaborn as sns

#### Importing data

In [3]:
# Load the TMDB movie data, keeping only the first row for each (Title, Year)
# pair so the two columns can serve as a unique key.
tmdb_data = (
    pd.read_csv('../data/movies_2015_2024.csv')
    .drop_duplicates(subset=['Title', 'Year'])
)

# Load the Best Picture data and flag every row it contains as nominated.
# NOTE(review): assumes this file lists only Best Picture nominees - confirm.
best_picture = pd.read_csv('../data/best_picture.csv').assign(Nominated='Yes')

In [4]:
# Combine the Best Picture rows with the TMDB rows. The right join keeps every
# TMDB movie, and validate='1:1' makes pandas raise if (Title, Year) is not a
# unique key on both sides.
movies_df = pd.merge(
    best_picture,
    tmdb_data,
    how='right',
    on=['Title', 'Year'],
    validate='1:1',
)

# TMDB movies with no Best Picture match come back as NaN in these two
# columns; replace those gaps with 'No'.
values = {'Winner': 'No', 'Nominated': 'No'}
movies_df = movies_df.fillna(value=values)

#### Research question: convert budget and revenue data to 2024 dollars using CPI data

In [5]:
# Load the CPI table, then index it by Year so a value can be looked up
# with cpi_data.loc[year, 'CPI'].
cpi_raw = pd.read_csv('../data/CPI_data.csv')
cpi_data = cpi_raw.set_index('Year')

In [6]:
# Convert each movie's budget to 2024 dollars:
#     adjusted = budget * (CPI(2024) / CPI(release year))
# All release years are looked up in a single vectorized .loc call; a year that
# is missing from cpi_data raises KeyError.
cpi_release_year = cpi_data.loc[movies_df['Year'].to_numpy(), 'CPI'].to_numpy()
budget_2024 = movies_df['Budget'] * (cpi_data.loc[2024, 'CPI'] / cpi_release_year)

# Reassign adjusted budget numbers to the Budget column.
# NOTE: this overwrites Budget in place, so running this cell a second time
# applies the inflation adjustment twice.
movies_df['Budget'] = budget_2024

In [7]:
# Convert each movie's revenue to 2024 dollars:
#     adjusted = revenue * (CPI(2024) / CPI(release year))
# All release years are looked up in a single vectorized .loc call; a year that
# is missing from cpi_data raises KeyError.
cpi_release_year = cpi_data.loc[movies_df['Year'].to_numpy(), 'CPI'].to_numpy()
revenue_2024 = movies_df['Revenue'] * (cpi_data.loc[2024, 'CPI'] / cpi_release_year)

# Reassign adjusted revenue numbers to the Revenue column.
# NOTE: this overwrites Revenue in place, so running this cell a second time
# applies the inflation adjustment twice.
movies_df['Revenue'] = revenue_2024

In [8]:
# Label the two money columns with their unit now that they hold
# inflation-adjusted (2024-dollar) amounts.
adjusted_column_names = {
    'Budget': 'Budget (2024 dollars)',
    'Revenue': 'Revenue (2024 dollars)',
}
movies_df = movies_df.rename(columns=adjusted_column_names)

In [9]:
# Display the merged DataFrame with the renamed 2024-dollar Budget/Revenue columns
movies_df

Unnamed: 0,Title,Year,Winner,Nominated,Genre,Vote_Average,Vote_Count,Budget (2024 dollars),Revenue (2024 dollars),TMDB_ID
0,Avengers: Age of Ultron,2015,No,No,"['Action', 'Adventure', 'Science Fiction']",7.271,23853,4.830729e+08,1.860034e+09,99861
1,Mad Max: Fury Road,2015,No,Yes,"['Action', 'Adventure', 'Science Fiction']",7.627,23507,1.985231e+08,5.014142e+08,76341
2,Inside Out,2015,No,No,"['Animation', 'Family', 'Adventure', 'Drama', ...",7.910,22922,2.316103e+08,1.135038e+09,150540
3,Jurassic World,2015,No,No,"['Action', 'Adventure', 'Science Fiction', 'Th...",6.699,21100,1.985231e+08,2.212259e+09,135397
4,The Martian,2015,No,Yes,"['Drama', 'Adventure', 'Science Fiction']",7.691,20585,1.429366e+08,8.351985e+08,286217
...,...,...,...,...,...,...,...,...,...,...
1000,Fences,2016,No,Yes,['Drama'],6.872,2728,3.136799e+07,8.417076e+07,393457
1001,Drive My Car,2021,No,Yes,['Drama'],7.432,1407,1.504948e+06,1.777696e+07,758866
1002,Women Talking,2022,No,Yes,['Drama'],6.861,661,2.143746e+07,8.134893e+06,777245
1003,Maestro,2023,No,Yes,"['Drama', 'Romance', 'Music']",6.291,790,8.235955e+07,3.088483e+05,523607
