In [24]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns

In [25]:
# The goal of this notebook is to examine 3 things:
# 1. average revenue to budget ratio (scalar value)
# 2. Does having a larger budget lead to more revenue?
# 3. Does having a larger budget lead to better ratings?

In [30]:
# read in cleaned movies metainformation
metainfo_data = pd.read_csv('Datasets/cleaned-movies-metainformation.csv')

# We only need budget, id, and revenue, so let's drop all of the other columns.
metainfo_data.drop(['adult', 'belongs_to_collection', 'genres', 'original_language', 'original_title', 'popularity', 'production_companies', 'production_countries', 'release_date', 'runtime', 'spoken_languages', 'status', 'title', 'vote_average', 'vote_count'], axis = 1, inplace = True)

# Now we want to get rid of all the rows where budget or revenue is null.
# QUESTION: Do we want to remove rows where budget or revenue equals 0?
index_names = metainfo_data[metainfo_data['budget'].isnull() | metainfo_data['revenue'].isnull()].index
metainfo_data.drop(index_names, inplace = True)

# To make sure we still have enough data to work with after dropping all of these rows, let's check the size of our dataframe.
print(metainfo_data.size)
# Seems like we have enough data to work with even after all the drops.

# Proceed to check the first few columns of our dataframe.
metainfo_data.head()
# The data looks good, so let's move on!

16140


Unnamed: 0,budget,id,revenue
0,30000000.0,862,373554033.0
1,65000000.0,8844,262797249.0
3,16000000.0,31357,81452156.0
5,60000000.0,949,187436818.0
8,35000000.0,9091,64350171.0


In [34]:
# The first thing we want to do is find the average revenue to budget ratio.
# We can find this value by finding the revenue to budget ratio for each row and then averaging all of the ratios.
ratios_sum = 0
for index, row in metainfo_data.iterrows():
    ratios_sum += row['revenue'] / row['budget']
ratios_avg = ratios_sum / metainfo_data.size
# The average revenue to budget ratio means that on average, for every dollar spent on the movie budget, 1855.64 dollars will be made in revenue.
print(ratios_avg)

1855.6440117454727
