In [126]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [127]:
# Read every CSV export into its own DataFrame. The target names and the
# file names are paired by position, so keep both lists in the same order.
(
    annual_ticket_sales,
    highest_grossers,
    popular_creative_types,
    top_distributors,
    top_genres,
    top_grossing_ratings,
    top_grossing_sources,
    top_production_methods,
    wide_release_count,
) = map(
    pd.read_csv,
    [
        "AnnualTicketSales.csv",
        "HighestGrossers.csv",
        "PopularCreativeTypes.csv",
        "TopDistributors.csv",
        "TopGenres.csv",
        "TopGrossingRatings.csv",
        "TopGrossingSources.csv",
        "TopProductionMethods.csv",
        "WideReleasesCount.csv",
    ],
)

In [128]:
# Preview the first rows (head() shows 5 by default) to see how the CSV was parsed.
annual_ticket_sales.head()

Unnamed: 0,YEAR,TICKETS SOLD,TOTAL BOX OFFICE,TOTAL INFLATION ADJUSTED BOX OFFICE,AVERAGE TICKET PRICE,Unnamed: 5
0,2021,423774881,"$3,881,777,912","$3,881,777,912",$9.16,
1,2020,223638958,"$2,048,534,616","$2,048,534,616",$9.16,
2,2019,1228541629,"$11,253,443,955","$11,253,444,050",$9.16,
3,2018,1311536128,"$11,948,096,650","$12,013,670,952",$9.11,
4,2017,1225639761,"$10,993,991,460","$11,226,860,216",$8.97,


In [129]:
# "Unnamed: 5" carries no values in the previewed rows, so remove it.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps this
# cell safe to re-run: a second execution no longer raises KeyError because the
# column is already gone. `axis` is not needed when `columns=` is given.
annual_ticket_sales = annual_ticket_sales.drop(columns=["Unnamed: 5"], errors="ignore")

In [130]:
# Summary statistics for every column; include="all" also covers the
# non-numeric (object) columns, which report count/unique/top/freq.
annual_ticket_sales.describe(include="all")

Unnamed: 0,YEAR,TICKETS SOLD,TOTAL BOX OFFICE,TOTAL INFLATION ADJUSTED BOX OFFICE,AVERAGE TICKET PRICE
count,27.0,27.0,27,27,27
unique,,27.0,27,27,25
top,,423774881.0,"$3,881,777,912","$3,881,777,912",$9.16
freq,,1.0,1,1,3
mean,2008.0,,,,
std,7.937254,,,,
min,1995.0,,,,
25%,2001.5,,,,
50%,2008.0,,,,
75%,2014.5,,,,


In [131]:
# Count missing values per column.
annual_ticket_sales.isnull().sum()

YEAR                                   0
TICKETS SOLD                           0
TOTAL BOX OFFICE                       0
TOTAL INFLATION ADJUSTED BOX OFFICE    0
AVERAGE TICKET PRICE                   0
dtype: int64

In [132]:
# Show each column's dtype and non-null count to spot numeric data stored as text.
annual_ticket_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 5 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   YEAR                                 27 non-null     int64 
 1   TICKETS SOLD                         27 non-null     object
 2   TOTAL BOX OFFICE                     27 non-null     object
 3   TOTAL INFLATION ADJUSTED BOX OFFICE  27 non-null     object
 4   AVERAGE TICKET PRICE                 27 non-null     object
dtypes: int64(1), object(4)
memory usage: 1.2+ KB


In [133]:
# Strip thousands separators and currency symbols so the text columns can be
# converted to numbers afterwards.
#
# regex=False is passed explicitly on every call: "$" is the end-of-string
# anchor in a regular expression, and the default of `regex` differs between
# pandas versions (older releases emit a FutureWarning here). Treating the
# patterns as literals gives the same result on every version.
annual_ticket_sales["TICKETS SOLD"] = annual_ticket_sales["TICKETS SOLD"].str.replace(",", "", regex=False)

for col in ["TOTAL BOX OFFICE", "TOTAL INFLATION ADJUSTED BOX OFFICE"]:
    annual_ticket_sales[col] = (
        annual_ticket_sales[col]
        .str.replace(",", "", regex=False)
        .str.replace("$", "", regex=False)
    )

annual_ticket_sales["AVERAGE TICKET PRICE"] = annual_ticket_sales["AVERAGE TICKET PRICE"].str.replace("$", "", regex=False)


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



In [134]:
# Re-check the preview after the separators and currency symbols were stripped.
annual_ticket_sales.head()

Unnamed: 0,YEAR,TICKETS SOLD,TOTAL BOX OFFICE,TOTAL INFLATION ADJUSTED BOX OFFICE,AVERAGE TICKET PRICE
0,2021,423774881,3881777912,3881777912,9.16
1,2020,223638958,2048534616,2048534616,9.16
2,2019,1228541629,11253443955,11253444050,9.16
3,2018,1311536128,11948096650,12013670952,9.11
4,2017,1225639761,10993991460,11226860216,8.97


In [135]:
# Parse the cleaned text columns as numbers. The three count/amount columns
# are converted column-by-column in one go.
numeric_cols = ["TICKETS SOLD", "TOTAL BOX OFFICE", "TOTAL INFLATION ADJUSTED BOX OFFICE"]
annual_ticket_sales[numeric_cols] = annual_ticket_sales[numeric_cols].apply(pd.to_numeric)

# The ticket price has decimals; downcast="float" asks pandas for the smallest
# float dtype that can hold the values.
annual_ticket_sales["AVERAGE TICKET PRICE"] = pd.to_numeric(
    annual_ticket_sales["AVERAGE TICKET PRICE"],
    downcast="float",
)

In [136]:
# Preview again after the numeric conversion.
annual_ticket_sales.head()

Unnamed: 0,YEAR,TICKETS SOLD,TOTAL BOX OFFICE,TOTAL INFLATION ADJUSTED BOX OFFICE,AVERAGE TICKET PRICE
0,2021,423774881,3881777912,3881777912,9.16
1,2020,223638958,2048534616,2048534616,9.16
2,2019,1228541629,11253443955,11253444050,9.16
3,2018,1311536128,11948096650,12013670952,9.11
4,2017,1225639761,10993991460,11226860216,8.97


In [253]:
# Helper columns used as the y-values of the two line traces in the combined
# chart. The factors (x 150,000,000 and / 5) are hand-picked; presumably they
# bring both series into the same magnitude as TICKETS SOLD so all three fit
# on one y-axis -- TODO confirm. The values are NOT real dollar amounts.
annual_ticket_sales["avg"] = annual_ticket_sales["AVERAGE TICKET PRICE"].mul(150_000_000)
annual_ticket_sales["box"] = annual_ticket_sales["TOTAL BOX OFFICE"].div(5)

In [276]:
# Build three Plotly Express figures and merge their traces into one chart:
# bars for tickets sold, plus two lines for the rescaled "avg" and "box"
# columns. Only each figure's `.data` (its traces) is reused in the combined
# figure, so layout settings applied to fig1/fig2/fig3 individually (title,
# template, y-axis tick prefix) do not carry over to `fig`.

# Bars: tickets sold per year.
fig1 = px.bar(
    annual_ticket_sales,
    x="YEAR",
    y="TICKETS SOLD",
    template="simple_white",
    title="Ticket Sales per Year",
)

# Line: rescaled average ticket price; hover_data adds the real price to the tooltip.
fig2 = px.line(
    annual_ticket_sales,
    x="YEAR",
    y="avg",
    color_discrete_sequence=["black"],
    hover_data=["AVERAGE TICKET PRICE"],
)

# NOTE: affects fig2 only; the combined figure below does not inherit it.
fig2.update_yaxes(tickprefix="$", showgrid=True)

# Line: rescaled total box office; hover_data adds the real total to the tooltip.
fig3 = px.line(
    annual_ticket_sales,
    x="YEAR",
    y="box",
    color_discrete_sequence=["ForestGreen"],
    hover_data=["TOTAL BOX OFFICE"],
)

# NOTE: affects fig3 only; the combined figure below does not inherit it.
fig3.update_yaxes(tickprefix="$", showgrid=True)

# Combine the traces of all three figures into a single figure.
fig = go.Figure(data=fig1.data + fig2.data + fig3.data)

fig.update_layout(hovermode="x unified", template="simple_white", title="Ticket Sales and Total Box Office per Year")
fig.update_layout(
    title_x=0.5,  # centre the title
    font_family="Rockwell",
    legend=dict(
        title=None, orientation="h", y=1, yanchor="bottom", x=0.5, xanchor="center"
    ),
)

# Mark the pandemic year. The original call ended with a dangling `align=`
# (a SyntaxError that prevented the whole cell from running); it is dropped
# here, which leaves Plotly's default text alignment in effect. YEAR is an
# integer column, so the x position is given as a number rather than a string.
fig.add_annotation(
    text="COVID-19",
    x=2020,
    arrowcolor="black",
    font_size=15,
    bgcolor="white",
)

fig.show()