In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the Disney box-office dataset from the CSV next to the notebook and
# preview its first ten rows.
csv_path = 'disney_movies_total_gross.csv'
df = pd.read_csv(csv_path)
df.head(10)

Unnamed: 0,movie_title,release_date,genre,mpaa_rating,total_gross,inflation_adjusted_gross
0,Snow White and the Seven Dwarfs,1937-12-21,Musical,G,184925485,5228953251
1,Pinocchio,1940-02-09,Adventure,G,84300000,2188229052
2,Fantasia,1940-11-13,Musical,G,83320000,2187090808
3,Song of the South,1946-11-12,Adventure,G,65000000,1078510579
4,Cinderella,1950-02-15,Drama,G,85000000,920608730
5,"20,000 Leagues Under the Sea",1954-12-23,Adventure,,28200000,528279994
6,Lady and the Tramp,1955-06-22,Drama,G,93600000,1236035515
7,Sleeping Beauty,1959-01-29,Drama,,9464608,21505832
8,101 Dalmatians,1961-01-25,Comedy,G,153000000,1362870985
9,The Absent Minded Professor,1961-03-16,Comedy,,25381407,310094574


# Visualisation 1

In [3]:
# Number of films per MPAA rating.
# A single groupby replaces the per-rating boolean-mask loop: it skips missing
# ratings on its own (no trailing dropna needed) and sort=False keeps the
# ratings in order of first appearance. "Not Rated" films are excluded.
mpaa_lst = df['mpaa_rating'].unique()  # kept so later cells can still reference it
vis1_df = (
    df[df['mpaa_rating'] != 'Not Rated']
    .groupby('mpaa_rating', sort=False)
    .size()
    .rename('amount')
    .reset_index()
)
vis1_df.head(20)

Unnamed: 0,mpaa_rating,amount
0,G,86
2,PG,187
3,R,102
4,PG-13,145


In [4]:
# Radial chart: one wedge per MPAA rating. Both the wedge angle and the
# square-root-scaled radius encode the number of films.
rating_theta = alt.Theta("amount:Q", stack=True)
rating_radius = alt.Radius(
    "amount",
    scale=alt.Scale(type="sqrt", zero=True, rangeMin=20),
)
rating_color = alt.Color("mpaa_rating:N", title='Rating of film')

base = alt.Chart(vis1_df).encode(
    theta=rating_theta,
    radius=rating_radius,
    color=rating_color,
)

# Arc layer plus a text layer that prints each count, offset from the wedge radius.
c1 = base.mark_arc(innerRadius=30, stroke="#fff")
c2 = base.mark_text(radiusOffset=20).encode(text="amount:Q")

(c1 + c2).properties(
    width=700,
    height=500,
    title='Number of Disney movies in different MPAA ratings',
)

# Summary

#### The purpose of the visualization: To look at the total number of films made by Disney in terms of MPAA ratings

#### Alternative methods: make a bar chart or line plot

#### Pros:
#### This method clearly shows each rating's share of the total number of films

#### Cons:
#### None

# Visualisation 2 

In [5]:
# Number of films per genre.
# A single groupby replaces the per-genre boolean-mask loop: it skips films
# with a missing genre on its own (no trailing dropna needed) and sort=False
# keeps the genres in order of first appearance.
genre_lst = df['genre'].unique()  # kept so later cells can still reference it
vis2_df = (
    df.groupby('genre', sort=False)
    .size()
    .rename('amount')
    .reset_index()
)
vis2_df.head(20)

Unnamed: 0,genre,amount
0,Musical,16
1,Adventure,129
2,Drama,114
3,Comedy,182
5,Action,40
6,Horror,6
7,Romantic Comedy,23
8,Thriller/Suspense,24
9,Western,7
10,Black Comedy,3


In [6]:
# Horizontal bar chart of film counts per genre, longest bar on top.
genre_count_x = alt.X('amount:Q', title='Amount')
genre_name_y = alt.Y('genre:N', sort='-x', title='')

genre_bars = alt.Chart(vis2_df).mark_bar(stroke='black')
genre_bars.encode(x=genre_count_x, y=genre_name_y).properties(
    title="Number of Disney movies in different genres",
    width=700,
    height=500,
)

# Summary

#### The purpose of the visualization: To look at the total number of films made by Disney in terms of genres

#### Alternative methods: make a circular plot or line plot

#### Pros:
#### This method clearly shows how the genres rank by number of films

#### Cons:
#### It is not always clear how many films were made in a particular genre

# Visualisation 3

In [7]:
# Work on a copy so the raw frame stays untouched, and reduce the ISO date
# string ("YYYY-MM-DD") to an integer release year.
# The vectorised .str/.astype chain replaces a row-wise apply(axis=1).
vis3_df = df.copy()
vis3_df['release_date'] = vis3_df['release_date'].str[:4].astype(int)
vis3_df.head()

Unnamed: 0,movie_title,release_date,genre,mpaa_rating,total_gross,inflation_adjusted_gross
0,Snow White and the Seven Dwarfs,1937,Musical,G,184925485,5228953251
1,Pinocchio,1940,Adventure,G,84300000,2188229052
2,Fantasia,1940,Musical,G,83320000,2187090808
3,Song of the South,1946,Adventure,G,65000000,1078510579
4,Cinderella,1950,Drama,G,85000000,920608730


In [8]:
# First year offered by the slider. The same value is the slider minimum and
# the initial selection, so it is named once instead of being repeated.
FIRST_SLIDER_YEAR = 1983

# Range slider bound to a single-value selection on the release year.
# The upper bound is cast to a plain int so the chart spec holds a native
# Python number rather than a numpy scalar.
input_slider_gross = alt.binding_range(
    min=FIRST_SLIDER_YEAR,
    max=int(vis3_df['release_date'].max()),
    step=1,
    name='Select year:',
)
select_year_gross = alt.selection_single(
    name="year",
    fields=['release_date'],
    bind=input_slider_gross,
    init={'release_date': FIRST_SLIDER_YEAR},
)

In [9]:
# Bar chart of total gross per film, limited to the year picked on the slider.
gross_x = alt.X('total_gross:Q', title='Total Gross')
title_y = alt.Y('movie_title:N', sort='-x', title='')

yearly_bars = (
    alt.Chart(vis3_df)
    .mark_bar(stroke='black')
    .encode(x=gross_x, y=title_y)
    .add_selection(select_year_gross)
    .transform_filter(select_year_gross)
)
yearly_bars.properties(
    width=600,
    height=500,
    background='#F9F9F9',
    padding=25,
    title='Top movies by gross per each year',
)

# Summary

#### The purpose of the visualization: To look at the gross of films year by year, compare them and see which films were the highest grossing in any given year
#### P.S. The slider starts at 1983 because earlier years have no films or only a single film, which is not interesting for comparison

#### Alternative methods: make line plot

 #### Pros:
 #### This method clearly shows how the films rank by gross

  #### Cons:
 #### Years before 1983 have to be cut off because they contain too few films to be representative

# Visualisation 4

In [10]:
# Build a long-form frame with two rows per film: the nominal gross
# (inflation == 'No') followed by the inflation-adjusted gross
# (inflation == 'Yes'), both stored in the `total_gross` column.
# melt() replaces the manual iterrows loop. Keeping the original row index
# through the melt and stable-sorting on it restores the interleaved
# No/Yes order per film.
vis4_df = (
    df.assign(release_date=df['release_date'].str[:4].astype(int))
    .rename(columns={'total_gross': 'No', 'inflation_adjusted_gross': 'Yes'})
    .melt(
        id_vars=['movie_title', 'release_date'],
        value_vars=['No', 'Yes'],
        var_name='inflation',
        value_name='total_gross',
        ignore_index=False,
    )
    .sort_index(kind='mergesort')
    .reset_index(drop=True)
    [['movie_title', 'release_date', 'total_gross', 'inflation']]
)
vis4_df.head()

Unnamed: 0,movie_title,release_date,total_gross,inflation
0,Snow White and the Seven Dwarfs,1937,184925485,No
1,Snow White and the Seven Dwarfs,1937,5228953251,Yes
2,Pinocchio,1940,84300000,No
3,Pinocchio,1940,2188229052,Yes
4,Fantasia,1940,83320000,No


In [11]:
# Slider over the full span of release years, bound to a single-value
# selection that starts on the earliest year in the data.
earliest_year = vis4_df['release_date'].min()
latest_year = vis4_df['release_date'].max()

input_slider_inflation = alt.binding_range(
    min=earliest_year,
    max=latest_year,
    step=1,
    name='Select year:',
)
select_year_inflation = alt.selection_single(
    name="year",
    fields=['release_date'],
    bind=input_slider_inflation,
    init={'release_date': earliest_year},
)

In [38]:
# Dot plot for the year picked on the slider: each film gets one point for its
# nominal gross and one for its inflation-adjusted gross, told apart by colour.
gross_axis = alt.X(
    'total_gross:Q',
    scale=alt.Scale(zero=False),
    axis=alt.Axis(grid=False),
    title='Total gross',
)
film_axis = alt.Y(
    'movie_title:N',
    title="",
    sort='-x',
    axis=alt.Axis(grid=True),
)
inflation_color = alt.Color('inflation:N', legend=alt.Legend(title="Inflation"))

inflation_points = (
    alt.Chart(vis4_df)
    .mark_point(filled=True, stroke='black')
    .encode(
        x=gross_axis,
        y=film_axis,
        opacity=alt.OpacityValue(0.7),
        color=inflation_color,
    )
    .add_selection(select_year_inflation)
    .transform_filter(select_year_inflation)
)
inflation_points.properties(
    width=600,
    height=400,
    background='#F9F9F9',
    padding=25,
    title='The difference between the actual collection of films and in terms of inflation in terms of years',
)

# Summary 

#### The purpose of the visualization: To look at the gross of films year by year and compare the nominal figures with the inflation-adjusted ones
#### P.S. I found it fascinating that Snow White and the Seven Dwarfs earned more than 5 billion dollars when adjusted for inflation

#### Alternative methods: make line plot or bar chart

  #### Pros:
 #### This method clearly shows the difference between the gross with and without inflation

 #### Cons:
 #### The visualization focuses on the difference between the gross with and without inflation, so it is not always easy to read the exact figures for individual movies

# Visualisation 5

In [13]:
# Independent copy of the loaded frame, used by the genre dropdown and the
# top-10-by-genre chart below.
vis5_df = df.copy()

In [14]:
# Dropdown listing every non-missing genre, bound to a single-value selection
# that starts on "Action".
genre_options = vis5_df['genre'].dropna().unique()
input_dropdown_genre = alt.binding_select(
    options=genre_options,
    name='Choose genre  ',
)
select_category_genre = alt.selection_single(
    fields=['genre'],
    bind=input_dropdown_genre,
    init={'genre': "Action"},
)

In [37]:
# Top-10 bar chart for the genre chosen in the dropdown.
# Transform order matters: filter to the selected genre first, then number the
# remaining films by descending gross with a running count, then keep
# positions 1-10.
genre_gross_x = alt.X('total_gross:Q', title='Total Gross')
genre_title_y = alt.Y('movie_title:N', sort='-x', title='')

top_by_genre = (
    alt.Chart(vis5_df)
    .mark_bar(stroke='black')
    .encode(x=genre_gross_x, y=genre_title_y)
    .add_selection(select_category_genre)
    .transform_filter(select_category_genre)
    .transform_window(
        rank='count(total_gross)',
        sort=[alt.SortField('total_gross', order='descending')],
    )
    .transform_filter(alt.datum.rank <= 10)
)
top_by_genre.properties(
    width=600,
    height=500,
    background='#F9F9F9',
    padding=25,
    title='Top 10 highest grossing films by genre',
)

# Summary

#### The purpose of the visualization: To look  at the gross of top 10 films by genres


#### Alternative methods: make line plot

 #### Pros:
 #### This method clearly shows how the films rank by gross

####  Cons:
#### Static limit with only 10 films

# Visualisation 6

In [16]:
# Independent copy of the loaded frame, used by the MPAA-rating dropdown and
# the top-10-by-rating chart below.
vis6_df = df.copy()

In [17]:
# Dropdown listing every non-missing MPAA rating, bound to a single-value
# selection that starts on "G".
rating_options = vis6_df['mpaa_rating'].dropna().unique()
input_dropdown_mpaa = alt.binding_select(
    options=rating_options,
    name='Choose rating  ',
)
select_category_mpaa = alt.selection_single(
    fields=['mpaa_rating'],
    bind=input_dropdown_mpaa,
    init={'mpaa_rating': "G"},
)

In [18]:
# Top-10 bar chart for the MPAA rating chosen in the dropdown.
# Transform order matters: filter to the selected rating first, then number
# the remaining films by descending gross with a running count, then keep
# positions 1-10.
rating_gross_x = alt.X('total_gross:Q', title='Total Gross')
rating_title_y = alt.Y('movie_title:N', sort='-x', title='')

top_by_rating = (
    alt.Chart(vis6_df)
    .mark_bar(stroke='black')
    .encode(x=rating_gross_x, y=rating_title_y)
    .add_selection(select_category_mpaa)
    .transform_filter(select_category_mpaa)
    .transform_window(
        rank='count(total_gross)',
        sort=[alt.SortField('total_gross', order='descending')],
    )
    .transform_filter(alt.datum.rank <= 10)
)
top_by_rating.properties(
    width=600,
    height=500,
    background='#F9F9F9',
    padding=25,
    title='Top 10 highest grossing films by MPAA rating',
)

# Summary

#### The purpose of the visualization: To look  at the gross of top 10 films by MPAA rating


#### Alternative methods: make line plot

#### Pros:
#### This method clearly shows how the films rank by gross

####  Cons:
#### Static limit with only 10 films

# Visualisation 7

In [19]:
# Total nominal and inflation-adjusted gross per (genre, MPAA rating) pair.
# The two numeric columns are selected explicitly before summing: on
# pandas >= 2.0 a bare groupby().sum() also "sums" the string columns
# (movie_title, release_date) by concatenating them, which would bloat the
# frame that gets embedded in the chart.
vis7_df = (
    df.dropna()
    .groupby(['genre', 'mpaa_rating'])[['total_gross', 'inflation_adjusted_gross']]
    .sum()
    .reset_index()
)
# "Not Rated" is not a real rating category, so leave it out of the heatmap.
vis7_df = vis7_df[vis7_df['mpaa_rating'] != 'Not Rated']
vis7_df.head(10)

Unnamed: 0,genre,mpaa_rating,total_gross,inflation_adjusted_gross
0,Action,PG,211929620,426165810
1,Action,PG-13,3161698234,3516434321
2,Action,R,748861460,1407044726
3,Adventure,G,4753689326,10136004565
4,Adventure,PG,7350121958,8625757598
5,Adventure,PG-13,3976887642,4550807668
6,Adventure,R,97764039,151859393
7,Black Comedy,R,97543212,156730475
8,Comedy,G,1714368699,3696144679
10,Comedy,PG,4027381729,6857455790


In [20]:
# Heatmap of summed total gross: MPAA rating on x, genre on y, colour encodes
# the gross.
rating_x = alt.X('mpaa_rating:O', title='', axis=alt.Axis(labelAngle=0))
genre_y = alt.Y('genre:O', title='')
gross_color = alt.Color(
    'total_gross:Q',
    title='Total Gross',
    scale=alt.Scale(scheme='goldgreen', domainMid=2000000000),
)

gross_heatmap = alt.Chart(vis7_df).mark_rect(stroke='black').encode(
    x=rating_x,
    y=genre_y,
    color=gross_color,
)
gross_heatmap.properties(
    title=alt.TitleParams('General gross of films in terms of genre and MPAA rating'),
    height=400,
    width=600,
    background='#F9F9F9',
)

# Summary

#### The purpose of the visualization: To look  at the best combinations of genre and MPAA rating based on gross


#### Alternative methods: don't know

 #### Pros:
 #### This method clearly shows the best combinations of genre and MPAA rating based on gross. P.S. In my opinion, the colour scheme is very pleasing to the eye

 #### Cons:

#### Some combinations of ratings and genres do not exist and there is just an empty space    

# Visualisation 8

In [21]:
# Reuse the per-(genre, rating) sums from Visualisation 7; the copy keeps this
# chart's data independent of vis7_df.
vis8_df=vis7_df.copy()

In [30]:
# Heatmap of summed inflation-adjusted gross: MPAA rating on x, genre on y.
# The legend title now names the field that is actually encoded; it
# previously read 'Total Gross' although the colour shows
# inflation_adjusted_gross.
alt.Chart(vis8_df).mark_rect(stroke='black').encode(
    x=alt.X('mpaa_rating:O', title='', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('genre:O', title=''),
    color=alt.Color(
        'inflation_adjusted_gross:Q',
        title='Inflation-adjusted gross',
        scale=alt.Scale(scheme='goldgreen', domainMid=4000000000),
    ),
).properties(
    title=alt.TitleParams('General gross of films in terms of genre and MPAA rating adjusted for inflation'),
    height=400,
    width=600,
    background='#F9F9F9',
)

# Summary

#### The purpose of the visualization: To look  at the best combinations of genre and MPAA rating based on gross and inflation

#### Alternative methods: don't know

#### Pros:
#### This method clearly shows the best combinations of genre and MPAA rating based on inflation-adjusted gross

#### Cons:
#### Some combinations of ratings and genres do not exist and there is just an empty space    