In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import os
# List every file available under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Location of the Disney total-gross CSV, relative to the notebook's working directory.
disney_path = '../input/disney-movies-19372016-total-gross/disney_movies_total_gross.csv'
# Read the whole file into a DataFrame with pandas' default parsing options.
disney_data = pd.read_csv(filepath_or_buffer=disney_path)

# Introduction:

Disney has been successful with children since its first movie, and the company keeps getting bigger and bigger with its animations and theme parks. Disney also owns many other animation and movie studios, which makes it one of the biggest companies in the world.

# Preprocessing:

In [None]:
# Preview the first five rows of the raw data.
disney_data.head(n=5)

In [None]:
# Print the column names, non-null counts and dtypes of the frame.
disney_data.info()

In [None]:
# Parse the release_date column into pandas datetimes so the .dt accessor can be used on it.
parsed_release_dates = pd.to_datetime(disney_data['release_date'])
disney_data['release_date'] = parsed_release_dates

In [None]:
# Create a 'year' column (four-digit year, kept as a string) to visualize later.
# Formatting through the .dt accessor replaces the date -> str -> split round-trip;
# rows with a missing release_date become NaN instead of the literal string 'NaT',
# so they no longer show up as a fake "year" category.
disney_data['year'] = disney_data['release_date'].dt.strftime('%Y')

In [None]:
# Show the dtype of every column after the preprocessing steps above.
disney_data.dtypes

In [None]:
# Summary statistics, transposed so each described column becomes a row.
disney_data.describe().transpose()

# Analysing the statistical data:

## The best and worst gross:

In [None]:
# Largest and smallest values of the total_gross column.
total_gross = disney_data['total_gross']
highest_total = total_gross.max()
lowest_total = total_gross.min()
print('the best total gross', highest_total,
      '\nThe worse total gross:', lowest_total)

In [None]:
# Largest and smallest values of the inflation_adjusted_gross column.
adjusted_gross = disney_data['inflation_adjusted_gross']
highest_adjusted = adjusted_gross.max()
lowest_adjusted = adjusted_gross.min()
print('the best inflation adjusted gross', highest_adjusted,
      '\nThe worse inflation_adjusted_gross:', lowest_adjusted)

## What are the movies ratings and genres?

In [None]:
# Number of movies per MPAA rating, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Disney movies rating')
sns.countplot(x='mpaa_rating', data=disney_data, ax=ax)
# Relabel the x axis after plotting so the custom label replaces the column name.
ax.set_xlabel('rating')

In [None]:
# Number of movies per genre, as horizontal bars.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Disney movies genres')
sns.countplot(y='genre', data=disney_data, ax=ax)

As expected, most Disney movies are comedies, adventures, or dramas.

## Analysing number per year:

In [None]:
# Distribution of releases over the 'year' column, with a KDE overlay.
fig, ax = plt.subplots(figsize=(12, 8))
plt.xticks(rotation=90)  # vertical tick labels so the year labels do not overlap
ax.set_title('Disney movies per year')
sns.histplot(disney_data['year'], kde=True, ax=ax)

## Analysing the gross:

The total gross ignores inflation, so it shows how much money the movies earned at the time of their release. Because the value of money changes over time, we also need to analyse the inflation-adjusted gross, which makes it possible to compare earnings in current monetary value.

In [None]:
# Total gross against release date.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set(
    title='Evolution of movies gross with time',
    xlabel='release date',
    ylabel='total gross',
)
sns.lineplot(x='release_date', y='total_gross', data=disney_data, ax=ax)

In [None]:
# Inflation-adjusted gross against release date.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set(
    title='Evolution of movies gross with time adjusted by inflation',
    xlabel='release date',
    ylabel='inflation adjusted gross',
)
sns.lineplot(x='release_date', y='inflation_adjusted_gross', data=disney_data, ax=ax)

In the first graph the gross appears to be increasing over time, but the inflation-adjusted graph shows that this is not true.

# Analysing values:

## Analysing movie genres:

In [None]:
# Mean of every numeric column per genre.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer silently
# drops non-numeric columns and raises TypeError on them instead.
avg_genre = disney_data.groupby('genre').mean(numeric_only=True)

In [None]:
# Two stacked bar charts of the per-genre averages: total gross on top,
# inflation-adjusted gross below.
fig, (ax_total, ax_adjusted) = plt.subplots(2, 1, figsize=(12, 8))
ax_total.set_title('Gross per genre')
sns.barplot(x=avg_genre['total_gross'], y=avg_genre.index, ax=ax_total)
sns.barplot(x=avg_genre['inflation_adjusted_gross'], y=avg_genre.index, ax=ax_adjusted)

Even though action and adventure are the genres with the highest average total gross, once we adjust for inflation the musical genre has the highest average gross.

In [None]:
# Sum of every numeric column per genre.
# numeric_only=True keeps the aggregation to numeric columns; without it pandas >= 2.0
# tries to sum the datetime release_date column and raises TypeError.
sum_genre = disney_data.groupby('genre').sum(numeric_only=True)

In [None]:
# Two stacked bar charts of the per-genre sums: total gross on top,
# inflation-adjusted gross below.
fig, (ax_total, ax_adjusted) = plt.subplots(2, 1, figsize=(12, 8))
ax_total.set_title('Gross per genre')
sns.barplot(x=sum_genre['total_gross'], y=sum_genre.index, ax=ax_total)
sns.barplot(x=sum_genre['inflation_adjusted_gross'], y=sum_genre.index, ax=ax_adjusted)

Even though the average gross of musical movies is high, adventure and comedy are the genres with the highest summed gross. This can be explained by the number of movies in each genre.

In [None]:
# Number of movies in each genre.
disney_data['genre'].value_counts()

In [None]:
# One point per movie: total gross, grouped by genre.
fig, ax = plt.subplots(figsize=(12, 8))
sns.stripplot(y='genre', x='total_gross', data=disney_data, ax=ax)

In [None]:
# One point per movie: inflation-adjusted gross, grouped by genre.
fig, ax = plt.subplots(figsize=(12, 8))
sns.stripplot(y='genre', x='inflation_adjusted_gross', data=disney_data, ax=ax)

In [None]:
# Distribution of inflation-adjusted gross per genre.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='genre', y='inflation_adjusted_gross', data=disney_data, ax=ax)
# Apply the custom label and tick rotation AFTER plotting: some seaborn versions
# label the axes of categorical plots with the column names unconditionally,
# which would overwrite a label set beforehand.
ax.set_ylabel('inflation adjusted gross')
ax.tick_params(axis='x', labelrotation=30)

So adventure movies are probably Disney's most lucrative genre, but the musical genre has many outliers, which pull its average gross above that of the other genres.

## Analysing rating

In [None]:
# Mean of every numeric column per MPAA rating.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on
# non-numeric columns instead of silently dropping them.
avg_rating = disney_data.groupby('mpaa_rating').mean(numeric_only=True)

# Stacked bar charts: average total gross on top, average inflation-adjusted gross below.
fig, (ax_total, ax_adjusted) = plt.subplots(2, 1, figsize=(12, 8))
ax_total.set_title('average gross per rating')
sns.barplot(x=avg_rating['total_gross'], y=avg_rating.index, ax=ax_total)
sns.barplot(x=avg_rating['inflation_adjusted_gross'], y=avg_rating.index, ax=ax_adjusted)

In [None]:
# Sum of every numeric column per MPAA rating.
# Stored as sum_rating: these are totals, not averages, so the name avg_rating is not reused.
# numeric_only=True keeps the aggregation to numeric columns; without it pandas >= 2.0
# tries to sum the datetime release_date column and raises TypeError.
sum_rating = disney_data.groupby('mpaa_rating').sum(numeric_only=True)

# Stacked bar charts: summed total gross on top, summed inflation-adjusted gross below.
fig, (ax_total, ax_adjusted) = plt.subplots(2, 1, figsize=(12, 8))
ax_total.set_title('gross per rating')
sns.barplot(x=sum_rating['total_gross'], y=sum_rating.index, ax=ax_total)
sns.barplot(x=sum_rating['inflation_adjusted_gross'], y=sum_rating.index, ax=ax_adjusted)

# Has Disney's movie income decreased?

In [None]:
# Sum of every numeric column per release year.
# numeric_only=True keeps the aggregation to numeric columns; without it pandas >= 2.0
# tries to sum the datetime release_date column and raises TypeError.
year_income = disney_data.groupby('year').sum(numeric_only=True)

In [None]:
# Yearly sum of total gross.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set(title='disney movies income', xlabel='year', ylabel='income')
plt.xticks(rotation=90)  # vertical tick labels so the year labels do not overlap
sns.lineplot(x=year_income.index, y=year_income['total_gross'], ax=ax)

In [None]:
# Yearly sum of inflation-adjusted gross.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set(title='disney movies income adjusted', xlabel='year', ylabel='inflation adjusted income')
plt.xticks(rotation=90)  # vertical tick labels so the year labels do not overlap
sns.lineplot(x=year_income.index, y=year_income['inflation_adjusted_gross'], ax=ax)