# Disney Analysis

## Imports

In [59]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

## Load Dataset

In [60]:
def load_csv(name = 'csv_pickle_disney_movies'):
    return pd.read_csv('datasets/' + name + '.csv')

In [61]:
df = load_csv()

In [62]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,Cover Url,Production company,Release date,Running time,Country,Language,Box office,url,...,Music by,Distributed by,Story by,Narrated by,Cinematography,Edited by,Languages,Screenplay by,Countries,Production companies
0,0,Academy Award Review of,https://upload.wikimedia.org/wikipedia/en/thum...,Walt Disney Productions,1937-05-19 00:00:00,41.0,United States,English,45.472,https://en.wikipedia.org//wiki/Academy_Award_R...,...,,,,,,,,,,
1,1,Snow White and the Seven Dwarfs,https://upload.wikimedia.org/wikipedia/en/thum...,Walt Disney Productions,1937-12-21 00:00:00,83.0,United States,English,418000000.0,https://en.wikipedia.org//wiki/Snow_White_and_...,...,"Frank Churchill,Paul Smith,Leigh Harline",RKO Radio Pictures,,,,,,,,
2,2,Pinocchio,https://upload.wikimedia.org/wikipedia/en/thum...,Walt Disney Productions,1940-02-07 00:00:00,88.0,United States,English,164000000.0,https://en.wikipedia.org//wiki/Pinocchio_(1940...,...,Leigh HarlinePaul J. Smith,RKO Radio Pictures,Ted SearsOtto EnglanderWebb SmithWilliam Cottr...,,,,,,,
3,3,Fantasia,https://upload.wikimedia.org/wikipedia/en/thum...,Walt Disney Productions,1940-11-13 00:00:00,126.0,United States,English,76.4,https://en.wikipedia.org//wiki/Fantasia_(1940_...,...,See program,RKO Radio Pictures,"Joe Grant,Dick Huemer",Deems Taylor,James Wong Howe,,,,,
4,4,The Reluctant Dragon,https://upload.wikimedia.org/wikipedia/en/thum...,Walt Disney Productions,1941-06-20 00:00:00,74.0,United States,English,960000.0,https://en.wikipedia.org//wiki/The_Reluctant_D...,...,Frank ChurchillLarry Morey,RKO Radio Pictures,,,Bert Glennon,Paul Weatherwax,,,,


## Cleaning up

In [63]:
# Remove the 'Unnamed' column
df.drop(columns = ['Unnamed: 0'], inplace = True)

In [64]:
# Remove 'Production company' and 'Production companies' column, cause we already know the company, duh
df.drop(columns = ['Production company', 'Production companies'], inplace = True)

In [65]:
# Remove 'Languages' column due to being obvious that it's primary in English
df.drop(columns = ['Languages'], inplace = True)

In [66]:
# Since Amazon posters make more sense, 'Cover Url' column should be removed
df.drop(columns = ['Cover Url'], inplace = True)

In [67]:
# Remove the 'URL' column since we don't need it anymore
df.drop(columns = ['url'], inplace = True)

In [68]:
# Remove columns 'Narrated by', 'Languages' and 'Countries' since they have very few records
df.drop(columns = ['Narrated by', 'Countries', 'Story by'], inplace = True)

In [79]:
# Rename columns
df = df.rename(columns = {'Written by': 'Writer(s)', 
                          'Produced by': 'Producer(s)', 
                          'Directed by': 'Director(s)', 
                          'Starring': 'Actors', 
                          'Running time': 'Duration',
                          'title': 'Title',
                         })

In [82]:
df.head()

Unnamed: 0,Title,Release date,Duration,Country,Language,Box office,Budget,Rotten Tomatoes,IMDB,Metascore,...,Director(s),Producer(s),Writer(s),Based on,Actors,Music by,Distributed by,Cinematography,Edited by,Screenplay by
0,Academy Award Review of,1937-05-19 00:00:00,41.0,United States,English,45.472,,,7.1,,...,,,,,,,,,,
1,Snow White and the Seven Dwarfs,1937-12-21 00:00:00,83.0,United States,English,418000000.0,1490000.0,,7.6,95.0,...,"David Hand ,William Cottrell,Wilfred Jackson,L...",Walt Disney,"Ted Sears,Richard Creedon,Otto Englander,Dick ...",Snow Whiteby The Brothers Grimm,"Adriana Caselotti,Lucille La Verne,Harry Stock...","Frank Churchill,Paul Smith,Leigh Harline",RKO Radio Pictures,,,
2,Pinocchio,1940-02-07 00:00:00,88.0,United States,English,164000000.0,2600000.0,73%,7.4,99.0,...,Supervising DirectorsBen SharpsteenHamilton Lu...,Walt Disney,,The Adventures of Pinocchioby Carlo Collodi,"Cliff Edwards,Dickie Jones,Christian Rub,Walte...",Leigh HarlinePaul J. Smith,RKO Radio Pictures,,,
3,Fantasia,1940-11-13 00:00:00,126.0,United States,English,76.4,2280000.0,95%,7.7,96.0,...,"Samuel Armstrong,James Algar,Bill Roberts,Paul...","Walt Disney,Ben Sharpsteen",,,"Leopold Stokowski,Deems Taylor",See program,RKO Radio Pictures,James Wong Howe,,
4,The Reluctant Dragon,1941-06-20 00:00:00,74.0,United States,English,960000.0,600000.0,67%,6.9,,...,"Alfred Werker Hamilton Luske Jack Cutting, Ub ...",Walt Disney,Live-action:Ted SearsAl PerkinsLarry ClemmonsB...,,Robert BenchleyFrances GiffordBuddy PepperNana...,Frank ChurchillLarry Morey,RKO Radio Pictures,Bert Glennon,Paul Weatherwax,
