# Analysis of the movie data for each movie industry with the corresponding region population

You can explore the interactive plots as you wish. 

Please ignore the large chunks of code; we will move them into plot_functions.py in P3.

In [1]:
%matplotlib inline
import os
import pandas as pd
import requests
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interactive, Output
import plotly.graph_objects as go
from IPython.display import display
from scipy.stats import gaussian_kde
from plot_functions_forall import *


In [2]:
# Root folder that holds one sub-folder of CSV files per industry (see var_loader).
# It can be overridden with the DATA_FOLDER environment variable so the notebook
# also runs on machines other than the author's; the original absolute path is
# kept as the default so existing behaviour is unchanged when the variable is unset.
DATA_FOLDER = os.environ.get(
    'DATA_FOLDER',
    '/Users/zaynebmellouli/MA1/ada-2024-project-advanceddestroyers0fall/data/final/',
)

In [3]:
def var_loader(DATA_FOLDER, mode='hollywood'):
    """Load the eight CSV datasets of one movie industry.

    All files are read from the sub-folder ``<DATA_FOLDER>/<mode>/``.

    Parameters
    ----------
    DATA_FOLDER : str or os.PathLike
        Root data folder. A trailing path separator is optional.
    mode : str, default 'hollywood'
        Name of the industry; used both as the sub-folder name and inside the
        file names.

    Returns
    -------
    list of pandas.DataFrame
        Eight dataframes, always in this order:
        ``{mode}_data``, ``{mode}_data_ethnicity``, ``{mode}_ethnic_realworld``,
        ``male_{mode}_realworld_averages``, ``female_{mode}_realworld_averages``,
        ``bothsexes_{mode}_realworld_averages``,
        ``male_{mode}_realworld_proportions``,
        ``female_{mode}_realworld_proportions``.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` if one of the expected files is missing.
    """
    # os.path.join inserts the separator when needed, so the result no longer
    # depends on DATA_FOLDER ending with "/".
    mode_dir = os.path.join(DATA_FOLDER, mode)

    # Callers unpack the result positionally, so this order must not change.
    file_names = [
        f"{mode}_data.csv",
        f"{mode}_data_ethnicity.csv",
        f"{mode}_ethnic_realworld.csv",
        f"male_{mode}_realworld_averages.csv",
        f"female_{mode}_realworld_averages.csv",
        f"bothsexes_{mode}_realworld_averages.csv",
        f"male_{mode}_realworld_proportions.csv",
        f"female_{mode}_realworld_proportions.csv",
    ]
    return [pd.read_csv(os.path.join(mode_dir, name)) for name in file_names]

In [4]:
# Load the eight Hollywood dataframes; the target order follows the order in
# which var_loader returns them.
(
    hollywood_data,
    hollywood_data_ethnicity,
    hollywood_ethnic_realworld,
    male_hollywood_realworld_averages,
    female_hollywood_realworld_averages,
    bothsexes_hollywood_realworld_averages,
    male_hollywood_realworld_proportions,
    female_hollywood_realworld_proportions,
) = var_loader(DATA_FOLDER, mode="hollywood")

In [5]:
# Load the eight Bollywood dataframes; the target order follows the order in
# which var_loader returns them.
(
    bollywood_data,
    bollywood_data_ethnicity,
    bollywood_ethnic_realworld,
    male_bollywood_realworld_averages,
    female_bollywood_realworld_averages,
    bothsexes_bollywood_realworld_averages,
    male_bollywood_realworld_proportions,
    female_bollywood_realworld_proportions,
) = var_loader(DATA_FOLDER, mode="bollywood")

In [6]:
# Load the eight East-Asia dataframes; the target order follows the order in
# which var_loader returns them.
(
    eastasia_data,
    eastasia_data_ethnicity,
    eastasia_ethnic_realworld,
    male_eastasia_realworld_averages,
    female_eastasia_realworld_averages,
    bothsexes_eastasia_realworld_averages,
    male_eastasia_realworld_proportions,
    female_eastasia_realworld_proportions,
) = var_loader(DATA_FOLDER, mode="eastasia")

In [7]:
# Prepare the region data: one entry per industry, each bundling the dataframes
# loaded above with the list of ethnicity group labels used for that industry.
# Entries are added in the order Hollywood, Bollywood, Eastasia.
region_data = {}

region_data['Hollywood'] = {
    'region_data': hollywood_data,
    'male_real_world_proportions': male_hollywood_realworld_proportions,
    'female_real_world_proportions': female_hollywood_realworld_proportions,
    'male_real_world_averages': male_hollywood_realworld_averages,
    'female_real_world_averages': female_hollywood_realworld_averages,
    'bothsexes_real_world_averages': bothsexes_hollywood_realworld_averages,
    'data_ethnicity': hollywood_data_ethnicity,
    'realworld_ethnicity': hollywood_ethnic_realworld,
    'ethnicities': [
        "African Americans",
        "American Indians",
        "Arab Americans",
        "Asian Americans",
        "Caucasian Americans",
        "Jewish Americans",
        "Latino Americans",
    ],
}

region_data['Bollywood'] = {
    'region_data': bollywood_data,
    'male_real_world_proportions': male_bollywood_realworld_proportions,
    'female_real_world_proportions': female_bollywood_realworld_proportions,
    'male_real_world_averages': male_bollywood_realworld_averages,
    'female_real_world_averages': female_bollywood_realworld_averages,
    'bothsexes_real_world_averages': bothsexes_bollywood_realworld_averages,
    'data_ethnicity': bollywood_data_ethnicity,
    'realworld_ethnicity': bollywood_ethnic_realworld,
    'ethnicities': [
        "South_Indian_Ethnicities",
        "North_Indian_Ethnicities",
        "Eastern_Indian_Ethnicities",
        "Western_and_Central_Indian_Ethnicities",
        "Religious_and_Caste_Groups",
    ],
}

region_data['Eastasia'] = {
    'region_data': eastasia_data,
    'male_real_world_proportions': male_eastasia_realworld_proportions,
    'female_real_world_proportions': female_eastasia_realworld_proportions,
    'male_real_world_averages': male_eastasia_realworld_averages,
    'female_real_world_averages': female_eastasia_realworld_averages,
    'bothsexes_real_world_averages': bothsexes_eastasia_realworld_averages,
    'data_ethnicity': eastasia_data_ethnicity,
    'realworld_ethnicity': eastasia_ethnic_realworld,
    'ethnicities': [
        "Chinese",
        "Taiwanese",
        "Japanese",
        "Koreans",
        "Other Asians",
    ],
}

# Region names, in insertion order of region_data
regions = list(region_data)

# Selectable time periods; the first entry stands for the whole time span
periods = ["All periods", "1950-1965", "1966-1980", "1981-1995", "1996-2012"]

### Ethnicity comparison plot

In [7]:
# Build the interactive ethnicity comparison figure. The helper is not defined
# in this notebook; presumably it comes from the star import of
# plot_functions_forall (TODO confirm). The recorded output below shows Region,
# Period and Genre dropdowns, a Plotly FigureWidget with horizontal bars, and a
# message that the plot was saved to an ethnicity_comparison.html file.
create_ethnicity_comparison_graph2()

Dropdown(description='Region', options=('Hollywood', 'Bollywood', 'Eastasia'), value='Hollywood')

Dropdown(description='Period', options=('All periods', '1950-1965', '1966-1980', '1981-1995', '1996-2012'), va…

Dropdown(description='Genre', options=('All', 'Action/Adventure', 'Animation/Family', 'Comedy', 'Documentary',…

FigureWidget({
    'data': [{'hovertemplate': 'Real-world: %{x:.1f}%',
              'marker': {'color': 'teal', 'opacity': 0.8},
              'name': 'Real-world Population',
              'orientation': 'h',
              'type': 'bar',
              'uid': 'ab6744bc-5ab0-4b46-921e-89c5f087ce21',
              'x': array([12.4  ,  0.78 ,  0.335,  2.9  , 68.325,  3.075, 10.   ]),
              'y': array(['African Americans', 'American Indians', 'Arab Americans',
                          'Asian Americans', 'Caucasian Americans', 'Jewish Americans',
                          'Latino Americans'], dtype=object)},
             {'hovertemplate': 'Hollywood Industry: %{x:.1f}%%',
              'marker': {'color': 'purple', 'opacity': 0.8},
              'name': 'Hollywood Industry',
              'orientation': 'h',
              'type': 'bar',
              'uid': '2e16497a-a551-4120-a192-f80c5a444f33',
              'x': [-16.37654730327144, -1.4727011494252873, -0.5360300618921309,
   

Plot saved successfully to: /Users/zaynebmellouli/MA1/ada-2024-project-advanceddestroyers0fall/src/plots_html/ethnicity_comparison.html


### Gender distribution comparison plot

In [8]:
# Build the interactive gender proportions figure. The helper is not defined in
# this notebook; presumably it comes from the star import of
# plot_functions_forall (TODO confirm). The recorded output below shows a VBox
# of dropdowns, a Plotly FigureWidget with Male/Female bar traces per genre, and
# a message that the plot was saved to a gender_comparison.html file.
create_gender_proportions_graph2()

VBox(children=(HBox(children=(Dropdown(description='Region', options=('Hollywood', 'Bollywood', 'Eastasia'), v…

FigureWidget({
    'data': [{'hovertemplate': 'Male: 76.0%<extra></extra>',
              'legendgroup': 'Male',
              'marker': {'color': 'teal'},
              'name': 'Male',
              'showlegend': True,
              'type': 'bar',
              'uid': 'fa2b6347-7ff6-43da-91f1-ad94b8698e9b',
              'x': [Action/Adventure],
              'y': [0.7599300797527819]},
             {'base': [0.7599300797527819],
              'hovertemplate': 'Female: 24.0%<extra></extra>',
              'legendgroup': 'Female',
              'marker': {'color': 'purple'},
              'name': 'Female',
              'showlegend': True,
              'type': 'bar',
              'uid': '8141a4ed-ca10-4221-85b2-8db37304e741',
              'x': [Action/Adventure],
              'y': [0.24006992024721802]},
             {'hovertemplate': 'Male: 67.7%<extra></extra>',
              'legendgroup': 'Male',
              'marker': {'color': 'teal'},
              'name': 'Male',
         

Plot saved successfully to: /Users/zaynebmellouli/MA1/ada-2024-project-advanceddestroyers0fall/src/plots_html/gender_comparison.html


### Age distribution comparison plot

In [9]:
# Build the interactive age distribution figure. The helper is not defined in
# this notebook; presumably it comes from the star import of
# plot_functions_forall (TODO confirm). The recorded output below shows a VBox
# of dropdowns, a Plotly FigureWidget with filled line traces named
# "Real-world Population" and "Actors", and a message that the plot was saved
# to an age_distribution.html file.
create_age_distribution_graph2()

VBox(children=(HBox(children=(Dropdown(description='Region', options=('Hollywood', 'Bollywood', 'Eastasia'), v…

FigureWidget({
    'data': [{'fill': 'tozeroy',
              'fillcolor': 'rgba(0, 128, 128, 0.2)',
              'hovertemplate': 'Age %{x:.1f}: %{y:.2%}<extra>Real-world Population</extra>',
              'line': {'color': 'teal'},
              'mode': 'lines',
              'name': 'Real-world Population',
              'type': 'scatter',
              'uid': 'b23ad44f-dccd-4365-b04e-cc16142b0c76',
              'x': array([  0.       ,   0.2004008,   0.4008016, ...,  99.5991984,  99.7995992,
                          100.       ]),
              'y': array([1.83549919e-03, 1.88387342e-03, 1.93194606e-03, ..., 8.04239037e-06,
                          7.38738317e-06, 6.77843461e-06])},
             {'fill': 'tozeroy',
              'fillcolor': 'rgba(128, 0, 128, 0.2)',
              'hovertemplate': 'Age %{x:.1f}: %{y:.2%}<extra>Actors</extra>',
              'line': {'color': 'purple'},
              'mode': 'lines',
              'name': 'Actors',
              'type': 'scatter

Plot saved successfully to: /Users/zaynebmellouli/MA1/ada-2024-project-advanceddestroyers0fall/src/plots_html/age_distribution.html


## Other plots

While exploring the dataset, we experimented with different ways of plotting the data and looked for aspects that we could make interactive and interesting for readers.

In this section, you can find various types of plots for different types of data.

In [32]:
# Ethnicity labels handed to the East-Asia donut and heatmap plots below.
east_asian_ethnicities = [
    'Taiwanese people',
    'Koreans',
    'Japanese Americans',
    'Chinese Americans',
    'Hongkongers',
    'Chinese Canadians',
    'Chinese Singaporeans',
    'Vietnamese people',
]

In [33]:
# Ethnicity plot for the East-Asia data: passes the 'actor_ethnicity_label'
# column name and the east_asian_ethnicities labels to the helper.
# NOTE(review): the helper is not defined in this notebook (presumably from the
# plot_functions_forall star import) and no output is recorded for this cell,
# so the "donut" rendering is inferred from its name only — confirm.
plot_interactive_ethnicity_donut(eastasia_data, 'actor_ethnicity_label', east_asian_ethnicities)

In [34]:
# Genre plot for the East-Asia data, based on the 'main_genre' column with
# min_count_threshold=400.
# NOTE(review): the helper is not defined in this notebook and no output is
# recorded; presumably genres with fewer than 400 rows are filtered out, but
# the exact meaning of the threshold must be checked in the helper.
plot_interactive_genre_sunburst(eastasia_data, 'main_genre', min_count_threshold=400)

In [35]:
 plot_interactive_gender_distribution(bollywood_data, 'actor_gender')

In [36]:
# Language plot for the East-Asia data, based on the 'languages' column.
# NOTE(review): the helper is not defined in this notebook (presumably from the
# plot_functions_forall star import) and no output is recorded for this cell,
# so the "treemap" rendering is inferred from its name only — confirm.
plot_interactive_language_treemap(eastasia_data, 'languages')

In [37]:
# Ethnicity-by-genre plot for the East-Asia data: passes the 'main_genre' and
# 'actor_ethnicity_label' column names, the east_asian_ethnicities labels and
# num_top_genres=10 to the helper.
# NOTE(review): the helper is not defined in this notebook and no output is
# recorded; presumably only the 10 most frequent genres are shown — confirm.
plot_interactive_ethnicity_genre_heatmap(eastasia_data, 'main_genre', 'actor_ethnicity_label', east_asian_ethnicities, num_top_genres=10)