In [1]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
from datetime import datetime
import pandas_datareader.data as web
from plotly.offline import init_notebook_mode, plot

# Switch Plotly to inline notebook rendering. connected=True asks Plotly to
# load plotly.js from the web rather than embedding it in the notebook (per the
# plotly.offline documentation), so figures need an internet connection to draw.
init_notebook_mode(connected=True)

In [2]:
# Read the three cleaned CSV extracts (Bechdel ratings, crew, cast), in that
# order, from the shared data directory two levels above the notebook.
bechdel, crew, cast = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/bechdel_clean.csv",
        "../../data/crew_clean.csv",
        "../../data/cast_clean.csv",
    )
)

In [3]:
# Show the first rows of the Bechdel table (head's default row count) to
# sanity-check the columns that were loaded.
bechdel.head()

Unnamed: 0.1,Unnamed: 0,tconst,id,title,bechdel_rating,year,averageRating,numVotes,runtimeMinutes,genres,primaryTitle
0,3,3.0,5433,Pauvre Pierrot,0,1892,6.6,947.0,4,"Animation,Comedy,Romance",Pauvre Pierrot
1,4,12.0,6199,"Arrival of a Train, The",0,1896,7.4,8160.0,1,"Documentary,Short",The Arrival of a Train
2,5,14.0,5444,Tables Turned on the Gardener,0,1895,7.2,3564.0,1,"Comedy,Short",Tables Turned on the Gardener
3,6,91.0,4982,"House of the Devil, The",0,1896,6.7,2023.0,3,"Horror,Short",The House of the Devil
4,7,131.0,5406,Une nuit terrible,0,1896,5.9,768.0,1,"Comedy,Horror,Short",A Terrible Night


### Ratings by 5-year groups

In [4]:
# One frame per Bechdel rating (0 through 3): the rows of `bechdel` whose
# `bechdel_rating` equals that rating.
rating_0, rating_1, rating_2, rating_3 = (
    bechdel.loc[bechdel['bechdel_rating'].eq(score)]
    for score in range(4)
)

In [5]:
def _movies_per_year(subset):
    """Count the rows of `subset` for each distinct value of `year`.

    Returns a two-column frame: `year`, and `cnt` holding the number of rows
    of `subset` that have that year.
    """
    return subset.groupby('year').size().reset_index(name='cnt')


# Yearly movie counts, one frame per Bechdel rating.
rating_0_cnt = _movies_per_year(rating_0)
rating_1_cnt = _movies_per_year(rating_1)
rating_2_cnt = _movies_per_year(rating_2)
rating_3_cnt = _movies_per_year(rating_3)

In [6]:
# Label every movie with the first year of its 5-year bucket by flooring the
# release year to a multiple of 5 (e.g. 1896 -> 1895).
bechdel['5_year'] = (bechdel['year']//5)*5
# Preview only the first ten rows rather than echoing the whole frame.
bechdel.head(10)

Unnamed: 0.1,Unnamed: 0,tconst,id,title,bechdel_rating,year,averageRating,numVotes,runtimeMinutes,genres,primaryTitle,5_year
0,3,3.0,5433,Pauvre Pierrot,0,1892,6.6,947.0,4,"Animation,Comedy,Romance",Pauvre Pierrot,1890
1,4,12.0,6199,"Arrival of a Train, The",0,1896,7.4,8160.0,1,"Documentary,Short",The Arrival of a Train,1895
2,5,14.0,5444,Tables Turned on the Gardener,0,1895,7.2,3564.0,1,"Comedy,Short",Tables Turned on the Gardener,1895
3,6,91.0,4982,"House of the Devil, The",0,1896,6.7,2023.0,3,"Horror,Short",The House of the Devil,1895
4,7,131.0,5406,Une nuit terrible,0,1896,5.9,768.0,1,"Comedy,Horror,Short",A Terrible Night,1895
5,8,211.0,5410,"Astronomer&#39;s Dream; or, The Man in the Moo...",0,1898,7.5,2433.0,3,"Comedy,Fantasy,Horror",A Trip to the Moon,1895
6,9,230.0,5914,Cinderella,3,1899,6.5,1054.0,6,"Drama,Fantasy,Short",Cinderella,1895
7,10,246.0,4994,A Turn of the Century Illusionist,0,1899,6.5,763.0,1,Short,A Turn of the Century Illusionist,1895
8,11,272.0,4344,As Seen Through A Telescope (a.k.a The Profess...,0,1900,5.6,526.0,1,"Comedy,Short",As Seen Through a Telescope,1900
9,12,300.0,4271,"Enchanted Drawing, The",0,1900,7.0,847.0,2,"Animation,Comedy,Fantasy",The Enchanted Drawing,1900


In [7]:
# Number of movies for every (5-year bucket, Bechdel rating) pair.
ratings_by_year = (
    bechdel
    .groupby(['5_year', 'bechdel_rating'])
    .size()
    .reset_index(name='num_movies')
)

# Total number of movies in every 5-year bucket, regardless of rating.
num_movies_by_year = (
    bechdel
    .groupby('5_year')
    .size()
    .reset_index(name='tot_movies')
)

In [8]:
# Attach each bucket's total to its per-rating rows, then derive the share of
# the bucket's movies that received each rating (a fraction, not 0-100).
movies = (
    ratings_by_year
    .merge(num_movies_by_year, how='left', on='5_year')
    .assign(percent=lambda merged: merged['num_movies'].div(merged['tot_movies']))
)

In [9]:
# Buckets kept for the chart: 5-year labels strictly between 1926 and 2018.
_in_plot_window = (movies['5_year'] > 1926) & (movies['5_year'] < 2018)

# One frame per Bechdel rating (0 through 3), restricted to that window.
movies_pct_0, movies_pct_1, movies_pct_2, movies_pct_3 = (
    movies[movies['bechdel_rating'].eq(level) & _in_plot_window]
    for level in range(4)
)

In [10]:
# Stacked bar chart: for each 5-year bucket, the share of movies that received
# each Bechdel rating. One trace per rating; all bars share the same outline.


def _rating_trace(frame, label, fill_color):
    """Build the bar trace for a single Bechdel rating.

    frame      -- rows for one rating; its `5_year` column is plotted on x and
                  its `percent` column on y.
    label      -- legend entry for the trace.
    fill_color -- CSS colour string used to fill the bars.

    Returns a `go.Bar` with the shared dark outline applied.
    """
    return go.Bar(
        x=frame['5_year'],
        y=frame.percent,
        name=label,
        marker=dict(
            color=fill_color,
            line=dict(color='rgb(73, 59, 59)', width=1.5),
        ),
    )


# Create traces (legend labels previously misspelled "Failed" as "Faied").
trace0 = _rating_trace(movies_pct_0, '0 - Failed', 'rgb(183, 42, 42)')
trace1 = _rating_trace(
    movies_pct_1,
    '1 - Failed but has two women in it',
    'rgb(219, 65, 65)',
)
trace2 = _rating_trace(
    movies_pct_2,
    '2 - Failed but has two women in it who talk to each other',
    'rgb(242, 92, 92)',
)
trace3 = _rating_trace(
    movies_pct_3,
    '3 - Passed, has two women in it who talk to each other about something other than a man',
    'rgb(95, 194, 204)',
)

# Traces are listed from rating 3 down to rating 0, which fixes the stacking order.
data = [trace3, trace2, trace1, trace0]
layout = go.Layout(title='Bechdel Pass Rate by Year', barmode='stack', 
                   titlefont=dict(size =18, family='Oswald, sans-serif'),
                   yaxis=dict(tickformat="%"), 
                   legend = dict(orientation = "h",xanchor = "center",x = 0.5))
fig = go.Figure(data=data, layout=layout)

# Render inline, then also write a standalone HTML copy without opening a browser.
plotly.offline.iplot(fig)
plotly.offline.plot(fig, filename='ratings_by_year.html', auto_open = False)

'file:///Users/smcnish/classes/msan622/project/new/bechdel-test/ratings_by_year.html'