In [5]:
import pandas as pd
import plotly.express as px
import numpy as np
import geojson
import dataframe_image as dfi
import warnings

### Box Plot of democracy index data

In [25]:
# Load the democracy index CSV into a DataFrame for the box plot.
dem_idx = pd.read_csv(filepath_or_buffer='../data/democracy_index_data.csv')

In [55]:
# Box plot of the `five_year_mean` column, one coloured box per `region`.
# update_yaxes returns the figure, so the axis title is set in the same chain.
fig = px.box(
    data_frame=dem_idx,
    y="five_year_mean",
    color="region",
    template='plotly_white',
    color_discrete_sequence=px.colors.qualitative.Prism,
    title='Box Plots of the Democracy Index Score by Region',
    width=800,
    height=400,
).update_yaxes(title_text='Democracy Index Score (5 year mean)')
fig.show()

### Histogram of women data

In [54]:
# Load the 2022 world dataset that the 'Percent Women' histogram is drawn from.
women = pd.read_csv(filepath_or_buffer='../data/world_data_2022.csv')

In [55]:
# Histogram of 'Percent Women', split into one facet column per 'House' value.
fig = px.histogram(
    data_frame=women,
    x='Percent Women',
    facet_col='House',
    nbins=10,
    width=800,
    height=400,
    template='plotly_white',
    # second and third colours of the Prism palette
    color_discrete_sequence=px.colors.qualitative.Prism[1:3],
    title='Histogram of the Percent Women in Parliaments in Countries across the World in 2022',
)

# Overwrite the text of each facet annotation, in layout order.
facet_titles = ['Lower Houses', 'Upper Houses']
for position, annotation in enumerate(fig.layout.annotations):
    annotation.text = facet_titles[position]

fig.show()

### Correlation between percent women in congress and democracy index

In [12]:
# Load the joined dataset and discard every row that has a missing value.
df = pd.read_csv(filepath_or_buffer='../data/joined_dataset.csv').dropna()

#### Overall correlation

In [15]:
# Pearson correlation matrix of the democracy score and the percent-women column.
df.loc[:, ['democracy_index_score', 'percent_W']].corr(method='pearson')

Unnamed: 0,democracy_index_score,percent_W
democracy_index_score,1.0,0.284065
percent_W,0.284065,1.0


#### Correlation by region

In [17]:
# Pearson correlation matrix of the two columns, computed within each region.
df.groupby(by='region')[['democracy_index_score', 'percent_W']].corr(method='pearson')

Unnamed: 0_level_0,Unnamed: 1_level_0,democracy_index_score,percent_W
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Asia and Australasia,democracy_index_score,1.0,0.098991
Asia and Australasia,percent_W,0.098991,1.0
Central and Eastern Europe,democracy_index_score,1.0,-0.102758
Central and Eastern Europe,percent_W,-0.102758,1.0
Latin America and the Caribbean,democracy_index_score,1.0,0.09664
Latin America and the Caribbean,percent_W,0.09664,1.0
Middle East and North Africa,democracy_index_score,1.0,0.271621
Middle East and North Africa,percent_W,0.271621,1.0
North America,democracy_index_score,1.0,-0.144188
North America,percent_W,-0.144188,1.0


In [23]:
# Animated scatter (one frame per `year`) of the democracy score against
# `percent_W`, coloured by region; marker size also follows `percent_W`.
fig = px.scatter(
    df,
    x='democracy_index_score',
    y='percent_W',
    template='plotly_white',
    hover_data=['country', 'year'],
    color='region',
    color_discrete_sequence=px.colors.qualitative.Prism,
    animation_frame='year',
    # title spelling corrected ("Parliament")
    title='Democracy Index vs Percent Women in Parliament across time',
    size='percent_W',
    width=1200,
    height=800,
)

# Replace the raw column names on the axes with readable titles.
fig.update_xaxes(title_text='Democracy Index Score')
fig.update_yaxes(title_text='Percent Women in Lower Legislative Branches')

fig.show()

In [2]:
# Fetch the HTML tables from the IBAN country-codes page
# (read_html returns a list of DataFrames).
country_codes = pd.read_html(io='https://www.iban.com/country-codes')

In [5]:
# helper function to get each woman's mandate end year as a column
#def get_last_year_in_office(df):
#    functions = {'hrllo': (' (assassinated)','',regex=False), 
#                    'test': ('Incumbent',np.NaNm),
#                     fillna: df['Mandate start']}


In [6]:
# Load the female heads of state records and the country table that supplies
# the Alpha-3 codes. Note that this rebinds `df` to a new frame.
df = pd.read_csv(filepath_or_buffer='data/female_heads_of_state.csv')
countries = pd.read_csv(filepath_or_buffer='data/list_of_countries.csv')

In [7]:
def _to_four_digit_year(date_text):
    """Expand the trailing two-digit year of ``date_text`` into a four-digit string.

    The last two characters are parsed as an integer: values above 39 are read
    as 19xx, everything else as 20xx.

    Raises:
        ValueError: if the last two characters are not digits.
    """
    two_digits = date_text[-2:]
    century = '19' if int(two_digits) > 39 else '20'
    return f'{century}{two_digits}'


# add in the Alpha-3 code (default inner join: rows whose 'Country' has no
# match in `countries` are dropped)
df = df.merge(countries[['Country','Alpha-3 code']],on='Country')

# add columns that have the start and end years of their terms
# assumes every 'Mandate start' / 'Mandate end' string ends in a two-digit year — TODO confirm
df['start_year'] = df['Mandate start'].apply(_to_four_digit_year)

# Normalise 'Mandate end': strip the ' (assassinated)' suffix, blank out
# 'Incumbent', then fall back to the mandate start date for anything missing.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
# NOTE(review): this makes an incumbent's end_year equal to her start_year, so
# a still-serving leader only covers a single year — confirm this is intended.
df['Mandate end'] = (
    df['Mandate end']
    .str.replace(' (assassinated)', '', regex=False)
    .replace('Incumbent', np.nan)
    .fillna(df['Mandate start'])
)
df['end_year'] = df['Mandate end'].apply(_to_four_digit_year)

In [8]:
# Turn each leader's term into one row per year in office: build the range of
# year-start dates from start_year to end_year (both ends included), then
# explode that range into separate rows.
df['Year'] = [
    pd.date_range(start=first_year, end=last_year, freq='YS', inclusive='both')
    for first_year, last_year in zip(df['start_year'], df['end_year'])
]
df = df.explode(column='Year')
# Keep only the first four characters (the year) of each value's string form.
df['Year'] = df['Year'].astype(str).str[:4]

In [9]:
# read in geojson for choropleth
# GeoJSON is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default text encoding.
with open('data/countries.geojson', encoding='utf-8') as geojson_file:
    gj = geojson.load(geojson_file)

# add in the 'id' (copied from each feature's ISO_A3 property), which allows
# plotly to associate the coordinates with the countries in my dataframe
for feature in gj['features']:
    feature['id'] = feature['properties']['ISO_A3']

In [10]:
# Keep only the map columns and the years 2020-2022, sorted by year; the
# choropleth is too large to animate over the full date range.
df_less = (
    df.loc[:, ['Name', 'Country', 'Office', 'Head of state or government', 'Alpha-3 code', 'Year']]
    .sort_values(by='Year')
    .loc[lambda frame: frame['Year'].isin(list(map(str, range(2020, 2023))))]
)

### Choropleth visualization

In [11]:
# without animation (only 2022)
#fig = px.choropleth_mapbox(df[[df_less]==2022], geojson=gj, locations='Alpha-3 code', 
#                    color='Head of state or government', mapbox_style='carto-positron', zoom=1,
#                           color_discrete_sequence=px.colors.qualitative.Prism,
#                          hover_data=['Name','Country','Office'])

#fig.show()

In [12]:
# with animation (2020 - 2022)
#fig = px.choropleth_mapbox(df_less, geojson=gj, locations='Alpha-3 code', 
#                    color='Head of state or government', mapbox_style='carto-positron', zoom=1,
#                           color_discrete_sequence=px.colors.qualitative.Prism,
#                          hover_data=['Name','Country','Office'],animation_frame='Year')

#fig.show()

In [21]:
import plotly.graph_objects as go

In [23]:
# Animated scatter (one frame per `year`) with one facet row per `Region`.
# NOTE(review): in the visible cells `df` was last rebound to the female heads
# of state frame, and the column names here ('democracy_indx_score', 'Country',
# 'Region') differ from the earlier scatter ('democracy_index_score',
# 'country', 'region') — confirm which frame and spelling this cell expects.
fig = px.scatter(
    df,
    x='democracy_indx_score',
    y='percent_W',
    template='plotly_white',
    hover_data=['Country', 'year'],
    facet_row='Region',
    color='Region',
    color_discrete_sequence=px.colors.qualitative.Prism,
    animation_frame='year',
    # title spelling corrected ("Parliament")
    title='Democracy Index vs Percent Women in Parliament across time',
    size='percent_W',
    height=800,
)

# Blank the title of every x axis; update_xaxes applies to all facet axes.
fig.update_xaxes(title_text='')
# Blank every y-axis title as well, and ensure that each chart has its own
# y range and tick labels.
fig.update_yaxes(title_text='', matches=None, showticklabels=True, visible=True)
#fig.update_xaxes(title_text='Democracy Index Score')
#fig.update_yaxes(title_text='Percent Women in Lower Legislative Branches')

fig.show()