<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Choropleth-visualzation" data-toc-modified-id="Choropleth-visualzation-1">Choropleth visualzation</a></span></li><li><span><a href="#Correlation-between-percent-women-in-congress-and-democracy-index" data-toc-modified-id="Correlation-between-percent-women-in-congress-and-democracy-index-2">Correlation between percent women in congress and democracy index</a></span><ul class="toc-item"><li><span><a href="#Overall-correlation" data-toc-modified-id="Overall-correlation-2.1">Overall correlation</a></span></li><li><span><a href="#Correlation-by-region" data-toc-modified-id="Correlation-by-region-2.2">Correlation by region</a></span></li></ul></li></ul></div>

In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import geojson
import dataframe_image as dfi

In [2]:
# Scrape the HTML tables on the IBAN country-codes page; pd.read_html returns a
# list of DataFrames, one per table found.
# NOTE(review): `country_codes` is not referenced again in the visible cells —
# confirm it is still needed before keeping this network call.
country_codes_url = 'https://www.iban.com/country-codes'
country_codes = pd.read_html(country_codes_url)

In [5]:
# helper function to get each woman's mandate end as a column
#def get_last_year_in_office(df):
#    functions = {'hrllo': (' (assassinated)','',regex=False), 
#                    'test': ('Incumbent',np.NaNm),
#                     fillna: df['Mandate start']}


In [6]:
# Load the two source tables: the country reference list (it supplies the
# 'Alpha-3 code' column merged in below) and the female heads of state dataset.
countries = pd.read_csv('data/list_of_countries.csv')
df = pd.read_csv('data/female_heads_of_state.csv')

In [7]:
def _expand_two_digit_year(date_text):
    """Return a four-digit year string for a date string ending in a two-digit year.

    The last two characters are read as the year: values above 39 map to the
    1900s, everything else to the 2000s.
    """
    suffix = date_text[-2:]
    century = '19' if int(suffix) > 39 else '20'
    return f'{century}{suffix}'


# add in the Alpha-3 code
df = df.merge(countries[['Country','Alpha-3 code']],on='Country')

# add columns that have the start and end years of their terms
df['start_year'] = df['Mandate start'].apply(_expand_two_digit_year)

# Normalise 'Mandate end' so it always ends in a two-digit year:
#   * strip the ' (assassinated)' annotation,
#   * turn the 'Incumbent' marker into a missing value (np.nan — the np.NaN
#     alias was removed in NumPy 2.0),
#   * fall back to the mandate start where the end is missing.
# NOTE(review): because of that fallback an incumbent's end_year equals her
# start_year, so she only gets a single 'Year' row downstream — confirm this
# is intended rather than extending incumbents to the latest year.
df['Mandate end'] = df['Mandate end'].str.replace(' (assassinated)','',regex=False)
df['Mandate end'] = df['Mandate end'].replace('Incumbent',np.nan)
df['Mandate end'] = df['Mandate end'].fillna(df['Mandate start'])
df['end_year'] = df['Mandate end'].apply(_expand_two_digit_year)

In [8]:
# Expand every leader into one row per calendar year of her term:
# build a year-start date range from start_year to end_year (both inclusive),
# explode it into rows, then keep only the four-character year as a string.
year_spans = [
    pd.date_range(first, last, freq='YS', inclusive='both')
    for first, last in zip(df['start_year'], df['end_year'])
]
df['Year'] = year_spans
df = df.explode('Year')
df['Year'] = df['Year'].astype(str).str[:4]

In [9]:
# Load the country outlines used by the choropleth.
with open('data/countries.geojson') as geojson_file:
    gj = geojson.load(geojson_file)

# Copy each feature's ISO alpha-3 code into a top-level 'id' so that plotly can
# associate the coordinates with the countries in the dataframe.
for country_shape in gj['features']:
    country_shape['id'] = country_shape['properties']['ISO_A3']

In [10]:
# Keep only three years (2020-2022) to save memory, because the choropleth is
# too large otherwise.
map_columns = ['Name','Country','Office','Head of state or government','Alpha-3 code','Year']
recent_years = [str(year) for year in range(2020,2023)]
df_less = df[map_columns].sort_values('Year')
df_less = df_less[df_less['Year'].isin(recent_years)]

### Choropleth visualzation

In [11]:
# without animation (only 2022)
#fig = px.choropleth_mapbox(df_less[df_less['Year']=='2022'], geojson=gj, locations='Alpha-3 code', 
#                    color='Head of state or government', mapbox_style='carto-positron', zoom=1,
#                           color_discrete_sequence=px.colors.qualitative.Prism,
#                          hover_data=['Name','Country','Office'])

#fig.show()

In [12]:
# with animation (2020 - 2022)
#fig = px.choropleth_mapbox(df_less, geojson=gj, locations='Alpha-3 code', 
#                    color='Head of state or government', mapbox_style='carto-positron', zoom=1,
#                           color_discrete_sequence=px.colors.qualitative.Prism,
#                          hover_data=['Name','Country','Office'],animation_frame='Year')

#fig.show()

### Correlation between percent women in congress and democracy index

In [4]:
# Load the two inputs for the correlation study.
# Percent women in parliaments:
women_in_parliments = pd.read_csv('data/world_data_final.csv')
# Democracy index:
democracy_indx = pd.read_csv('data/democracy_index_data.csv')


In [17]:
# Drop the leftover 'Unnamed: 0' column. Because this cell rebinds its own input,
# a second run would otherwise raise KeyError (the column is already gone);
# errors='ignore' makes the cell safe to re-run.
women_in_parliments = women_in_parliments.drop(columns='Unnamed: 0', errors='ignore')

In [20]:
# Save an image of the table to women.png, capped at 10 rows and 4 columns.
dfi.export(
    women_in_parliments,
    "women.png",
    max_rows=10,
    max_cols=4,
)

In [18]:
# Display the women-in-parliament table for inspection.
women_in_parliments

Unnamed: 0,COUNTRY,LOWER_OR_SINGLE_HOUSE_ELECTIONS,LOWER_OR_SINGLE_HOUSE_SEATS,LOWER_OR_SINGLE_HOUSE_WOMEN,LOWER_OR_SINGLE_HOUSE_PERCENT_W,UPPER_HOUSE_OR_SENATE_ELECTIONS,UPPER_HOUSE_OR_SENATE_SEATS,UPPER_HOUSE_OR_SENATE_WOMEN,UPPER_HOUSE_OR_SENATE_PERCENT_W,LOWER_OR_SINGLE_HOUSE_ELECTIONS_YEAR,UPPER_HOUSE_OR_SENATE_ELECTIONS_YEAR
0,Sweden,1998-09-01,349.0,149.0,0.426934,,0.0,0.0,,1998,NaT
1,Denmark,1998-03-01,179.0,67.0,0.374302,,0.0,0.0,,1998,NaT
2,Finland,1999-03-01,200.0,73.0,0.365000,,0.0,0.0,,1999,NaT
3,Netherlands,1998-05-01,150.0,54.0,0.360000,1999-05-01,75.0,20.0,0.266667,1998,1999
4,Norway,2001-09-01,165.0,59.0,0.357576,,0.0,0.0,,2001,NaT
...,...,...,...,...,...,...,...,...,...,...,...
2867,South Africa,2019-05-01,396.0,184.0,0.464646,2019-05-01,53.0,22.0,0.415094,2019,2019
2868,Germany,2017-09-01,709.0,223.0,0.314528,,69.0,23.0,0.333333,2017,NaT
2869,Zambia,2021-08-01,155.0,20.0,0.129032,,0.0,0.0,,2021,NaT
2870,United States of America,2020-11-01,430.0,122.0,0.283721,2020-11-01,100.0,24.0,0.240000,2020,2020


In [9]:
# Display the democracy index table (one column per year) for inspection.
democracy_indx

Unnamed: 0,Region,2022 rank,Country,Regime type,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2008,2006,five_year_mean
0,North America,12,Canada,Full democracy,8.88,8.87,9.24,9.22,9.15,9.15,9.15,9.08,9.08,9.08,9.08,9.08,9.08,9.07,9.07,9.126
1,North America,30,United States,Flawed democracy,7.85,7.85,7.92,7.96,7.96,7.98,7.98,8.05,8.11,8.11,8.11,8.11,8.18,8.22,8.22,7.934
2,Western Europe,20,Austria,Full democracy,8.20,8.07,8.16,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69,8.246
3,Western Europe,36,Belgium,Flawed democracy,7.64,7.51,7.51,7.64,7.78,7.78,7.77,7.93,7.93,8.05,8.05,8.05,8.05,8.16,8.15,7.644
4,Western Europe,37,Cyprus,Flawed democracy,7.38,7.43,7.56,7.59,7.59,7.59,7.65,7.53,7.40,7.29,7.29,7.29,7.29,7.70,7.60,7.552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,Sub-Saharan Africa,92,Tanzania,Hybrid regime,5.10,5.10,5.10,5.16,5.41,5.47,5.76,5.58,5.77,5.77,5.88,5.64,5.64,5.28,5.18,5.248
163,Sub-Saharan Africa,130,Togo,Authoritarian,2.99,2.80,2.80,3.30,3.10,3.05,3.32,3.41,3.45,3.45,3.45,3.45,3.45,2.43,1.75,3.010
164,Sub-Saharan Africa,99,Uganda,Hybrid regime,4.55,4.48,4.94,5.02,5.20,5.09,5.26,5.22,5.22,5.22,5.16,5.13,5.05,5.03,5.14,4.946
165,Sub-Saharan Africa,78,Zambia,Hybrid regime,5.80,5.72,4.86,5.09,5.61,5.68,5.99,6.28,6.39,6.26,6.26,6.19,5.68,5.25,5.25,5.392


In [14]:
# Save an image of the table to democracy.png, capped at 10 rows and 8 columns.
dfi.export(
    democracy_indx,
    "democracy.png",
    max_rows=10,
    max_cols=8,
)

In [14]:
# Add a year column for each chamber: the first four characters of the
# election date's string form.
for house in ('LOWER_OR_SINGLE_HOUSE', 'UPPER_HOUSE_OR_SENATE'):
    election_dates = women_in_parliments[f'{house}_ELECTIONS'].astype(str)
    women_in_parliments[f'{house}_ELECTIONS_YEAR'] = election_dates.str[:4]


In [15]:
# Years 2010 through 2022, as strings.
years_list = list(map(str, range(2010, 2023)))

In [16]:
# Year columns to unpivot: 2006, 2008, then every year from 2010 to 2022.
years_list = ['2006', '2008'] + [str(year) for year in range(2010,2023)]

# Melt the dataframe from wide to long: one row per country and year, with the
# score for that year in 'democracy_indx_score'.
democracy_indx_melted = democracy_indx.melt(
    id_vars=['Region','2022 rank','Country','Regime type','five_year_mean'],
    value_vars=years_list,
    var_name='year',
    value_name='democracy_indx_score',
)

In [17]:
# Keep only the lower/single house columns and rename them so they line up
# with the melted democracy index ('Country', 'year').
lower_house_columns = {
    'COUNTRY': 'Country',
    'LOWER_OR_SINGLE_HOUSE_ELECTIONS_YEAR': 'year',
    'LOWER_OR_SINGLE_HOUSE_PERCENT_W': 'percent_W',
}
women_lower_house = women_in_parliments[list(lower_house_columns)].rename(columns=lower_house_columns)

In [18]:
# Join democracy scores with lower-house shares on country and year.
# NOTE(review): this rebinds `df`, which earlier cells use for the heads-of-state
# table — a distinct name would make re-running those cells safe.
# NOTE(review): the inner join silently drops countries whose names differ
# between the two sources (the displayed tables show 'United States' vs
# 'United States of America') — confirm the names are harmonised.
# NOTE(review): the displayed result contains repeated Country/year rows; check
# whether duplicates should be removed before computing correlations.
df = pd.merge(democracy_indx_melted, women_lower_house, how='inner', on=['Country','year'])

In [27]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [28]:
# Display the merged democracy-score / percent-women table.
df

Unnamed: 0,Region,2022 rank,Country,Regime type,five_year_mean,year,democracy_indx_score,percent_W
0,North America,12,Canada,Full democracy,9.126,2006,9.07,0.207792
1,North America,12,Canada,Full democracy,9.126,2006,9.07,0.213115
2,North America,12,Canada,Full democracy,9.126,2006,9.07,0.207792
3,Western Europe,20,Austria,Full democracy,8.246,2006,8.69,0.322404
4,Western Europe,20,Austria,Full democracy,8.246,2006,8.69,0.327869
...,...,...,...,...,...,...,...,...
976,Sub-Saharan Africa,145,Guinea,Authoritarian,2.956,2022,2.32,0.296296
977,Sub-Saharan Africa,94,Kenya,Hybrid regime,5.100,2022,5.05,0.234286
978,Sub-Saharan Africa,94,Kenya,Hybrid regime,5.100,2022,5.05,0.234286
979,Sub-Saharan Africa,71,Lesotho,Flawed democracy,6.484,2022,6.19,0.264463


#### Overall correlation

In [32]:
# Pearson correlation between democracy score and share of women, all rows pooled.
score_vs_share = df[['democracy_indx_score','percent_W']]
score_vs_share.corr(method='pearson')

Unnamed: 0,democracy_indx_score,percent_W
democracy_indx_score,1.0,0.329408
percent_W,0.329408,1.0


#### Correlation by region

In [31]:
# Same Pearson correlation, computed separately within each region.
regional_pairs = df.groupby('Region')[['democracy_indx_score','percent_W']]
regional_pairs.corr(method='pearson')

Unnamed: 0_level_0,Unnamed: 1_level_0,democracy_indx_score,percent_W
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Asia and Australasia,democracy_indx_score,1.0,0.218536
Asia and Australasia,percent_W,0.218536,1.0
Central and Eastern Europe,democracy_indx_score,1.0,-0.078507
Central and Eastern Europe,percent_W,-0.078507,1.0
Latin America and the Caribbean,democracy_indx_score,1.0,-0.037236
Latin America and the Caribbean,percent_W,-0.037236,1.0
Middle East and North Africa,democracy_indx_score,1.0,0.349616
Middle East and North Africa,percent_W,0.349616,1.0
North America,democracy_indx_score,1.0,-0.00642
North America,percent_W,-0.00642,1.0


In [29]:
# Animated scatter of democracy score against share of women, one frame per year.
# Plotly does not rescale axes between animation frames, so the ranges are fixed
# from the full dataset; otherwise points in later frames can fall outside the
# view that was sized for the first frame.
score_range = [df['democracy_indx_score'].min() - 0.5, df['democracy_indx_score'].max() + 0.5]
share_range = [0, df['percent_W'].max() * 1.1]

# NOTE(review): the title misspells "Parliament"; left unchanged here.
fig = px.scatter(df, 
            x='democracy_indx_score',
            y='percent_W',
            template='plotly_white',    
            hover_data=['Country','year'],
            color='Region',
            animation_frame='year',
            title= 'Democracy Index vs Percent Women in Parliment across time',
            size='percent_W',
            range_x=score_range,
            range_y=share_range
        )

fig.update_xaxes(title_text='Democracy Index Score')
fig.update_yaxes(title_text='Percent Women in Lower Legislative Branches')

fig.show()

In [38]:
# Animated scatter of democracy score against share of women, one facet row per
# region and one frame per year.
# Plotly does not rescale axes between animation frames, so the ranges are fixed
# from the full dataset; otherwise points in later frames can fall outside the
# view that was sized for the first frame.
score_range = [df['democracy_indx_score'].min() - 0.5, df['democracy_indx_score'].max() + 0.5]
share_range = [0, df['percent_W'].max() * 1.1]

# NOTE(review): the title misspells "Parliament"; left unchanged here.
fig = px.scatter(df, 
            x='democracy_indx_score',
            y='percent_W',
            template='plotly_white',    
            hover_data=['Country','year'],
            facet_row='Region',
            color='Region',
            animation_frame='year',
            title= 'Democracy Index vs Percent Women in Parliment across time',
            size='percent_W',
            height=800,
            range_x=score_range,
            range_y=share_range
        )

# Axis-title overrides are disabled for the faceted figure:
#fig.update_xaxes(title_text='Democracy Index Score')
#fig.update_yaxes(title_text='Percent Women in Lower Legislative Branches')

fig.show()