In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [25]:
# Load the V-Dem electoral democracy index CSV into a DataFrame.
electoral_data = pd.read_csv(filepath_or_buffer="V-Dem_electoral_democracy_index.csv")

In [26]:
# Load the V-Dem individual liberties / equality-before-the-law CSV into a DataFrame.
indv_lib_data = pd.read_csv(filepath_or_buffer="V-Dem_indv_liberties_&_equality_before_law.csv")

In [27]:
# Load the V-Dem liberal democracy main index CSV into a DataFrame.
lib_democracy_data = pd.read_csv(filepath_or_buffer="V-Dem_liberal_democracy_main_index.csv")

In [28]:
# Load the V-Dem liberal institutions index CSV into a DataFrame.
lib_institutions_data = pd.read_csv(filepath_or_buffer="V-Dem_liberal_institutions_index.csv")

In [29]:
# Preview the first ten rows to check the column layout of the raw electoral table.
electoral_data.head(n=10)

Unnamed: 0,Entity,Code,Year,electdem_vdem_owid,electdem_vdem_high_owid,electdem_vdem_low_owid
0,Afghanistan,AFG,1789,0.018,0.026,0.012
1,Afghanistan,AFG,1790,0.018,0.026,0.012
2,Afghanistan,AFG,1791,0.018,0.026,0.012
3,Afghanistan,AFG,1792,0.018,0.026,0.012
4,Afghanistan,AFG,1793,0.018,0.026,0.012
5,Afghanistan,AFG,1794,0.018,0.026,0.012
6,Afghanistan,AFG,1795,0.018,0.026,0.012
7,Afghanistan,AFG,1796,0.018,0.026,0.012
8,Afghanistan,AFG,1797,0.018,0.026,0.012
9,Afghanistan,AFG,1798,0.018,0.026,0.012


In [30]:
# Restrict the electoral table to the rows whose Year is 1970.
year_is_1970 = electoral_data['Year'] == 1970
electoral_data_70 = electoral_data[year_is_1970]

In [31]:
# Apply the same Year == 1970 row filter to the three remaining tables.
indv_lib_data_70, lib_democracy_data_70, lib_institutions_data_70 = (
    frame[frame['Year'] == 1970]
    for frame in (indv_lib_data, lib_democracy_data, lib_institutions_data)
)

In [32]:
# head(0) yields an empty frame, so printing it shows only each table's column names.
print(*(
    frame.head(0)
    for frame in (
        electoral_data_70,
        indv_lib_data_70,
        lib_democracy_data_70,
        lib_institutions_data_70,
    )
))

Empty DataFrame
Columns: [Entity, Code, Year, electdem_vdem_owid, electdem_vdem_high_owid, electdem_vdem_low_owid]
Index: [] Empty DataFrame
Columns: [Entity, Code, Year, indiv_libs_vdem_owid, indiv_libs_vdem_high_owid, indiv_libs_vdem_low_owid]
Index: [] Empty DataFrame
Columns: [Entity, Code, Year, libdem_vdem_owid]
Index: [] Empty DataFrame
Columns: [Entity, Code, Year, lib_dich_row_owid, lib_dich_high_row_owid, lib_dich_low_row_owid]
Index: []


In [33]:
# Drop the *_high_* and *_low_* companion columns so that each table keeps a single
# index column. lib_democracy_data_70 has no such columns, so it is left untouched.
electoral_data_70 = electoral_data_70.drop(
    ['electdem_vdem_high_owid', 'electdem_vdem_low_owid'], axis=1
)
indv_lib_data_70 = indv_lib_data_70.drop(
    ['indiv_libs_vdem_high_owid', 'indiv_libs_vdem_low_owid'], axis=1
)
lib_institutions_data_70 = lib_institutions_data_70.drop(
    ['lib_dich_high_row_owid', 'lib_dich_low_row_owid'], axis=1
)

In [34]:
# Write each 1970 subset to its own CSV file. With to_csv's defaults the DataFrame
# index is written as the first column.
for csv_name, frame in (
    ("electoral_data_70.csv", electoral_data_70),
    ("indv_lib_data_70.csv", indv_lib_data_70),
    ("lib_institutions_data_70.csv", lib_institutions_data_70),
    ("lib_democracy_data_70.csv", lib_democracy_data_70),
):
    frame.to_csv(csv_name)

In [35]:
# Preview the first five rows of the trimmed 1970 electoral table.
electoral_data_70.head(n=5)

Unnamed: 0,Entity,Code,Year,electdem_vdem_owid
181,Afghanistan,AFG,1970,0.16
414,Africa,,1970,0.175545
522,Albania,ALB,1970,0.173
644,Algeria,DZA,1970,0.084
766,Angola,AGO,1970,0.01


In [36]:
# Collect the Entity column of each 1970 table as a plain Python list so that the
# membership of the four datasets can be compared.
electoral_entities, indv_lib_entities, lib_dem_entities, lib_inst_entities = map(
    lambda frame: frame['Entity'].tolist(),
    (electoral_data_70, indv_lib_data_70, lib_democracy_data_70, lib_institutions_data_70),
)



Next, it will be useful to see which entities are included in some datasets but not in others. One of the biggest challenges here is that the same entity may appear under different names, e.g., countries sometimes change their name.

In [37]:
# Entities present in the electoral table but missing from at least one of the
# other three tables.
only_electoral_entities = [
    entity
    for entity in electoral_entities
    if not (
        entity in indv_lib_entities
        and entity in lib_dem_entities
        and entity in lib_inst_entities
    )
]
print(only_electoral_entities)

['Africa', 'Asia', 'Bahrain', 'Europe', 'North America', 'Oceania', 'South America', 'Timor', 'World']


In [38]:
# Entities present in the individual-liberties table but missing from at least one
# of the other three tables.
only_indv_lib_entities = [
    entity
    for entity in indv_lib_entities
    if not all(
        entity in other_entities
        for other_entities in (electoral_entities, lib_dem_entities, lib_inst_entities)
    )
]
print(only_indv_lib_entities)

['Bahrain', 'Timor']


In [39]:
# Entities present in the liberal-democracy table but missing from at least one of
# the other three tables.
only_lib_dem_entities = [
    entity
    for entity in lib_dem_entities
    if not (
        entity in indv_lib_entities
        and entity in electoral_entities
        and entity in lib_inst_entities
    )
]
print(only_lib_dem_entities)

['Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America', 'World']


In [40]:
# Entities present in the liberal-institutions table but missing from at least one
# of the other three tables.
only_lib_inst_entities = [
    entity
    for entity in lib_inst_entities
    if not all(
        entity in other_entities
        for other_entities in (indv_lib_entities, electoral_entities, lib_dem_entities)
    )
]
print(only_lib_inst_entities)

[]


Since we're not interested in anything at the continent or "world" level, all of those entities can be removed. There do not appear to be any cases where the other datasets refer to Bahrain or Timor by a different name.

In [41]:
# Remove the continent- and world-level aggregate rows, keeping only country rows.
# A chained `'Africa' or 'Asia' or ...` evaluates to just 'Africa' (`or` returns its
# first truthy operand), so comparing against it with `!=` would drop Africa alone.
# `isin` tests each Entity against every name in the list.
electoral_aggregate_entities = [
    'Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America', 'World',
]
electoral_data_70 = electoral_data_70[
    ~electoral_data_70['Entity'].isin(electoral_aggregate_entities)
]

In [42]:
# Remove the continent- and world-level aggregate rows, keeping only country rows.
# A chained `'Africa' or 'Asia' or ...` evaluates to just 'Africa' (`or` returns its
# first truthy operand), so comparing against it with `!=` would drop Africa alone.
# `isin` tests each Entity against every name in the list.
lib_dem_aggregate_entities = [
    'Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America', 'World',
]
lib_democracy_data_70 = lib_democracy_data_70[
    ~lib_democracy_data_70['Entity'].isin(lib_dem_aggregate_entities)
]

The year is known to be 1970 at this point, so this redundant column will be removed.

In [43]:
# Every remaining row is for 1970, so the Year column carries no information; drop it
# from all four tables.
electoral_data_70 = electoral_data_70.drop(['Year'], axis=1)
indv_lib_data_70 = indv_lib_data_70.drop(['Year'], axis=1)
lib_institutions_data_70 = lib_institutions_data_70.drop(['Year'], axis=1)
lib_democracy_data_70 = lib_democracy_data_70.drop(['Year'], axis=1)

The country code is a far more reliable identifier than the country name, so it should be kept in the data. However, Palestine does not have a country code, so it is better to merge on the entity name.

In [44]:
# Join the four 1970 tables on Entity in two stages.
# Each pairwise join is an inner join; the final join is a left join, so every row of
# merge_1 is kept even when merge_2 has no matching Entity.
# Each table still carries its own Code column, so the result ends up with four
# suffixed copies (Code_x_x, Code_y_x, Code_x_y, Code_y_y).
merge_1 = indv_lib_data_70.merge(electoral_data_70, how='inner', on='Entity')
merge_2 = lib_democracy_data_70.merge(lib_institutions_data_70, how='inner', on='Entity')
merge_3 = merge_1.merge(merge_2, how='left', on='Entity')
merge_3.head()

Unnamed: 0,Entity,Code_x_x,indiv_libs_vdem_owid,Code_y_x,electdem_vdem_owid,Code_x_y,libdem_vdem_owid,Code_y_y,lib_dich_row_owid
0,Afghanistan,AFG,0.266,AFG,0.16,AFG,0.104,AFG,0.0
1,Albania,ALB,0.006,ALB,0.173,ALB,0.055,ALB,0.0
2,Algeria,DZA,0.428,DZA,0.084,DZA,0.065,DZA,0.0
3,Angola,AGO,0.024,AGO,0.01,AGO,0.035,AGO,0.0
4,Argentina,ARG,0.354,ARG,0.105,ARG,0.086,ARG,0.0


In [45]:
# The merges left four suffixed copies of the Code column; keep Code_x_x and drop the
# other three.
duplicate_code_columns = ['Code_y_x', 'Code_x_y', 'Code_y_y']
merge_3 = merge_3.drop(duplicate_code_columns, axis=1)
merge_3.head(n=5)

Unnamed: 0,Entity,Code_x_x,indiv_libs_vdem_owid,electdem_vdem_owid,libdem_vdem_owid,lib_dich_row_owid
0,Afghanistan,AFG,0.266,0.16,0.104,0.0
1,Albania,ALB,0.006,0.173,0.055,0.0
2,Algeria,DZA,0.428,0.084,0.065,0.0
3,Angola,AGO,0.024,0.01,0.035,0.0
4,Argentina,ARG,0.354,0.105,0.086,0.0


In [46]:
# Save the merged 1970 table. With to_csv's defaults the DataFrame index is written
# as the first column.
merge_3.to_csv(path_or_buf="V-Dem_data_1970.csv")