# Political Leaders’ Affiliation Database

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
from datetime import timedelta
import numpy as np

In [3]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

In [4]:
# Data: https://dataverse.harvard.edu/file.xhtml?fileId=5211722&version=6.0
# Codebook: https://dataverse.harvard.edu/file.xhtml?fileId=5211721&version=6.0

---

## OECD

In [5]:
oecd_src = pd.read_html(
    "https://www.oecd.org/about/document/ratification-oecd-convention.htm", header=0
)[1]

In [6]:
oecd_src.drop(["Unnamed: 0", "Unnamed: 3"], axis=1, inplace=True)

In [7]:
oecd_src.columns = oecd_src.columns.str.lower()

In [8]:
oecd_src["country"] = oecd_src["country"].str.title()

In [9]:
oecd_src["date"] = pd.to_datetime(oecd_src["date"])

In [10]:
oecd_list = oecd_src.country.to_list()

---

### Read Excel spreadsheet data

In [11]:
src = pd.read_excel("data/raw/PLAD_Oct_2021.xls")

### Process the dates

In [12]:
src["startdate"] = pd.to_datetime(src["startdate"])
src["enddate"] = pd.to_datetime(src["enddate"])
src["birthdate"] = pd.to_datetime(src["birthdate"], errors="coerce")

In [13]:
src.rename(
    columns={
        "adm0": "birth_country",
        "adm1": "birth_adm1",
        "adm2": "birth_adm1",
        "country": "country_led",
    },
    inplace=True,
)

### Tenure in office

In [14]:
src["days_in_office"] = src["enddate"] - src["startdate"]

In [15]:
src["years_in_office"] = (src["days_in_office"] / timedelta(days=365)).round(2)

In [16]:
src["months_in_office"] = (
    (src["enddate"] - src["startdate"]) / np.timedelta64(1, "M")
).round()

In [17]:
src.head()

Unnamed: 0,idacr,leader,plad_id,archigos_id,startdate,enddate,startyear,endyear,birth_country,birth_adm1,birth_adm1.1,country_led,continent,latitude,longitude,geoname,geo_precision,foreign_leader,ethnicity1,ethnicity2,ethnicity_geoepr1,ethnicity_geoepr2,ethnicitysource1,ethnicitysource2,ethnicity_precision,entry,exit,gender,yrborn,birthdate,uid,id_0,id_1,id_2,gid_0,gid_1,gid_2,edu_name,edu_r,birthplace_comment,ethnicity_comment,days_in_office,years_in_office,months_in_office
0,AFG,Najibullah,AFG_1986_1,8243611b-1e42-11e4-b4cd-db5882bf8def,1986-05-04,1992-04-16,1986,1992,Afghanistan,Paktya,Gardez,Afghanistan,Asia,33.601341,69.241592,Gardez,1,0,Pashtun,.,Pashtuns,.,https://www.washingtonpost.com/archive/politics/1986/05/13/afghan-leader-had-scrappy-partisan-past/ea703575-37d4-4524-972b-2ba9206c4e30/??noredirect=on,https://dailytimes.com.pk/103139/dr-mohammed-najibullah-the-afghan-prometheus/,2,Irregular,Irregular,M,1947,1947-08-06,184964,1,26,262,AFG,AFG.26_1,AFG.26.4_1,.,8,.,.,2174 days,5.96,71.0
1,AFG,Mojadidi,AFG_1992_1,8243611c-1e42-11e4-b4cd-db5882bf8def,1992-04-28,1992-06-28,1992,1992,Afghanistan,Kabul,Kabul City,Afghanistan,Asia,34.52813,69.172333,Kabul,1,0,Pashtun,.,Pashtuns,.,http://www.afghan-bios.info/index.php?option=com_afghanbios&id=1085&task=view&total=2314&start=1266&Itemid=2,.,2,Irregular,Regular,M,1926,NaT,184611,1,14,144,AFG,AFG.14_1,AFG.14.6_1,Degree in arabic and islamic law,6,.,.,61 days,0.17,2.0
2,AFG,Burhanuddin Rabbani,AFG_1992_2,8243611d-1e42-11e4-b4cd-db5882bf8def,1992-06-28,1996-09-27,1992,1996,Afghanistan,Badakhshan,.,Afghanistan,Asia,36.734772,70.811996,Badakshan Province,4,0,Tajik,.,Tajiks,.,https://www.theguardian.com/world/2011/sep/21/burhanuddin-rabbani-obituary,https://www.bbc.com/news/world-south-asia-14992229,1,Regular,Irregular,M,1939,NaT,.,1,1,.,AFG,AFG.1_1,AFG.1.5_1,.,7,.,.,1552 days,4.25,51.0
3,AFG,Mullah Omar,AFG_1996_1,8243611e-1e42-11e4-b4cd-db5882bf8def,1996-09-27,2001-11-13,1996,2001,Afghanistan,Kandahar,Khakrez,Afghanistan,Asia,32.01075,65.503349,"in the village of Chah-e Himmat,",3,0,Pashtun,.,Pashtuns,.,https://www.britannica.com/biography/Mohammad-Omar,https://www.washingtonpost.com/world/mullah-mohammad-omar-taliban-leader-said-to-have-died-in-2013/2015/07/29/1dd546bc-3606-11e5-9d0f-7865a67390ee_story.html,1,Irregular,Irregular,M,1959,NaT,185456,1,15,158,AFG,AFG.15_1,AFG.15.6_1,.,1,.,.,1873 days,5.13,62.0
4,AFG,Hamid Karzai,AFG_2001_1,8243611f-1e42-11e4-b4cd-db5882bf8def,2001-12-22,2014-09-29,2001,2014,Afghanistan,Kandahar,Kandahar City,Afghanistan,Asia,31.571779,65.729462,Karz,1,0,Pashtun,.,Pashtuns,.,https://www.bbc.com/news/world-south-asia-14141232,http://www.afghan-bios.info/index.php?option=com_afghanbios&id=846&task=view&total=5&start=2&Itemid=14,2,Foreign Imposition,Regular,M,1957,1957-12-24,185590,1,15,157,AFG,AFG.15_1,AFG.15.5_1,.,7,.,.,4664 days,12.78,153.0


### Make a copy of the dataframe for analysis

In [18]:
df = src[
    [
        "country_led",
        "idacr",
        "leader",
        "gender",
        "entry",
        "exit",
        "startdate",
        "enddate",
        "days_in_office",
        "years_in_office",
        "months_in_office",
        "startyear",
        "endyear",
        "birth_country",
        "birth_adm1",
        "birth_adm1",
        "continent",
        "latitude",
        "longitude",
        "geoname",
        "foreign_leader",
        "ethnicity1",
        "ethnicity2",
        "ethnicity_geoepr1",
        "ethnicity_geoepr2",
        "ethnicitysource1",
        "ethnicitysource2",
        "ethnicity_precision",
        "yrborn",
        "birthdate",
        "uid",
        "id_0",
        "id_1",
        "id_2",
        "gid_0",
        "gid_1",
        "gid_2",
        "edu_name",
        "edu_r",
        "birthplace_comment",
        "ethnicity_comment",
    ]
].copy()

#### Fix Lars' replacement character

In [19]:
df["leader"] = df["leader"].str.replace(
    "Lars L<U+FFFD>kke Rasmussen", "Lars Løkke Rasmussen", regex=False
)

In [20]:
df.query('leader == "Azali Assoumani"')

Unnamed: 0,country_led,idacr,leader,gender,entry,exit,startdate,enddate,days_in_office,years_in_office,months_in_office,startyear,endyear,birth_country,birth_adm1,birth_adm1.1,birth_adm1.2,birth_adm1.3,continent,latitude,longitude,geoname,foreign_leader,ethnicity1,ethnicity2,ethnicity_geoepr1,ethnicity_geoepr2,ethnicitysource1,ethnicitysource2,ethnicity_precision,yrborn,birthdate,uid,id_0,id_1,id_2,gid_0,gid_1,gid_2,edu_name,edu_r,birthplace_comment,ethnicity_comment
276,Comoros,COM,Azali Assoumani,M,Irregular,Regular,1999-04-30,2002-01-21,997 days,2.73,33.0,1999,2002,Comoros,Njazídja,.,Njazídja,.,Africa,-11.81209,43.28043,Mitsoudjé,0,Swahili,.,.,.,Dreher et al. (2019),.,2,1959,1959-01-01,170427,54,2,.,COM,COM.2_1,.,Military degree,6,"GADM reports no ADM2 regions, but geo_prec=1",.
278,Comoros,COM,Azali Assoumani,M,Regular,Regular,2002-05-27,2006-05-26,1460 days,4.0,48.0,2002,2006,Comoros,Njazídja,.,Njazídja,.,Africa,-11.81209,43.28043,Mitsoudjé,0,Swahili,.,.,.,Dreher et al. (2019),.,2,1959,1959-01-01,170427,54,2,.,COM,COM.2_1,.,Military degree,6,"GADM reports no ADM2 regions, but geo_prec=1",.
281,Comoros,COM,Azali Assoumani,M,Regular,Regular,2016-05-26,2019-02-03,983 days,2.69,32.0,2016,2019,Comoros,Njazídja,.,Njazídja,.,Africa,-11.81209,43.28043,Mitsoudjé,0,Swahili,.,.,.,Dreher et al. (2019),.,2,1959,1959-01-01,170427,54,2,.,COM,COM.2_1,.,Military degree (colonel),6,.,.
283,Comoros,COM,Azali Assoumani,M,Regular,Still in Office,2019-05-26,2020-12-31,585 days,1.6,19.0,2019,2020,Comoros,Njazídja,.,Njazídja,.,Africa,-11.81209,43.28043,Mitsoudjé,0,Swahili,.,.,.,Dreher et al. (2019),.,2,1959,1959-01-01,170427,54,2,.,COM,COM.2_1,.,Military degree (colonel),6,.,.


#### Just the OECD

In [21]:
oecd_df = df[df["country_led"].isin(oecd_list)].copy()

In [22]:
leaders_grouped = (
    oecd_df.groupby(["leader", "country_led", "birthdate"])["uid"]
    .count()
    .reset_index()
    .rename(columns={"uid": "stints_in_office"})
    .sort_values("stints_in_office", ascending=False)
).reset_index(drop=True)

In [23]:
leaders_grouped.query("stints_in_office >= 2")

Unnamed: 0,leader,country_led,birthdate,stints_in_office
0,Berlusconi,Italy,1936-09-29,3
1,Janez Jansa,Slovenia,1958-09-17,3
2,Bondevik,Norway,1947-09-03,3
3,Drnovsek,Slovenia,1950-05-17,2
4,Simonetta Sommaruga,Switzerland,1960-05-14,2
5,Brundtland,Norway,1939-04-20,2
6,Shinzo Abe,Japan,1954-09-21,2
7,Carlsson,Sweden,1934-11-09,2
8,Delamuraz,Switzerland,1936-04-01,2
9,Rudd,Australia,1957-09-21,2


In [24]:
leaders_list = leaders_grouped.query("stints_in_office >= 2")["leader"].to_list()

#### Just those in the dataframe with more than one stint

In [25]:
oecd_leaders_multiple_stints = oecd_df[oecd_df["leader"].isin(leaders_list)]

----

## Export

In [26]:
df.to_csv("data/processed/harvard_pol_leaders_db.csv", index=False)

In [27]:
oecd_leaders_multiple_stints.to_csv(
    "data/processed/harvard_pol_leaders_in_oecd_db_multiple_stints.csv", index=False
)