In [1]:
import json
import pandas as pd

# Data Preparation

In [2]:
# Input and output locations used throughout the notebook.
# JSON list of country records with "Name" and "Code" fields (ISO 3166-1 alpha-2).
ISO_FILE = "iso/iso-3166-1-alpha-2-en.json"
# Election results, one row per party and election.
ELECTIONS_FILE = "parlgov2020-dev/elections.csv"
# Party-level attributes, keyed by party_id.
PARTIES_FILE = "parlgov2020-dev/parties.csv"
# Destination of the final table written at the end of the notebook.
OUT = "parlgov2020.csv"

In [3]:
# Country names removed from the elections data so that only European countries remain.
NONEUROPEAN_COUNTRIES = ["Australia", "Canada", "Turkey", "Israel", "New Zealand", "Japan"]

In [4]:
# Geographical grouping of the countries kept in the analysis.
# Keys are region labels; values are the country names belonging to that region.
REGIONS = {
    "central": [
        "Bulgaria", "Croatia", "Czech Republic", "Hungary",
        "Poland", "Romania", "Slovakia", "Slovenia",
    ],
    "north": [
        "Denmark", "Estonia", "Finland", "Iceland",
        "Latvia", "Lithuania", "Norway", "Sweden",
    ],
    "south": [
        "Cyprus", "Greece", "Italy", "Malta", "Portugal", "Spain",
    ],
    "west": [
        "Austria", "Belgium", "France", "Germany", "Ireland",
        "Luxembourg", "Netherlands", "Switzerland", "United Kingdom",
    ],
}

## Filter data on elections

In [5]:
# Load the raw election results; election_date is parsed as datetime so it can
# later be sorted and filtered by year.
elections = pd.read_csv(ELECTIONS_FILE, parse_dates=["election_date"])
elections.head()

Unnamed: 0,country_name_short,country_name,election_type,election_date,vote_share,seats,seats_total,party_name_short,party_name,party_name_english,left_right,country_id,election_id,previous_parliament_election_id,previous_cabinet_id,party_id
0,AUS,Australia,parliament,1901-03-30,44.4,32.0,75,PP,Protectionist Party,Protectionist Party,7.4,33,731,,,1898
1,AUS,Australia,parliament,1901-03-30,34.2,26.0,75,FTP,Free Trade Party,Free Trade Party,6.0,33,731,,,1938
2,AUS,Australia,parliament,1901-03-30,19.4,15.0,75,ALP,Australian Labor Party,Australian Labor Party,3.8833,33,731,,,1253
3,AUS,Australia,parliament,1901-03-30,1.4,1.0,75,none,no party affiliation,no party affiliation,,33,731,,,1396
4,AUS,Australia,parliament,1901-03-30,0.6,1.0,75,one-seat,one seat,one seat,,33,731,,,2299


In [6]:
# Keep European countries only: every country found in the data minus the
# explicitly listed non-European ones.
all_countries = set(elections.country_name.unique())
countries_keep = all_countries.difference(NONEUROPEAN_COUNTRIES)
is_european = elections.country_name.isin(countries_keep)
elections = elections.loc[is_european]

In [7]:
# Keep parliamentary elections only; rows of any other election_type are discarded.
is_parliament = elections.election_type.eq("parliament")
elections = elections.loc[is_parliament]

In [8]:
# Keep actual parties only: these English names are placeholder entries
# rather than real parties.
placeholder_names = ["no seat", "no party affiliation", "one seat"]
is_placeholder = elections.party_name_english.isin(placeholder_names)
elections = elections.loc[~is_placeholder]

In [9]:
# Keep only results where the party actually entered parliament:
# a positive seat count and a positive vote share.
won_seats = elections.seats.gt(0)
won_votes = elections.vote_share.gt(0)
elections = elections.loc[won_seats & won_votes]
elections.shape

(4291, 16)

**Only keep the first successful election for each party**

In [10]:
# For every (country, party) pair keep only the rows of its earliest election
# in `elections`, i.e. the first election in which the party won seats.
#
# A single groupby/transform computes each row's group-wise earliest date, so
# the selection is one boolean mask instead of one DataFrame.drop call per
# party (which rebuilt the frame on every iteration).
# NOTE: assumes country_id and party_id contain no missing values; groupby
# skips rows with missing keys, so such rows would not be kept.
first_election_date = (
    elections
    .groupby(["country_id", "party_id"])
    .election_date
    .transform("min")
)
new_parties = elections[elections.election_date == first_election_date].copy()
new_parties.shape

(802, 16)

In [11]:
# Sanity check: number of distinct parties. It equals the row count of
# new_parties when every party kept exactly one row.
new_parties.party_id.unique().size

802

In [12]:
# Index by party_id so the table can be joined with the party attributes,
# which are indexed by the same key.
new_parties = new_parties.set_index("party_id")

In [13]:
# Restrict to the post-war period: first successful election in 1945 or later.
election_year = new_parties.election_date.dt.year
new_parties = new_parties.loc[election_year.ge(1945)]

In [14]:
# Rows remaining after the 1945 cut-off (party_id is the index, not a column).
new_parties.shape

(610, 15)

### Add information about political parties

In [15]:
# Load the party attributes and key them by party_id in one step.
parties = pd.read_csv(PARTIES_FILE).set_index("party_id")
parties.head()

Unnamed: 0_level_0,country_name_short,country_name,party_name_short,party_name_english,party_name,party_name_ascii,family_name_short,family_name,left_right,state_market,...,cmp,euprofiler,ees,castles_mair,huber_inglehart,ray,benoit_laver,chess,country_id,family_id
party_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1935,AUS,Australia,ACP-V,Australian Country Party (Vic),Australian Country Party (Vic),Australian Country Party (Vic),con,Conservative,7.4,6.4,...,,,,,,,,,33,26
120,AUS,Australia,AD,Australian Democrats,Australian Democrats,Australian Democrats,soc,Social democracy,3.7004,3.3684,...,63321.0,,,102.0,203.0,,4703.0,,33,11
751,AUS,Australia,AG,Australian Greens,Australian Greens,Australian Greens,eco,Green/Ecologist,1.5439,1.2982,...,,,,,,,4739.0,,33,19
1253,AUS,Australia,ALP,Australian Labor Party,Australian Labor Party,Australian Labor Party,soc,Social democracy,3.8833,4.0351,...,63320.0,,,101.0,,,4721.0,,33,11
1760,AUS,Australia,AP,Australia Party,Australia Party,Australia Party,lib,Liberal,6.0,6.7,...,,,,,,,,,33,6


In [16]:
# List the available party attributes before selecting the ones to keep.
parties.columns

Index(['country_name_short', 'country_name', 'party_name_short',
       'party_name_english', 'party_name', 'party_name_ascii',
       'family_name_short', 'family_name', 'left_right', 'state_market',
       'liberty_authority', 'eu_anti_pro', 'cmp', 'euprofiler', 'ees',
       'castles_mair', 'huber_inglehart', 'ray', 'benoit_laver', 'chess',
       'country_id', 'family_id'],
      dtype='object')

In [17]:
# Party attributes carried over into the final table; every other column of
# `parties` is left out. Columns keep the order they have in `parties`.
keep_cols = [
    "state_market",
    "liberty_authority",
    "eu_anti_pro",
    "family_name_short",
    "family_name",
    "party_name_ascii",
]
is_kept = parties.columns.isin(keep_cols)
party_positions = parties.loc[:, is_kept]
party_positions.head()

Unnamed: 0_level_0,party_name_ascii,family_name_short,family_name,state_market,liberty_authority,eu_anti_pro
party_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1935,Australian Country Party (Vic),con,Conservative,6.4,7.0,
120,Australian Democrats,soc,Social democracy,3.3684,1.1579,
751,Australian Greens,eco,Green/Ecologist,1.2982,0.5614,
1253,Australian Labor Party,soc,Social democracy,4.0351,3.2632,
1760,Australia Party,lib,Liberal,6.7,3.6,


In [18]:
# Attach the selected party attributes; both frames are indexed by party_id and
# a left join keeps every row of new_parties.
new_parties = new_parties.join(party_positions, how="left")
new_parties.shape

(610, 21)

**Add ISO country codes**

In [19]:
# Build a lookup from country name to ISO 3166-1 alpha-2 code.
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII country names.
with open(ISO_FILE, encoding="utf-8") as f:
    iso_records = json.load(f)
# Each record is expected to provide a "Name" and a "Code" entry.
iso = {record["Name"]: record["Code"] for record in iso_records}

In [20]:
# Lower-cased ISO code per row; a country name missing from `iso` raises KeyError.
new_parties["country_code"] = [iso[name].lower() for name in new_parties.country_name]

**Add geographical classification**

In [21]:
# Invert REGIONS (region -> countries) into a flat country -> region lookup.
MAP_COUNTRY_TO_REGION = {
    country: region
    for region, countries in REGIONS.items()
    for country in countries
}

In [22]:
# Region label per row; a country missing from the lookup raises KeyError.
new_parties["region"] = new_parties.country_name.map(lambda name: MAP_COUNTRY_TO_REGION[name])

In [23]:
# Number of distinct countries left in the final table.
new_parties.country_id.unique().size

31

**Is a party represented in today's parliament?**

In [24]:
# Map each country to the election_id of its latest election in `elections`.
# If several rows share the latest date, the first one's election_id is used.
most_recent_election_id = {}
for country_id, country_elections in elections.groupby("country_id"):
    latest_date = country_elections.election_date.max()
    held_on_latest_date = country_elections.election_date == latest_date
    latest_ids = country_elections.loc[held_on_latest_date, "election_id"].tolist()
    most_recent_election_id[country_id] = latest_ids[0]
len(most_recent_election_id)

31

In [25]:
# Flag parties that won seats in their country's most recent election
# (1 = represented in the current parliament, 0 = not).
#
# Each row of `elections` is compared with the latest election_id of its own
# country; the parties appearing in matching rows are the "alive" ones. This
# replaces a per-party loop that filtered the whole `elections` frame twice
# for every party.
latest_election_of_row = elections.country_id.map(most_recent_election_id)
in_latest_election = elections.election_id == latest_election_of_row
alive_party_ids = elections.loc[in_latest_election, "party_id"].unique()

# new_parties is indexed by party_id, so membership of the index gives the flag.
is_alive = new_parties.index.isin(alive_party_ids).astype("int64")
new_parties["is_alive"] = is_alive

In [26]:
# Preview the final table before exporting it.
new_parties.head()

Unnamed: 0_level_0,country_name_short,country_name,election_type,election_date,vote_share,seats,seats_total,party_name_short,party_name,party_name_english,...,previous_cabinet_id,party_name_ascii,family_name_short,family_name,state_market,liberty_authority,eu_anti_pro,country_code,region,is_alive
party_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
769,AUT,Austria,parliament,1945-11-25,5.4,4.0,165,KPO,Kommunistische Partei Österreichs,Communist Party of Austria,...,452.0,Kommunistische Partei Oesterreichs,com,Communist/Socialist,1.4,3.0,0.0,at,west,0
50,AUT,Austria,parliament,1949-10-09,11.7,16.0,165,FPO,Freiheitliche Partei Österreichs,Freedom Party of Austria,...,884.0,Freiheitliche Partei Oesterreichs,right,Right-wing,6.3291,8.5149,1.9815,at,west,1
1429,AUT,Austria,parliament,1986-11-23,4.8,8.0,183,Gruene,Die Grünen – Die Grüne Alternative,The Greens -- The Green Alternative,...,828.0,Die Gruenen -- Die Gruene Alternative,eco,Green/Ecologist,2.7489,1.4308,6.6845,at,west,1
955,AUT,Austria,parliament,1994-10-09,6.0,11.0,183,LIF,Liberales Forum,Liberal Forum,...,524.0,Liberales Forum,lib,Liberal,7.3667,1.8667,9.6125,at,west,0
1536,AUT,Austria,parliament,2006-10-01,4.1,7.0,183,BZO,Bündnis Zukunft Österreich,Alliance for the Future of Austria,...,888.0,Buendnis Zukunft Oesterreich,right,Right-wing,6.0,8.83,2.5,at,west,0


In [27]:
# Write the final table to OUT; the party_id index is written as the first column.
new_parties.to_csv(OUT)