In [40]:
import pandas as pd
from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [41]:
# Load the three input tables from the current working directory.
# NOTE(review): what each dataset represents is inferred from its file name — confirm.
# ties_df: later cells read its `targetstate` and `startyear` columns.
ties_df = pd.read_excel("TIESv4-1.xls")
# cow_df: later cells read `StateNme` (country name) and `CCode` (numeric code).
cow_df = pd.read_csv("COW-country-codes.csv")
# plty_df: later cells read `Indicator`, `Economy Name`, and the digit-named (year) columns.
plty_df = pd.read_excel("POLITY5-PRC.xlsx")

In [42]:
# Keep only the rows holding the combined Polity score; all other indicators
# in plty_df are ignored from here on.
autocratic_score = plty_df[plty_df['Indicator'] == 'Polity database: Combined Polity Score']

# Year columns are the ones whose label consists solely of digits. Labels are
# passed through str() first: spreadsheet headers stored as numeric cells may
# be read back as ints, which have no .isdigit() method.
year_columns = [col for col in autocratic_score.columns if str(col).isdigit()]

# Reshape to long format (one row per country-year), coerce Year and Value to
# numbers, and drop rows where either is not numeric. Written as one chain so
# that no column is ever assigned onto a frame returned by dropna(), a pattern
# that can trigger SettingWithCopyWarning on some pandas versions.
melted_df_auto = (
    autocratic_score
    .melt(
        id_vars=['Economy Name'],
        value_vars=year_columns,
        var_name='Year',
        value_name='Value',
    )
    .assign(
        Year=lambda df: pd.to_numeric(df['Year'], errors='coerce'),
        Value=lambda df: pd.to_numeric(df['Value'], errors='coerce'),
    )
    .dropna(subset=['Year', 'Value'])
    .astype({'Year': int})
)

In [43]:
def safe_upper_strip(x):
    """Return ``x`` upper-cased with surrounding whitespace removed.

    Used to normalize country-name strings before matching them across
    data sources.

    Parameters
    ----------
    x : object
        Value to normalize. Only ``str`` instances are transformed.

    Returns
    -------
    str or float
        The normalized string, or ``np.nan`` when ``x`` is not a string
        (for example ``None`` or a number).
    """
    if not isinstance(x, str):
        return np.nan
    return x.upper().strip()

# Add an upper-cased, trimmed copy of the name column to each table so the two
# sources can be matched on it. Non-string names become NaN in the new column,
# and those rows are dropped.
melted_df_auto = melted_df_auto.assign(
    Economy_Name_standardized=melted_df_auto['Economy Name'].apply(safe_upper_strip)
).dropna(subset=['Economy_Name_standardized'])

cow_df = cow_df.assign(
    StateNme_standardized=cow_df['StateNme'].apply(safe_upper_strip)
).dropna(subset=['StateNme_standardized'])

In [None]:
# Look up each standardized Polity name in the COW table to get its numeric
# country code. Names with no match map to NaN and those rows are dropped.
# If a standardized name appears more than once in cow_df, dict() keeps the
# code from its last occurrence.
country_mapping = dict(zip(cow_df['StateNme_standardized'], cow_df['CCode']))
melted_df_auto = (
    melted_df_auto
    .assign(Country_Code=melted_df_auto['Economy_Name_standardized'].map(country_mapping))
    .dropna(subset=['Country_Code'])
    .astype({'Country_Code': int})
)

# Coerce `targetstate` (stored as Country_Code) and `startyear` in ties_df to
# integers, dropping rows where either is missing or non-numeric. Built as a
# chain so no column is assigned onto a frame returned by dropna().
ties_df = (
    ties_df
    .assign(
        Country_Code=pd.to_numeric(ties_df['targetstate'], errors='coerce'),
        startyear=pd.to_numeric(ties_df['startyear'], errors='coerce'),
    )
    .dropna(subset=['Country_Code', 'startyear'])
    .astype({'Country_Code': int, 'startyear': int})
)

# pivot() cannot reshape when a (Country_Code, Year) pair occurs more than
# once; check first so the failure names the offending pairs instead of the
# generic "Index contains duplicate entries" error.
duplicate_pairs = melted_df_auto.duplicated(subset=['Country_Code', 'Year'], keep=False)
if duplicate_pairs.any():
    examples = (
        melted_df_auto.loc[duplicate_pairs, ['Country_Code', 'Year']]
        .drop_duplicates()
        .head(5)
    )
    raise ValueError(
        "Cannot pivot Polity scores: duplicate (Country_Code, Year) pairs found, e.g.\n"
        + examples.to_string(index=False)
    )

# Wide format for merging: one row per Country_Code, one column per year,
# with the year columns in ascending order.
polity_wide = (
    melted_df_auto
    .pivot(index='Country_Code', columns='Year', values='Value')
    .sort_index(axis=1)
)