# Estimating the macroeconomic effect of conflicts with local projections

***Author:*** N. Julitz

***Data:*** (UCDP) Georeferenced Event Dataset, GDP per capita (IMF), World Population Data (Kaggle)


***Further info:***

### First, we need to import the relevant libraries

In [258]:
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS
from linearmodels.panel import compare
import statsmodels.api as sm
import datetime as datetime

### Then, we need to import the datasets we want to use

In [259]:
ucdp = pd.read_csv("UCDP.csv", low_memory=False)

gdp = pd.read_excel("GDP.xlsx")

population = pd.read_csv("population.csv", sep=",")

## Making the ucdp-dataset ready for statistical analysis

In [260]:
ucdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 349733 entries, 0 to 349732
Data columns (total 49 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 349733 non-null  int64  
 1   relid              349733 non-null  object 
 2   year               349733 non-null  int64  
 3   active_year        349733 non-null  int64  
 4   code_status        349733 non-null  object 
 5   type_of_violence   349733 non-null  int64  
 6   conflict_dset_id   349733 non-null  int64  
 7   conflict_new_id    349733 non-null  int64  
 8   conflict_name      349733 non-null  object 
 9   dyad_dset_id       349733 non-null  int64  
 10  dyad_new_id        349733 non-null  int64  
 11  dyad_name          349733 non-null  object 
 12  side_a_dset_id     349733 non-null  int64  
 13  side_a_new_id      349733 non-null  int64  
 14  side_a             349733 non-null  object 
 15  side_b_dset_id     349733 non-null  int64  
 16  si

In [261]:
ucdp["total_deaths"] = ucdp.deaths_a + ucdp.deaths_b + ucdp.deaths_civilians # make a column for the total deaths

ucdp["conflict_dummy"] = ucdp["total_deaths"].apply(lambda x: 1 if x > 1000 else 0) # all conflicts, that bypass a threshold of 1,000 deaths, are coded to 1 and all others are coded to 0

len(ucdp[ucdp["conflict_dummy"] == 1]) # the total threshold identifies 202 conflicts

202

In [262]:
ucdp.columns

Index(['id', 'relid', 'year', 'active_year', 'code_status', 'type_of_violence',
       'conflict_dset_id', 'conflict_new_id', 'conflict_name', 'dyad_dset_id',
       'dyad_new_id', 'dyad_name', 'side_a_dset_id', 'side_a_new_id', 'side_a',
       'side_b_dset_id', 'side_b_new_id', 'side_b', 'number_of_sources',
       'source_article', 'source_office', 'source_date', 'source_headline',
       'source_original', 'where_prec', 'where_coordinates',
       'where_description', 'adm_1', 'adm_2', 'latitude', 'longitude',
       'geom_wkt', 'priogrid_gid', 'country', 'country_id', 'region',
       'event_clarity', 'date_prec', 'date_start', 'date_end', 'deaths_a',
       'deaths_b', 'deaths_civilians', 'deaths_unknown', 'best', 'high', 'low',
       'gwnoa', 'gwnob', 'total_deaths', 'conflict_dummy'],
      dtype='object')

In [263]:
# convert all dates to datetime-objects
ucdp["date_start"] = pd.to_datetime(ucdp["date_start"])
ucdp["date_end"] = pd.to_datetime(ucdp["date_end"])

# add a new DataFrame with years from 1988-2023
ucdp = ucdp[(ucdp["year"] >= 1988) & (ucdp["year"] <= 2023)].copy()

# dropping all columns not needed
conflict = ucdp.drop(ucdp.columns[[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 44, 45, 46, 47, 48]], axis=1)

### Applying the definition for conflict-onset

In [264]:
# we need to classify a new conflict onset after 4 years of not reaching the threshold

# sort the rows after country and year (descending)
conflict.sort_values(by=["country", "year", "date_start"], ascending=False, ignore_index=True, inplace=True)

# grouping the dataset after country and years and sum up the deaths for each year
conflict_grouped = conflict.groupby(by=["country", "year"]).total_deaths.sum().unstack().copy()
conflict_grouped = conflict_grouped.reset_index().copy()
conflict_grouped = conflict_grouped.melt(id_vars="country").copy()
conflict_grouped = conflict_grouped.rename(columns={"value": "total_deaths"}).copy()
conflict_grouped.sort_values(by=["country", "year"], ascending=False, ignore_index=True, inplace=True)

# by grouping, the dataframe was changed into wide-format, which caused all countries to appear in every year column, even though not every country has a conflict in every year
# we need to change that by dropping all NaN-values
conflict_grouped = conflict_grouped.dropna().copy()

In [265]:
# problem: for the application of the conflict-onset-definition, we need to make sure, that there is no years are left out in the countries
# otherwise, the application will set years as conflict-onset, which are not

all_years = pd.DataFrame()

for country in conflict_grouped['country'].unique():
    country_df = conflict_grouped[conflict_grouped['country'] == country]
    min_year = country_df['year'].min()
    max_year = country_df['year'].max()
    full_years = pd.DataFrame({'year': range(min_year, max_year + 1)})
    full_years['country'] = country
    all_years = pd.concat([all_years, full_years])

conflict_grouped = pd.merge(all_years, conflict_grouped, on=['country', 'year'], how='left')

conflict_grouped["total_deaths"] = conflict_grouped["total_deaths"].astype(float).fillna(0)

In [266]:
# again, we need to apply the 1000 deaths threshold
conflict_grouped["conflict_dummy"] = conflict_grouped["total_deaths"].apply(lambda x: 1 if x > 1000 else 0)

In [267]:
# applying the definition for conflict-onset and create a new column for that

target_value = 0

country_grouped = conflict_grouped.groupby('country')

def check_condition(group, target_value):
    
    shifted_0 = group['conflict_dummy']
    shifted_1 = group['conflict_dummy'].shift(1)
    shifted_2 = group['conflict_dummy'].shift(2)
    shifted_3 = group['conflict_dummy'].shift(3)
    shifted_4 = group['conflict_dummy'].shift(4)

    condition = (shifted_0 == 1) & (shifted_1 == target_value) & (shifted_2 == target_value) & (shifted_3 == target_value) & (shifted_4 == target_value)
    return condition


conflict_grouped['conflict_onset'] = country_grouped.apply(lambda x: check_condition(x, target_value)).reset_index(level=0, drop=True)

conflict_grouped["conflict_onset"] = conflict_grouped["conflict_onset"].astype(str).copy()

  conflict_grouped['conflict_onset'] = country_grouped.apply(lambda x: check_condition(x, target_value)).reset_index(level=0, drop=True)


In [268]:
conflict_grouped.loc[conflict_grouped["conflict_onset"] == "True", "conflict_onset"] = 1
conflict_grouped.loc[conflict_grouped["conflict_onset"] == "False", "conflict_onset"] = 0

In [269]:
conflict = conflict_grouped

## Making the gdp-per-capita-dataset ready for statistical analysis

In [270]:
# data is in us-dollar per capita
gdp_melted = pd.melt(gdp, id_vars=["country"]).copy() # change the dataframe from wide-format to long-format

gdp_melted.rename(columns={"variable": "year", "value": "gdp"}, inplace=True) # renamed columns

# add a new dataframe with years from 1988-2023
gdp_final = gdp_melted[(gdp_melted["year"] >= 1988) & (gdp_melted["year"] <= 2023)].copy()
gdp_final.reset_index(inplace=True)
del(gdp_final["index"])

# changing the data-format of columns year and gdp
gdp_final.loc[gdp_final["gdp"] == "no data", "gdp"] = np.nan
gdp_final = gdp_final[gdp_final["country"] != "©IMF, 2024"].copy()
gdp_final = gdp_final[~gdp_final["country"].isna()] # deleting all country-entries which have missing values

# imputation of missing values in gdp column with mean value of gdp per capita for the specific country
mean_gdp = gdp_final.groupby(by="country").gdp.mean().copy()

gdp_merged = pd.merge(left=gdp_final, right=mean_gdp, on="country", how="left").copy() # left-join both dataframes

gdp_merged['gdp_x'] = gdp_merged['gdp_x'].combine_first(gdp_merged['gdp_y']) # replace all missing values with the mean value of its country

gdp_merged.rename(columns={"gdp_x": "gdp", "gdp_y": "gdp_mean"}, inplace=True) # renaming columns

gdp_merged["gdp"] = gdp_merged["gdp"].astype(float).copy()

gdp_merged.sort_values(by=["country", "year"], ascending=False, ignore_index=True, inplace=True)

gdp_merged.drop(columns=["gdp_mean"], inplace=True)

## Making the population-dataset ready for statistical analysis

In [271]:
# dropping and renaming some columns
population.drop(columns=["rank", "cca3", "continent", "density (km²)", "area (km²)", "growth rate", "world percentage"], inplace=True)

population.rename(columns={"2023 population": "2023", "2022 population": "2022", "2020 population": "2020", "2015 population": "2015", "2010 population": "2010", "2000 population": "2000", "1990 population": "1990", "1980 population": "1980", "1970 population": "1970"}, inplace=True)

In [272]:
population = population.melt(id_vars="country")

population.rename(columns={"variable": "year", "value": "population"}, inplace=True)

population.sort_values(by=["country", "year"], ascending=False, ignore_index=True, inplace=True)

## Now, we need to merge all three datasets

In [273]:
# use a left-join on the conflict-dataset, since this is the data, we are dependent on

joined_df = pd.merge(conflict, gdp_merged, on=["country", "year"], how="left")

joined_df.dropna(inplace=True, ignore_index=True)

joined_df.sort_values(by=["country", "year"], ascending=True, inplace=True)

In [274]:
def extract_rows_with_following(df, binary_col, entity_col, n_following=10):
    # Create an empty list to store the indices of rows to keep
    rows_to_keep = []
    
    # Iterate over unique entities
    for entity in df[entity_col].unique():
        # Filter rows for the current entity
        entity_df = df[df[entity_col] == entity]
        
        # Get the indices where the binary column is 1
        indices = entity_df.index[entity_df[binary_col] == 1]
        
        for idx in indices:
            # Calculate the range of rows to include
            end_idx = min(idx + n_following, entity_df.index[-1] + 1)
            rows_to_keep.extend(range(idx, end_idx))
    
    # Remove duplicates and keep the rows in the original DataFrame
    rows_to_keep = sorted(set(rows_to_keep))
    new_df = df.loc[rows_to_keep]
    
    return new_df

# Example usage
new_df = extract_rows_with_following(joined_df, 'conflict_onset', 'country', n_following=10)
joined_df = new_df

In [275]:
joined_df.set_index(["country", "year"], inplace=True)

In [278]:
joined_df.head(40)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_deaths,conflict_dummy,conflict_onset,gdp
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ukraine,2022,73820.0,1,1,4582.609
Ukraine,2023,70809.0,1,0,5337.491
Uganda,1996,1743.0,1,1,401.413
Uganda,1997,1313.0,1,0,423.918
Uganda,1998,1202.0,1,0,402.455
Uganda,1999,820.0,0,0,370.515
Uganda,2000,1157.0,1,0,355.466
Uganda,2001,212.0,0,0,347.812
Uganda,2002,1309.0,1,0,361.953
Uganda,2003,1592.0,1,0,361.772


## Next, the regression model (local projection) is specified

The regression formula should look similar to this:

$$
y_{it+h} - y_{it-1} = \beta_1^h c_{it} + \beta_2^h c_{it-1} + \sum_{j=1}^{h-1} \beta_j^h c_{it+h-j} + \theta_1^h \Delta y_{it-1} + \theta_2^h x_{it} + \mu_i^h + \gamma_t^h + \epsilon_{it}^h
$$

In [276]:
# now, the local projections formula is specified

for h in range(1, 11):
    c = joined_df["conflict_dummy"]
    c_shift = joined_df["conflict_dummy"].shift(1)
    delta_y = np.log(joined_df["gdp"].shift(1)) - np.log(joined_df["gdp"].shift(2))
    sum_term = sum([joined_df["conflict_dummy"].shift(h-j) for j in range(1, h)])
    y = np.log(joined_df["gdp"].shift(-h)) - np.log(joined_df["gdp"].shift(1))

    x = pd.DataFrame({"c": c, "c_shift": c_shift, "delta_y": delta_y, "sum_term": sum_term})
    x.sort_index(ascending=False, inplace=True)
    mod = sm.OLS(y, x, entity_effects=True, time_effects=True)
    res = mod.fit()
    print(f"Results for h = {h}")
    print(res.summary)

MissingDataError: exog contains inf or nans