# Estimating the macroeconomic effect of conflicts with local projections

### First, we need to import the relevant libraries

In [75]:
import pandas as pd
import numpy as np
import statsmodels.api as sm # We'll use statsmodels for the local projections
import statsmodels.formula.api as smf
import datetime as datetime

### Then, we need to import the datasets we want to use

In [76]:
ucdp = pd.read_csv("/Users/niklas/Documents/Uni/5. Semester/Data Science mit Python/UCDP.csv", low_memory=False)

gdp = pd.read_excel("/Users/niklas/Documents/Uni/5. Semester/Data Science mit Python/GDP.xlsx")

population = pd.read_csv("/Users/niklas/Documents/Uni/5. Semester/Data Science mit Python/population.csv", sep=",")

## Making the ucdp-dataset ready for statistical analysis

In [77]:
ucdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 349733 entries, 0 to 349732
Data columns (total 49 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 349733 non-null  int64  
 1   relid              349733 non-null  object 
 2   year               349733 non-null  int64  
 3   active_year        349733 non-null  int64  
 4   code_status        349733 non-null  object 
 5   type_of_violence   349733 non-null  int64  
 6   conflict_dset_id   349733 non-null  int64  
 7   conflict_new_id    349733 non-null  int64  
 8   conflict_name      349733 non-null  object 
 9   dyad_dset_id       349733 non-null  int64  
 10  dyad_new_id        349733 non-null  int64  
 11  dyad_name          349733 non-null  object 
 12  side_a_dset_id     349733 non-null  int64  
 13  side_a_new_id      349733 non-null  int64  
 14  side_a             349733 non-null  object 
 15  side_b_dset_id     349733 non-null  int64  
 16  si

In [78]:
ucdp["total_deaths"] = ucdp.deaths_a + ucdp.deaths_b + ucdp.deaths_civilians # make a column for the total deaths

ucdp["conflict_dummy"] = ucdp["total_deaths"].apply(lambda x: 1 if x > 1000 else 0) # all conflicts, that bypass a threshold of 1,000 deaths, are coded to 1 and all others are coded to 0

len(ucdp[ucdp["conflict_dummy"] == 1]) # the total threshold identifies 202 conflicts

202

In [79]:
ucdp.columns

Index(['id', 'relid', 'year', 'active_year', 'code_status', 'type_of_violence',
       'conflict_dset_id', 'conflict_new_id', 'conflict_name', 'dyad_dset_id',
       'dyad_new_id', 'dyad_name', 'side_a_dset_id', 'side_a_new_id', 'side_a',
       'side_b_dset_id', 'side_b_new_id', 'side_b', 'number_of_sources',
       'source_article', 'source_office', 'source_date', 'source_headline',
       'source_original', 'where_prec', 'where_coordinates',
       'where_description', 'adm_1', 'adm_2', 'latitude', 'longitude',
       'geom_wkt', 'priogrid_gid', 'country', 'country_id', 'region',
       'event_clarity', 'date_prec', 'date_start', 'date_end', 'deaths_a',
       'deaths_b', 'deaths_civilians', 'deaths_unknown', 'best', 'high', 'low',
       'gwnoa', 'gwnob', 'total_deaths', 'conflict_dummy'],
      dtype='object')

In [80]:
# convert all dates to datetime-objects
ucdp["date_start"] = pd.to_datetime(ucdp["date_start"])
ucdp["date_end"] = pd.to_datetime(ucdp["date_end"])

# add a new DataFrame with years from 1988-2023
ucdp = ucdp[(ucdp["year"] >= 1988) & (ucdp["year"] <= 2023)].copy()

# dropping all columns not needed
conflict = ucdp.drop(ucdp.columns[[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 44, 45, 46, 47, 48]], axis=1)

### Applying the definition for conflict-onset

In [100]:
# we need to classify a new conflict onset after 4 years of not reaching the threshold

# sort the rows after country and year (descending)
conflict.sort_values(by=["country", "year", "date_start"], ascending=False, ignore_index=True, inplace=True)

# grouping the dataset after country and years and sum up the deaths for each year
conflict_grouped = conflict.groupby(by=["country", "year"]).total_deaths.sum().unstack().copy()
conflict_grouped = conflict_grouped.reset_index().copy()
conflict_grouped = conflict_grouped.melt(id_vars="country").copy()
conflict_grouped = conflict_grouped.rename(columns={"value": "total_deaths"}).copy()
conflict_grouped.sort_values(by=["country", "year"], ascending=False, ignore_index=True, inplace=True)

#conflict["conflict_onset"] = conflict["conflict_dummy"].apply(lambda x: x.rolling(4, min_periods=4, ))

## Making the gdp-per-capita-dataset ready for statistical analysis

In [81]:
# data is in us-dollar per capita
gdp_melted = pd.melt(gdp, id_vars=["country"]).copy() # change the dataframe from wide-format to long-format

gdp_melted.rename(columns={"variable": "year", "value": "gdp"}, inplace=True) # renamed columns

# add a new dataframe with years from 1988-2023
gdp_final = gdp_melted[(gdp_melted["year"] >= 1988) & (gdp_melted["year"] <= 2023)].copy()
gdp_final.reset_index(inplace=True)
del(gdp_final["index"])

# changing the data-format of columns year and gdp
gdp_final.loc[gdp_final["gdp"] == "no data", "gdp"] = np.nan
gdp_final = gdp_final[gdp_final["country"] != "©IMF, 2024"].copy()
gdp_final = gdp_final[~gdp_final["country"].isna()] # deleting all country-entries which have missing values

# imputation of missing values in gdp column with mean value of gdp per capita for the specific country
mean_gdp = gdp_final.groupby(by="country").gdp.mean().copy()

gdp_merged = pd.merge(left=gdp_final, right=mean_gdp, on="country", how="left").copy() # left-join both dataframes

gdp_merged['gdp_x'] = gdp_merged['gdp_x'].combine_first(gdp_merged['gdp_y']) # replace all missing values with the mean value of its country

gdp_merged.rename(columns={"gdp_x": "gdp", "gdp_y": "gdp_mean"}, inplace=True) # renaming columns

## Making the population-dataset ready for statistical analysis

In [82]:
# dropping and renaming some columns
population.drop(columns=["rank", "cca3", "continent", "density (km²)", "area (km²)", "growth rate", "world percentage"], inplace=True)

population.rename(columns={"2023 population": "2023", "2022 population": "2022", "2020 population": "2020", "2015 population": "2015", "2010 population": "2010", "2000 population": "2000", "1990 population": "1990", "1980 population": "1980", "1970 population": "1970"}, inplace=True)

In [83]:
population = population.melt(id_vars="country")

## Next, the regression model (local projection) is specified

In [84]:
X = conflict["conflict_dummy"].exog
Y = weo[]

lp = sm.OLS(Y, X)

res = lp.fit()

print(res.summary())

SyntaxError: invalid syntax (1672828355.py, line 2)