In [1]:
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Location of the interim world-conflict CSV, relative to this notebook
WorldConflictFile = "../1_Input/DF_Interim/worldconflict_data.csv"

# Read the CSV into a dataframe (one row per conflict event, judging by the
# preview below)
conflict_df = pd.read_csv(filepath_or_buffer=WorldConflictFile)

# Preview the first five rows
conflict_df.head(n=5)

Unnamed: 0,Conflict Id,Year,Latitude,Longitude,Country,Country Id,Best Death Estimate
0,217843,2002,43.5072,46.334,Russian Federation,365,14
1,175934,2002,32.206406,35.286583,Israel,666,7
2,175981,2002,31.506792,35.027717,Israel,666,3
3,175983,2002,32.609067,35.289202,Israel,666,1
4,175984,2002,31.902561,35.195547,Israel,666,3


In [3]:
# Aggregate the event-level rows to one row per (Country, Year):
#   - "Conflict Id"          -> number of events recorded that year
#   - "Best Death Estimate"  -> total of the death estimates that year
grp_conflict = conflict_df.groupby(by=["Country", "Year"])

# Double-bracket selection keeps each result as a one-column dataframe
conflictCount = grp_conflict[["Conflict Id"]].count()
fatalitiesCount = grp_conflict[["Best Death Estimate"]].sum()

# Both frames share the same (Country, Year) index, so join them on it
conflict_data_perYear = conflictCount.merge(
    fatalitiesCount,
    how="outer",
    left_index=True,
    right_index=True,
)

conflict_data_perYear

Unnamed: 0_level_0,Unnamed: 1_level_0,Conflict Id,Best Death Estimate
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,2000,207,5394
Afghanistan,2001,263,5553
Afghanistan,2002,94,1157
Afghanistan,2003,186,944
Afghanistan,2004,241,817
Afghanistan,2005,430,1711
Afghanistan,2006,848,4982
Afghanistan,2007,1042,7020
Afghanistan,2008,1114,5660
Afghanistan,2009,1514,6499


In [4]:
# Collapse the per-year figures to one yearly average per country.
# NOTE(review): the average is taken over the (Country, Year) rows present in
# conflict_data_perYear; a year with no recorded events for a country has no
# row there, so it is not counted as a zero - confirm this is intended.
c1 = conflict_data_perYear.reset_index().groupby(["Country"])

# Double-bracket selection keeps each result as a one-column dataframe
conflictCount_mean = c1[["Conflict Id"]].mean()
conflictDeath_mean = c1[["Best Death Estimate"]].mean()

# Preview the mean yearly event count per country
conflictCount_mean.head(n=5)

Unnamed: 0_level_0,Conflict Id
Country,Unnamed: 1_level_1
Afghanistan,1169.777778
Algeria,117.111111
Angola,25.352941
Armenia,3.0
Azerbaijan,14.5625


In [5]:
# Preview the mean yearly death estimate per country
conflictDeath_mean.head(n=5)

Unnamed: 0_level_0,Best Death Estimate
Country,Unnamed: 1_level_1
Afghanistan,6669.777778
Algeria,409.444444
Angola,245.058824
Armenia,5.5
Azerbaijan,26.5


In [7]:
# Combine the two per-country means on their shared Country index and give the
# columns names that describe the aggregated values
c2 = (
    conflictCount_mean
    .merge(conflictDeath_mean, how="outer", left_index=True, right_index=True)
    .rename(
        columns={
            "Conflict Id": "Conflict Count",
            "Best Death Estimate": "Conflict Deaths",
        }
    )
)

c2.head(n=5)

Unnamed: 0_level_0,Conflict Count,Conflict Deaths
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,1169.777778,6669.777778
Algeria,117.111111,409.444444
Angola,25.352941,245.058824
Armenia,3.0,5.5
Azerbaijan,14.5625,26.5


In [9]:
# Country population figures are needed as the denominator of the
# Conflict Death Rate computed further down.

# Location of the interim population CSV, relative to this notebook
population_data_file = "../1_Input/DF_Interim/population_data.csv"

# Read the CSV into a dataframe
population_df = pd.read_csv(filepath_or_buffer=population_data_file)

# Preview the first five rows
population_df.head(n=5)

Unnamed: 0,Country,Population
0,Afghanistan,32956936
1,Albania,2900114
2,Algeria,40026405
3,Angola,25426102
4,Antigua and Barbuda,92279


In [10]:
# Attach the conflict means to the population table, then derive the
# Conflict Death Rate = mean yearly deaths per 100,000 inhabitants.
#
# The left join keeps every row of population_df: countries with no match in
# c2 get NaN in the conflict columns, and countries that appear only in c2 are
# dropped.
# NOTE(review): a NaN row may be a Country-name mismatch between the two
# datasets rather than a true absence of conflict data - verify.
conflict_perCountry = (
    population_df
    .merge(c2, how="left", on="Country")
    .assign(
        **{
            "Conflict Death Rate": lambda merged: (
                merged["Conflict Deaths"] / merged["Population"] * 100000
            )
        }
    )
)

conflict_perCountry

Unnamed: 0,Country,Population,Conflict Count,Conflict Deaths,Conflict Death Rate
0,Afghanistan,32956936,1169.777778,6669.777778,20.237858
1,Albania,2900114,,,
2,Algeria,40026405,117.111111,409.444444,1.022936
3,Angola,25426102,25.352941,245.058824,0.963808
4,Antigua and Barbuda,92279,,,
5,Arab Rep of Egypt,92456224,,,
6,Argentina,43633470,,,
7,Armenia,3022275,3.000000,5.500000,0.181982
8,Aruba,104085,,,
9,Australia,24140741,,,


In [None]:
# Save the per-country table (without the positional index) to the interim
# data folder. The path uses the same "../1_Input/DF_Interim/" prefix as the
# CSV reads in this notebook, so it does not depend on what the project's
# checkout directory is called.
conflict_perCountry.to_csv(
    "../1_Input/DF_Interim/conflict_perCountry.csv",
    index=False,
    encoding="utf-8",
)