In [1]:
import pandas as pd
import numpy as np
import wbgapi as wb
import warnings
warnings.filterwarnings('ignore')

## 2009-2019

In [7]:
# Load the raw Global Terrorism Database (GTD) export into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data/raw_data/globalterrorismdb_0221dist_GTDdata.csv')

In [8]:
# Reduce to selected columns and give them readable names.
# The dict order defines the column order of the resulting frame.
gtd_columns = {
    'iyear': 'Year',
    'imonth': 'Month',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'provstate': 'State',
    'city': 'City',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'success': 'Success',
    'attacktype1_txt': 'Type of Attack',
    'targtype1_txt': 'Type of Target',
    'targsubtype1_txt': 'Target Subcategory',
    'gname': 'Group Name',
    'nperps': 'Number of Perpetrators',
    'weaptype1_txt': 'Type of Weapon',
    'nkill': 'Fatalities',
    'nwound': 'Wounded',
}
data = data[list(gtd_columns)].rename(columns=gtd_columns)


# 2009-2019 Data
# Both ends of the period are enforced (between() is inclusive), so the frame
# matches the stated window even if the input file contains later years.
# .copy() detaches the result from `data`, so later column edits on `df`
# neither touch `data` nor trigger SettingWithCopyWarning.
df = data[data['Year'].between(2009, 2019)].copy()
df.head()

Unnamed: 0,Year,Month,Country,Region,State,City,Latitude,Longitude,Success,Type of Attack,Type of Target,Target Subcategory,Group Name,Number of Perpetrators,Type of Weapon,Fatalities,Wounded
90160,2009,1,Somalia,Sub-Saharan Africa,Banaadir,Mogadishu,2.059819,45.326115,1,Bombing/Explosion,Private Citizens & Property,Race/Ethnicity Identified,Al-Shabaab,-99.0,Explosives,8.0,10.0
90161,2009,1,Iraq,Middle East & North Africa,Nineveh,Mosul,36.354145,43.14357,1,Armed Assault,Private Citizens & Property,Laborer (General)/Occupation Identified,Unknown,-99.0,Firearms,1.0,0.0
90162,2009,1,Iraq,Middle East & North Africa,Kirkuk,Kirkuk,35.45211,44.375465,1,Bombing/Explosion,Police,Police Security Forces/Officers,Unknown,-99.0,Explosives,0.0,0.0
90163,2009,1,Iraq,Middle East & North Africa,Diyala,Mandali,33.744999,45.545616,1,Bombing/Explosion,Private Citizens & Property,Unnamed Civilian/Unspecified,Unknown,-99.0,Explosives,0.0,3.0
90164,2009,1,West Bank and Gaza Strip,Middle East & North Africa,West Bank,Bayt Umar,31.626705,35.104969,1,Bombing/Explosion,Transportation,Bus (excluding tourists),Unknown,-99.0,Explosives,0.0,0.0


In [9]:
# Persist the 2009+ subset (row index is not written).
df.to_csv(path_or_buf='data/initial.csv', index=False)

## Capital Cities

In [10]:
# Collect capital cities
capitals = pd.read_csv('data/raw_data/countries_capitals.csv')
capitals = capitals[['name','iso3','capital','latitude', 'longitude']].rename(
    columns={'name':'Country','iso3':'Code','capital':'Capital'})

# Rename countries so that the GTD data and the capitals table use the same spelling.
# Only the Country column is rewritten: an identical string that happens to
# appear in another column (a state, city, capital, ...) is left untouched.
capital_country_names = {
    'Korea South': 'South Korea',
    'Bahamas The': 'Bahamas',
    'Bosnia and Herzegovina': 'Bosnia-Herzegovina',
    'Congo The Democratic Republic Of The': 'Democratic Republic of the Congo',
    "Cote D'Ivoire (Ivory Coast)": 'Ivory Coast',
    'Croatia (Hrvatska)': 'Croatia',
    'Netherlands The': 'Netherlands',
    'Papua new Guinea': 'Papua New Guinea',
    'Trinidad And Tobago': 'Trinidad and Tobago',
}
gtd_country_names = {
    'Republic of the Congo': 'Congo',
    'Slovak Republic': 'Slovakia',
}
capitals = capitals.assign(Country=capitals['Country'].replace(capital_country_names))
df = df.assign(Country=df['Country'].replace(gtd_country_names))

# Merge the capital cities into the GTD data.
# merge() defaults to an inner join, so attacks whose Country has no row in
# `capitals` are dropped here.
capitals_df = df.merge(capitals, on='Country')

# Reorganize columns (the capitals' lowercase latitude/longitude are not kept)
gtd_capitals = capitals_df[['Country','Code','Region','State','Capital','City','Year','Month',
                            'Latitude','Longitude','Success','Type of Attack','Type of Target',
                            'Target Subcategory','Group Name','Number of Perpetrators',
                            'Type of Weapon','Fatalities','Wounded']]
gtd_capitals.head()

Unnamed: 0,Country,Code,Region,State,Capital,City,Year,Month,Latitude,Longitude,Success,Type of Attack,Type of Target,Target Subcategory,Group Name,Number of Perpetrators,Type of Weapon,Fatalities,Wounded
0,Somalia,SOM,Sub-Saharan Africa,Banaadir,Mogadishu,Mogadishu,2009,1,2.059819,45.326115,1,Bombing/Explosion,Private Citizens & Property,Race/Ethnicity Identified,Al-Shabaab,-99.0,Explosives,8.0,10.0
1,Somalia,SOM,Sub-Saharan Africa,Bay,Mogadishu,Burhakaba,2009,1,2.798167,44.07856,1,Hostage Taking (Kidnapping),Government (General),"Government Personnel (excluding police, military)",Somali Islamic Front,-99.0,Unknown,0.0,0.0
2,Somalia,SOM,Sub-Saharan Africa,Bay,Mogadishu,Burhakaba,2009,1,2.798167,44.07856,1,Hostage Taking (Kidnapping),Government (General),"Government Personnel (excluding police, military)",23 May Democratic Alliance (Algeria),-99.0,Unknown,0.0,0.0
3,Somalia,SOM,Sub-Saharan Africa,Banaadir,Mogadishu,Mogadishu,2009,1,2.059819,45.326115,1,Armed Assault,Government (Diplomatic),"International Organization (peacekeeper, aid a...",Unknown,-99.0,Firearms,1.0,0.0
4,Somalia,SOM,Sub-Saharan Africa,Gedo,Mogadishu,Garbaharey,2009,1,3.32728,42.218563,1,Bombing/Explosion,Government (General),"Government Personnel (excluding police, military)",Al-Shabaab,-99.0,Explosives,4.0,0.0


## WB data: Income Level

In [6]:
# Collect Population & GDP-per-capita data for 2009-2019, plus each economy's income level.
# The years are pinned with `time=` instead of `mrv=11` ("most recent values"):
# an mrv window moves forward as the World Bank publishes new years, which would
# silently drop the early years of the study period in the merge with the GTD data.
wb_df = wb.data.DataFrame(
    ['NY.GDP.PCAP.CD', 'SP.POP.TOTL'], time=range(2009, 2020), columns = 'series', labels = True).reset_index().set_index(
    'economy').join(wb.economy.DataFrame()['incomeLevel']).drop('time', axis=1)
wb_df = wb_df.rename(columns={'NY.GDP.PCAP.CD':'GDP','SP.POP.TOTL':'Population','incomeLevel':'Income Level',
                             'Time':'Year'})
# The 'Time' label column holds the year as text; make it numeric so it can be
# used as a merge key against the integer Year of the GTD data.
wb_df['Year'] = pd.to_numeric(wb_df['Year'])

# Convert index to column.
# The index is named explicitly so the new column is always called 'Code',
# whatever name (if any) the join above left on the index.
wb_df = wb_df.rename_axis('Code').reset_index().drop('Country', axis = 1)

# Fix the row order (code ascending, most recent year first). Row order matters
# downstream: the extremist categories are paired with group names by order of
# first appearance in the merged data.
wb_df = wb_df.sort_values(['Code', 'Year'], ascending=[True, False], kind='stable').reset_index(drop=True)
wb_df.head()

Unnamed: 0,Code,Year,GDP,Population,Income Level
0,ABW,2019,,106314.0,HIC
1,ABW,2018,,105845.0,HIC
2,ABW,2017,29007.693003,105366.0,HIC
3,ABW,2016,28281.350482,104872.0,HIC
4,ABW,2015,27980.880695,104341.0,HIC


In [7]:
# Merge with gtd_capitals for a new dataframe: one row per attack, carrying the
# World Bank indicators of its country and year (inner join on Code and Year).
gtd_wb = wb_df.merge(gtd_capitals, on=['Code', 'Year'])

# Spell out the income-level codes. Only the Income Level column is rewritten,
# so an identical string in any other column is left alone; codes that are not
# listed here are kept unchanged.
income_labels = {
    'LIC': 'Low Income',
    'UMC': 'Upper Middle',
    'LMC': 'Lower Middle',
    'HIC': 'High Income',
}
gtd_wb['Income Level'] = gtd_wb['Income Level'].replace(income_labels)

# Calculate number of casualties.
# The result is NaN whenever either Fatalities or Wounded is missing.
gtd_wb['Casualties'] = gtd_wb['Fatalities'] + gtd_wb['Wounded']
gtd_wb.head()

Unnamed: 0,Code,Year,GDP,Population,Income Level,Country,Region,State,Capital,City,...,Success,Type of Attack,Type of Target,Target Subcategory,Group Name,Number of Perpetrators,Type of Weapon,Fatalities,Wounded,Casualties
0,AFG,2019,507.103432,38041754.0,Low Income,Afghanistan,South Asia,Kandahar,Kabul,Mandozai,...,1,Bombing/Explosion,Military,Military Barracks/Base/Headquarters/Checkpost,Taliban,-99.0,Explosives,5.0,6.0,11.0
1,AFG,2019,507.103432,38041754.0,Low Income,Afghanistan,South Asia,Takhar,Kabul,Dashti Qala district,...,1,Armed Assault,Police,Police Security Forces/Officers,Taliban,-99.0,Firearms,5.0,3.0,8.0
2,AFG,2019,507.103432,38041754.0,Low Income,Afghanistan,South Asia,Baghlan,Kabul,Khwaja Khan,...,1,Hostage Taking (Kidnapping),Private Citizens & Property,Political Party Member/Rally,Taliban,-99.0,Firearms,,,
3,AFG,2019,507.103432,38041754.0,Low Income,Afghanistan,South Asia,Herat,Kabul,Islam Kala,...,1,Bombing/Explosion,Military,NATO,Taliban,2.0,Explosives,1.0,1.0,2.0
4,AFG,2019,507.103432,38041754.0,Low Income,Afghanistan,South Asia,Ghazni,Kabul,Ghazni,...,1,Unknown,Police,"Police Building (headquarters, station, school)",Taliban,-99.0,Unknown,10.0,5.0,15.0


In [9]:
# Persist the merged GTD + World Bank table (row index is not written).
gtd_wb.to_csv(path_or_buf='data/gtd_global.csv', index=False)

## "Extremists"

In [8]:
# Collect all attacks whose group name carries the label "extremists"
# (case-sensitive substring match on 'extrem'); na=False treats a missing
# group name as "no match" instead of breaking the boolean mask.
is_extremist = gtd_wb['Group Name'].str.contains('extrem', na=False)

# Collect columns
extrem_df = gtd_wb.loc[is_extremist, ['Year','Code','Country','Region','Income Level','Group Name',
                                      'Fatalities','Wounded','Casualties','Type of Target']]

# Categorize the listed extremist groups.
# `type_new` is paired with `original` BY POSITION: entry i is the category of
# the i-th distinct group name in order of first appearance in extrem_df. Any
# change to the row order or contents of gtd_wb requires re-checking this list.
original = extrem_df['Group Name'].unique().tolist()
type_new = ['government', 'religious','ethnic', 'Animal Rights','religious', 'religious','religious', 'ethnic','ethnic', 
         'political','ethnic', 'ethnic','Incel', 'political','nationalist', 'ethnic', 'ethnic','political', 'political',
         'ethnic', 'religious','ethnic', 'ethnic', 'religious','ethnic', 'religious','Conspiracy theory', 'Feminist',
         'religious', 'religious','religious', 'ethnic', 'nationalist','religious', 'religious','government', 'ethnic',
         'ethnic', 'ethnic','political', 'political','political', 'ethnic','government', 'government','ethnic', 'Anti-LGBT', 
         'ethnic','ethnic', 'ethnic','government', 'ethnic','ethnic', 'nationalist','religious', 'ethnic','ethnic', 
         'nationalist','government', 'ethnic','Pro-choice', 'government','Anti-Abortion', 'political','government', 'ethnic',
         'Pro-LGBT Rights', 'Court Reform','religious', 'Anti-Gun Control','nationalist', 'nationalist']

# zip() would silently stop at the shorter list and leave groups uncategorized,
# so fail loudly when there are more group names than categories.
if len(original) > len(type_new):
    raise ValueError(
        f'{len(original)} extremist group names found but only {len(type_new)} categories listed; '
        f'uncategorized: {original[len(type_new):]}')
group_types = dict(zip(original, type_new))

# Map each group name to its category in a single pass over that one column
extremists = extrem_df.copy(deep=True)
extremists['Group Name'] = extremists['Group Name'].map(group_types)

# All anti- extremists are categorized under the same group as pro- extremists.
# None of these labels occurs in `type_new` as written, so this currently
# changes nothing; it is kept as a guard in case such labels are added.
extremists['Group Name'] = extremists['Group Name'].replace({
    'Anti-religious': 'religious',
    'Sunni religious': 'religious',
    'Shia religious': 'religious',
    'Houthi extremists (Ansar Allah)': 'religious',
    'Anti-ethnic': 'ethnic',
})

# The mapped column now holds the category, so rename it accordingly
extremists = extremists.rename(columns={'Group Name':'Group Type'})

# Re-add the original Group Name column (appended as the last column)
extremists['Group Name'] = extrem_df['Group Name']
extremists.head()

Unnamed: 0,Year,Code,Country,Region,Income Level,Group Type,Fatalities,Wounded,Casualties,Type of Target,Group Name
593,2019,AFG,Afghanistan,South Asia,Low Income,government,0.0,0.0,0.0,Unknown,Anti-Government extremists
5543,2016,AFG,Afghanistan,South Asia,Low Income,religious,0.0,0.0,0.0,NGO,Muslim extremists
8905,2014,AFG,Afghanistan,South Asia,Low Income,religious,1.0,1.0,2.0,Journalists & Media,Muslim extremists
14756,2016,ALB,Albania,Eastern Europe,Upper Middle,religious,0.0,0.0,0.0,Private Citizens & Property,Muslim extremists
14777,2017,ARG,Argentina,South America,Upper Middle,ethnic,0.0,0.0,0.0,Business,Mapuche extremists


In [None]:
# Persist the categorized extremist-group table (row index is not written).
extremists.to_csv(path_or_buf='data/extremists_groups.csv', index=False)