### Exploratory Data Analysis
***
This part of the notebook is organised sequentially, like a pipeline: the data is transformed step by step and plotted in various graphs to derive insights.

In [149]:
# Data Libraries 
import pandas as pd 
import numpy as np

# plotting libraries 
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
import seaborn as sns

# ignore warnings 
import warnings 
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
# Data paths (relative, so they resolve against the kernel's working directory)
# MacOS
abspath = './Data/Data.csv'
metadata = './Data/terrorism_metadata.csv'
# Windows -- placeholders only, no Windows paths have been filled in
# abspath = None
# metadata = None

In [3]:
# Functional Meta-Data --> usage - meta_data['column-name']
# Only the first two columns of the metadata file are kept; the 'Column'
# values become the keys and the 'Desc' values the descriptions.
meta = pd.read_csv(metadata)
meta = meta.drop(columns=meta.columns[2:])
meta_data = dict(zip(meta['Column'], meta['Desc']))

meta_data

{'eventid': 'Unique id for each event',
 'iyear': 'Year of incident',
 'imonth': 'Month of incident',
 'iday': 'Numeric day on which the incident occurred.',
 'extended': 'Yes/No for for whether the incedent extended for more than 24 hours.',
 'country': 'Country code',
 'country_txt': 'Country where the incident took place',
 'region ': 'Region code based on 12 regions.',
 'region_txt': 'Region in which the incident occurred',
 'city': 'Name of the city/village/town in which the incident took place.',
 'provstate': 'Name of the first order subnational region during the time of the event',
 'latitude': 'Coordinate',
 'longitude': 'Coordinate',
 'location': '*Exact* location where the incident took place',
 'summary': 'synopsis',
 'success': 'Yes/No for the success of the terrorist strike',
 'suicide': 'Yes/No for whether the attack was a suicide attack',
 'attacktype1_txt': 'The general method of attack/ the tactics used.',
 'targettype1_txt': 'The general category of victim of the att

In [4]:
# Load the main dataset from `abspath`; the file is decoded as latin1.
# low_memory=False makes pandas parse the file in one pass rather than in
# chunks, which avoids mixed-dtype inference on wide CSVs.
data = pd.read_csv(abspath,encoding='latin1',low_memory=False)

In [5]:
# Drop the redundant 'Unnamed: 0' column.
# errors='ignore' keeps this cell re-runnable: once the column has been removed,
# a second execution is a no-op instead of raising KeyError.
data = data.drop(columns='Unnamed: 0', errors='ignore')

In [6]:
data.head()

Unnamed: 0,eventid,iyear,imonth,iday,extended,country,country_txt,region,region_txt,city,...,target1,natlty1,natlty1_txt,gname,motive,nperps,weaptype1,weaptype1_txt,nkill,propextent
0,197000000001,1970,7,2,0,58,Dominican Republic,2,Central America & Caribbean,Santo Domingo,...,Julio Guzman,58.0,Dominican Republic,MANO-D,Unknown,,13,Unknown,1.0,
1,197000000002,1970,0,0,0,130,Mexico,1,North America,Mexico city,...,"Nadine Chaval, daughter",21.0,Belgium,23rd of September Communist League,Unknown,7.0,13,Unknown,0.0,
2,197001000001,1970,1,0,0,160,Philippines,5,Southeast Asia,Unknown,...,Employee,217.0,United States,Unknown,Unknown,,13,Unknown,1.0,
3,197001000002,1970,1,0,0,78,Greece,8,Western Europe,Athens,...,U.S. Embassy,217.0,United States,Unknown,Unknown,,6,Explosives,,
4,197001000003,1970,1,0,0,101,Japan,4,East Asia,Fukouka,...,U.S. Consulate,217.0,United States,Unknown,Unknown,,8,Incendiary,,


In [7]:
# data stats: row count, column count and total number of cells
print(f'Number of rows : {data.shape[0]}')
print(f'Number of columns : {data.shape[1]}')
print(f'Data Size :  {data.size}')

Number of rows : 181691
Number of columns : 29
Data Size :  5269039


In [9]:
# Country reference table, used only for country acronyms.
# Fetched over the network from the plotly datasets repository; its COUNTRY and
# CODE columns are used further down to map country names to acronyms.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

In [10]:
df['CODE']

0      AFG
1      ALB
2      DZA
3      ASM
4      AND
      ... 
217    VGB
218    WBG
219    YEM
220    ZMB
221    ZWE
Name: CODE, Length: 222, dtype: object

In [11]:
data[data['country_txt']=='United States']

Unnamed: 0,eventid,iyear,imonth,iday,extended,country,country_txt,region,region_txt,city,...,target1,natlty1,natlty1_txt,gname,motive,nperps,weaptype1,weaptype1_txt,nkill,propextent
5,197001010002,1970,1,1,0,217,United States,1,North America,Cairo,...,Cairo Police Headquarters,217.0,United States,Black Nationalists,To protest the Cairo Illinois Police Deparment,-99.0,5,Firearms,0.0,3.0
7,197001020002,1970,1,2,0,217,United States,1,North America,Oakland,...,Edes Substation,217.0,United States,Unknown,Unknown,-99.0,6,Explosives,0.0,3.0
8,197001020003,1970,1,2,0,217,United States,1,North America,Madison,...,"R.O.T.C. offices at University of Wisconsin, M...",217.0,United States,New Year's Gang,To protest the War in Vietnam and the draft,1.0,8,Incendiary,0.0,3.0
9,197001030001,1970,1,3,0,217,United States,1,North America,Madison,...,Selective Service Headquarters in Madison Wisc...,217.0,United States,New Year's Gang,To protest the War in Vietnam and the draft,1.0,8,Incendiary,0.0,3.0
10,197001050001,1970,1,1,0,217,United States,1,North America,Baraboo,...,Badger Army ammo depot.,217.0,United States,"Weather Underground, Weathermen",Unknown,,6,Explosives,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180461,201711130042,2017,11,13,0,217,United States,1,North America,Vale,...,Church,217.0,United States,Pro-LGBT Rights extremists,Pro-LGBT Rights extremists claimed responsibil...,-99.0,8,Incendiary,0.0,3.0
181141,201712070037,2017,12,7,0,217,United States,1,North America,Aztec,...,School,217.0,United States,White extremists,"An unaffiliated individual, identified as Will...",1.0,5,Firearms,3.0,3.0
181219,201712110001,2017,12,11,0,217,United States,1,North America,New York City,...,Bus Terminal,217.0,United States,Jihadi-inspired extremists,"An unaffiliated individual, identified as Akay...",1.0,6,Explosives,0.0,
181478,201712220022,2017,12,22,0,217,United States,1,North America,Harrisburg,...,Vehicle,217.0,United States,Unknown,Unknown,1.0,5,Firearms,0.0,3.0


In [12]:
meta_data['country']

'Country code'

### Choropleth Data Separation

In [135]:
unq_terr = data['country_txt'].unique()

In [136]:
unq_terr[:20]

array(['Dominican Republic', 'Mexico', 'Philippines', 'Greece', 'Japan',
       'United States', 'Uruguay', 'Italy', 'East Germany (GDR)',
       'Ethiopia', 'Guatemala', 'Venezuela', 'West Germany (FRG)',
       'Switzerland', 'Jordan', 'Spain', 'Brazil', 'Egypt', 'Argentina',
       'Lebanon'], dtype=object)

In [138]:
# country list cross-check: report every country name from the terrorism data
# (unq_terr) that has no entry in the reference table's COUNTRY column.
# NOTE(review): the printed label says "Missing in the Terrorism Data", but the
# names listed come from the terrorism data and are absent from df['COUNTRY'];
# the wording is kept unchanged so the output stays comparable.
known_countries = set(df['COUNTRY'])  # built once instead of once per iteration
c = 0
for x in unq_terr:
    if x not in known_countries:
        print(f"{x} -- Missing in the Terrorism Data")
        c += 1
print(f"{c} -- total count")

East Germany (GDR) -- Missing in the Terrorism Data
West Germany (FRG) -- Missing in the Terrorism Data
South Yemen -- Missing in the Terrorism Data
West Bank and Gaza Strip -- Missing in the Terrorism Data
Czechoslovakia -- Missing in the Terrorism Data
South Vietnam -- Missing in the Terrorism Data
Zaire -- Missing in the Terrorism Data
People's Republic of the Congo -- Missing in the Terrorism Data
Myanmar -- Missing in the Terrorism Data
Yugoslavia -- Missing in the Terrorism Data
North Yemen -- Missing in the Terrorism Data
South Korea -- Missing in the Terrorism Data
Bahamas -- Missing in the Terrorism Data
Rhodesia -- Missing in the Terrorism Data
Soviet Union -- Missing in the Terrorism Data
Western Sahara -- Missing in the Terrorism Data
New Hebrides -- Missing in the Terrorism Data
Guadeloupe -- Missing in the Terrorism Data
Martinique -- Missing in the Terrorism Data
Vatican City -- Missing in the Terrorism Data
French Guiana -- Missing in the Terrorism Data
Falkland Islands

In [139]:
# Creating Country-Acronym Dictionary: COUNTRY name -> CODE
cdict = dict(zip(df['COUNTRY'], df['CODE']))

In [140]:
cdict['United States'] # name -> acronym

'USA'

In [71]:
# function to return acronyms
def country_acronym(country):
    """Return the acronym stored in ``cdict`` for ``country``.

    Parameters
    ----------
    country : str
        Country name to look up (a key of the global ``cdict``).

    Returns
    -------
    The value mapped to ``country`` in ``cdict``, or the string ``'No Code'``
    when the name is not a key.
    """
    # dict.get is a single hash lookup; the previous version rebuilt
    # list(cdict.keys()) on every call, which is costly under .apply().
    return cdict.get(country, 'No Code')


In [20]:
data['country_acronym'] = data['country_txt'].apply(country_acronym) # acronym column creation

### Choropleth DataFrame

In [65]:
choropleth = pd.DataFrame(pd.Series(data['country_txt'].unique(),name='Country'))

In [72]:
choropleth['Code'] = choropleth['Country'].apply(country_acronym)

In [75]:
# Keep only the countries that have an acronym.
# .copy() makes the result an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning.
choropleth = choropleth[choropleth['Code'] != 'No Code'].copy()

In [93]:
# function which returns the total number of terrorism-events ( Successful ) in a country
def calc_total(country):
    """Return the sum of the ``success`` column for one country.

    Parameters
    ----------
    country : str
        Value to match against the ``country_txt`` column of the global ``data``.

    Returns
    -------
    Sum of ``success`` over the matching rows; 0 when no row matches.
    """
    # The previous guard, `not country in data['country_txt']`, tested the Series
    # *index* (not its values) and was inverted, so it only worked by accident
    # with an integer index. A boolean mask needs no guard: an empty selection
    # already sums to 0.
    matches = data['country_txt'] == country
    return data.loc[matches, 'success'].sum()

In [94]:
choropleth['total_success'] = choropleth['Country'].apply(calc_total) # calc 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [127]:
choropleth # dataframe structure to create choropleth plot using plotly go 

Unnamed: 0,Country,Code,total_success
0,Dominican Republic,DOM,85
1,Mexico,MEX,479
2,Philippines,PHL,5975
3,Greece,GRC,1126
4,Japan,JPN,341
...,...,...,...
198,Montenegro,MNE,5
200,Turkmenistan,TKM,1
202,Bhutan,BTN,6
203,Serbia,SRB,11


In [132]:
# optional: the 20 countries with the highest total_success
top20 = choropleth.sort_values('total_success', ascending=False).head(20)

In [148]:
# The following plot considers events of every scale recorded in this database:
# each country's successful terrorism events are aggregated into a single value
# (total_success), which is then plotted on this choropleth map.
#
# This note used to be a bare triple-quoted string after fig.show(); as the
# cell's last expression it was echoed as an escaped string repr in the output.

# already imported in the first cell; kept so this cell also runs on its own
import plotly.graph_objects as go

fig = go.Figure(data=go.Choropleth(
    locations = choropleth['Code'],           # country acronyms
    z = choropleth['total_success'],          # value that drives the colour
    text = choropleth['Country'],             # hover label
    colorscale = 'Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='black',
    marker_line_width=0.5,
    colorbar_title = 'Numerical Scale',
))

fig.update_layout(
    title_text='Global Terrorism Instances',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='natural earth'
    ),
    annotations = [dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='',
        showarrow = False,
    )]
)

fig.show()


***