# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Blanket filter: silences every warning category for the rest of the session,
# whichever library raises it (deprecation notices included).
warnings.filterwarnings("ignore")

# Loading the dataset

In [2]:
# Read the raw events table from the CSV sitting next to the notebook.
# The encoding is given explicitly (presumably the export is not UTF-8).
CSV_PATH = "globalterrorismdb.csv"
data = pd.read_csv(CSV_PATH, encoding='ISO-8859-1')

In [3]:
# First five rows of the raw frame.
data.head(n=5)

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2,...,,,,,PGIS,0,0,0,0,
1,197000000002,1970,0,0,,0,,130,Mexico,1,...,,,,,PGIS,0,1,1,1,
2,197001000001,1970,1,0,,0,,160,Philippines,5,...,,,,,PGIS,-9,-9,1,1,
3,197001000002,1970,1,0,,0,,78,Greece,8,...,,,,,PGIS,-9,-9,1,1,
4,197001000003,1970,1,0,,0,,101,Japan,4,...,,,,,PGIS,-9,-9,1,1,


In [4]:
# Last five rows of the raw frame.
data.tail(n=5)

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
181686,201712310022,2017,12,31,,0,,182,Somalia,11,...,,"""Somalia: Al-Shabaab Militants Attack Army Che...","""Highlights: Somalia Daily Media Highlights 2 ...","""Highlights: Somalia Daily Media Highlights 1 ...",START Primary Collection,0,0,0,0,
181687,201712310029,2017,12,31,,0,,200,Syria,10,...,,"""Putin's 'victory' in Syria has turned into a ...","""Two Russian soldiers killed at Hmeymim base i...","""Two Russian servicemen killed in Syria mortar...",START Primary Collection,-9,-9,1,1,
181688,201712310030,2017,12,31,,0,,160,Philippines,5,...,,"""Maguindanao clashes trap tribe members,"" Phil...",,,START Primary Collection,0,0,0,0,
181689,201712310031,2017,12,31,,0,,92,India,6,...,,"""Trader escapes grenade attack in Imphal,"" Bus...",,,START Primary Collection,-9,-9,0,-9,
181690,201712310032,2017,12,31,,0,,160,Philippines,5,...,,"""Security tightened in Cotabato following IED ...","""Security tightened in Cotabato City,"" Manila ...",,START Primary Collection,-9,-9,0,-9,


# Summarising the data

In [None]:
# (number of rows, number of columns) of the raw frame.
data.shape

In [None]:
# Summary statistics of the raw frame, using the pandas defaults.
data.describe()

In [None]:
# All column labels of the raw frame as an array (used to pick the columns to keep).
data.columns.values

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Pairwise correlation between the numeric columns.
# The raw frame also holds text columns (e.g. country_txt, scite1); pandas >= 2.0
# raises on those instead of silently skipping them, so restrict the frame to
# numeric/bool columns before calling corr(). This reproduces what older pandas
# versions did implicitly and works on every pandas version.
data.select_dtypes(include=["number", "bool"]).corr()

In [None]:
# dtype that pandas inferred for each column of the raw frame.
data.dtypes

# Renaming and selecting the relevant columns

In [None]:
# Raw column code -> readable label. Columns that are not listed keep their
# original names.
column_labels = {
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'country_txt': 'Country',
    'provstate': 'State',
    'region_txt': 'Region',
    'city': 'City',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'attacktype1_txt': 'Attack_Type',
    'target1': 'Target',
    'nkill': 'Killed',
    'nwound': 'Wounded',
    'summary': 'Summary',
    'gname': 'Group',
    'targtype1_txt': 'Target_Type',
    'weaptype1_txt': 'Weapon_Type',
    'motive': 'Motive',
}
data.rename(columns=column_labels, inplace=True)

In [None]:
# First five rows, to check the renamed column labels.
data.head(n=5)

In [None]:
# Keep only the columns used in the analysis below.
relevant_columns = [
    'Year', 'Month', 'Day', 'Region', 'Country', 'State', 'City',
    'Latitude', 'Longitude', 'Attack_Type', 'Killed', 'Wounded',
    'Target', 'Summary', 'Group', 'Target_Type', 'Weapon_Type', 'Motive',
]
# .copy() makes the result an independent frame, so the column assignments in
# the following cells do not trigger pandas' SettingWithCopyWarning.
data = data[relevant_columns].copy()

In [None]:
# First five rows of the reduced frame.
data.head(n=5)

# Dealing with missing values

In [None]:
# Missing casualty counts are replaced by 0 and the columns are stored as ints.
for casualty_column in ('Wounded', 'Killed'):
    data[casualty_column] = data[casualty_column].fillna(0).astype(int)

In [None]:
# First five rows, to check the filled casualty columns.
data.head(n=5)

# Exploratory Data Analysis (EDA)

In [None]:
# Number of recorded incidents per year.
plt.figure(figsize=(20, 10))
# The column must be passed as x=...: seaborn >= 0.12 accepts only `data`
# positionally, so countplot('Year', data=data) raises a TypeError there.
sns.countplot(x='Year', data=data, edgecolor='blue')
plt.xticks(rotation=90)  # year labels overlap when drawn horizontally
plt.title('No. of Terrorist Activities by Year',fontsize=25)
plt.xlabel('Year', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**It is observed that terrorist activities have increased in the 2010s (as compared to earlier decades) and hit their peak in 2014.**

In [None]:
plt.figure(figsize=(20, 10))
sns.countplot('Month', data=data, edgecolor='blue')
plt.title('No. of Terrorist Activities by Month',fontsize=24)
plt.xlabel('Month', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**The distribution for terrorist activities is pretty even across all months, although the month of May reports the highest no. of terrorist activities.**

In [None]:
plt.figure(figsize=(20, 10))
sns.countplot('Day', data=data, edgecolor='blue')
plt.title('No. of Terrorist Activities by Day', fontsize=23)
plt.xlabel('Day', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**The distribution is very even across all days except for 31st, which is probably due to the fact that not every month has 31 days. It is observed that the most terrorist activities are reported on the 15th of a month.**

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data['Region'].value_counts()[:10].index,data['Region'].value_counts()[:10].values)
plt.title('No. of Terrorist Activities by Region', fontsize=24)
plt.xlabel('Regions', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**It is observed that terrorist activities are most frequent in the Middle East and North Africa region, followed by South Asia, ranging above the 40000 mark. These regions can be identified as the hot zones for terrorism.**

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data['Country'].value_counts()[:10].index,data['Country'].value_counts()[:10].values)
plt.title('No. of Terrorist Activities by Country', fontsize=23)
plt.xlabel('Countries',fontsize=18)
plt.ylabel('Count',fontsize=18)
plt.show()

**This shows the top 10 most attacked countries. Iraq is the most prone to terrorist activities, followed by Pakistan, Afghanistan and India, each having suffered over 10000 attacks in the past few decades.**

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data['State'].value_counts()[:10].index,data['State'].value_counts()[:10].values)
plt.title('No. of Terrorist Activities by State', fontsize=23)
plt.xlabel('States',fontsize=18)
plt.ylabel('Count',fontsize=18)
plt.show()

**It is observed that Baghdad reports the highest number of terrorist activities (over 7500) among other major states and provinces of the world, distantly followed by Northern Ireland.**

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data['City'].value_counts()[1:11].index,data['City'].value_counts()[1:11].values)
plt.title('No. of Terrorist Activities by City', fontsize=23)
plt.xlabel('Cities',fontsize=18)
plt.ylabel('Count',fontsize=18)
plt.show()

**It is observed that the city of Baghdad reports the highest number of terrorist activities (around 7500) among other major cities of the world, distantly followed by Karachi in Pakistan.**

In [None]:
plt.figure(figsize=(20, 10))
sns.countplot('Attack_Type', data=data, order=data['Attack_Type'].value_counts().index)
plt.xticks(rotation=15)
plt.title('Terrorist Activities by Attack Type', fontsize=23)
plt.xlabel('Attack Type', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**It is observed that among the primary types of terrorist attacks, Bombings/Explosions are the most common, followed by Armed Assaults and Assassinations, albeit not very closely.**

In [None]:
plt.figure(figsize=(20, 10))
sns.countplot('Weapon_Type', data=data, order=data['Weapon_Type'].value_counts().index)
plt.xticks(rotation=15)
plt.title('Mostly used weapons during attack', fontsize=23)
plt.xlabel('Weapon Type', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**It is observed that explosives are the most commonly used weapons in attacks, followed by firearms.**

In [None]:
plt.figure(figsize=(20, 10))
sns.countplot('Target_Type', data=data, order=data['Target_Type'].value_counts()[:10].index)
plt.xticks(rotation=15)
plt.title('Terrorist Activities by Target Type', fontsize=23)
plt.xlabel('Target Type', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**It is observed that most frequently, the targets of terrorist attacks tend to be Private Citizens (or civilians) and Property, followed by the military and police forces, as well as government and business bodies.**

In [None]:
plt.figure(figsize=(20,10))
sns.barplot(data['Group'].value_counts()[1:16].values, data['Group'].value_counts()[1:16].index, orient='h')
plt.title('No. of Terrorist Activities by Terrorist Group', fontsize=23)
plt.xlabel('Groups', fontsize=18)
plt.ylabel('Count', fontsize=18)
plt.show()

**It is observed that acts of terrorism have most frequently been orchestrated by the Taliban (ranging over 7000) followed by the ISIL, the SL, the FMLN, and Al-Shabaab.**

# Recording Casualties

In [None]:
# Ten (group, country) pairs with the highest death toll (ties broken by wounded),
# excluding incidents whose perpetrator group is 'Unknown'.
df = (
    data[['Group', 'Country', 'Killed', 'Wounded']]
    # `axis=0` was the default and the keyword is deprecated since pandas 2.1,
    # so it is omitted.
    .groupby(['Group', 'Country'])
    .sum()
    .sort_values(['Killed', 'Wounded'], ascending=False)
    # Name the index level explicitly: 'Unknown' is removed from the Group
    # level only, never matched against country names.
    .drop('Unknown', level='Group')
    .reset_index()
    .head(10)
)
df

**The ISIL has been most actively terrorising Iraq with over 31000 people killed and 23000 wounded. The Taliban has been committing its equal share of terrorism in Afghanistan with over 29000 killed and 27000 wounded.**

In [None]:
# Total killed / wounded per year (one row per measure, one column per year).
casualties_by_year = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Year',
    aggfunc='sum',
)
casualties_by_year

**Most deaths due to terrorism occurred in 2014, numbering at around 44000.**

In [None]:
# Total killed / wounded per month (one row per measure, one column per month).
casualties_by_month = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Month',
    aggfunc='sum',
)
# Display without the Month == 0 column; the stored table keeps it.
casualties_by_month.drop(columns=0)

**Most deaths due to terrorism have occurred in the month of July, numbering at around 38000.**

In [None]:
# Total killed / wounded per day of month (one row per measure, one column per day).
casualties_by_day = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Day',
    aggfunc='sum',
)
# Display without the Day == 0 column; the stored table keeps it.
casualties_by_day.drop(columns=0)

**Most deaths due to terrorism have occurred on the 11th of a month, numbering at around 17000.**

In [None]:
# Total killed / wounded per region (one row per measure, one column per region).
casualties_by_region = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Region',
    aggfunc='sum',
)
casualties_by_region

**Most deaths due to terrorism have occurred in the Middle East and North Africa, numbering at around 137000.**

In [None]:
# Total killed / wounded per country (one row per measure, one column per country).
casualties_by_country = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Country',
    aggfunc='sum',
)
casualties_by_country

**Most deaths due to terrorism have occurred in Iraq, numbering at around 78000.**

In [None]:
# Total killed / wounded per state/province (one row per measure, one column per state).
casualties_by_state = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='State',
    aggfunc='sum',
)
casualties_by_state

**Most deaths due to terrorism have occurred in the province of Baghdad, numbering at around 21000.**

In [None]:
# Total killed / wounded per city (one row per measure, one column per city).
casualties_by_city = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='City',
    aggfunc='sum',
)
casualties_by_city

**Most deaths due to terrorism have occurred in the city of Baghdad, numbering at around 21000.**

In [None]:
# Total killed / wounded per attack type (one row per measure, one column per type).
casualties_by_attack_type = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Attack_Type',
    aggfunc='sum',
)
casualties_by_attack_type

**Most deaths due to terrorism have occurred from armed assaults, numbering at around 160000.**

In [None]:
# Total killed / wounded per target type (one row per measure, one column per type).
casualties_by_target_type = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Target_Type',
    aggfunc='sum',
)
casualties_by_target_type

**Most deaths due to terrorism have occurred among civilians and private citizens, numbering at around 140000.**

In [None]:
# Total killed / wounded per perpetrator group (one row per measure, one column per group).
casualties_by_group = data.pivot_table(
    values=['Killed', 'Wounded'],
    columns='Group',
    aggfunc='sum',
)
casualties_by_group

**Most deaths due to terrorism have been caused by the terrorist activities committed by Islamic State of Iraq and the Levant (ISIL) numbering at around 39000.**