
# LGMVIP Task 2 - Intermediate Level

# Exploratory Data Analysis - Terrorism

# Name: Samrudhi Nawale



In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by pandas/seaborn.
warnings.filterwarnings('ignore')

In [None]:
# Load the Global Terrorism Database CSV export, decoding it as Latin-1.
df = pd.read_csv(
    filepath_or_buffer=r'C:\Samrudhi\LetsGrowMore\globalterrorismdb_0718dist.csv',
    encoding='latin1',
)


In [None]:
# Preview the first five rows of the raw data.
df.head(n=5)


In [None]:
# Preview the last five rows of the raw data.
df.tail(n=5)


In [None]:
# Dimensions of the raw dataset as a (rows, columns) tuple.
df.shape

In [None]:
# Print a concise summary of the frame (index range, dtypes, memory usage).
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) per column.
df.describe()


In [None]:
# Pairwise correlation between the numeric columns. `numeric_only=True` is
# required on pandas >= 2.0, where the default no longer drops text columns
# silently and the call would otherwise raise on them.
df.corr(numeric_only=True)

In [None]:
# Frequency of each distinct row. The method has to be *called*; the bare
# attribute only displays the bound-method object. By default rows containing
# missing values are left out of the count.
df.value_counts()

In [None]:
# List the column labels of the raw dataset.
df.columns

In [None]:
# Data type pandas inferred for each column.
df.dtypes

In [None]:
# Number of distinct values in each column (missing values are not counted).
df.nunique()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Renaming and selecting the relevant columns
# Map the raw dataset column names to readable labels used in the analysis.
column_names = {
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'country_txt': 'Country',
    'provstate': 'State',
    'region_txt': 'Region',
    'city': 'City',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'attacktype1_txt': 'Attack_Type',
    'target1': 'Target',
    'nkill': 'Killed',
    'nwound': 'Wounded',
    'summary': 'Summary',
    'gname': 'Group',
    'targtype1_txt': 'Target_Type',
    'weaptype1_txt': 'Weapon_Type',
    'motive': 'Motive',
}
df.rename(columns=column_names, inplace=True)


In [None]:
# Keep only the columns used in the analysis. The explicit copy makes the result
# an independent frame, so later column assignments do not write into a slice
# of the original data (the situation pandas flags with SettingWithCopyWarning).
selected_columns = [
    'Year', 'Month', 'Day',
    'Region', 'Country', 'State', 'City',
    'Latitude', 'Longitude',
    'Attack_Type', 'Killed', 'Wounded',
    'Target', 'Summary', 'Group',
    'Target_Type', 'Weapon_Type', 'Motive',
]
df = df[selected_columns].copy()


In [None]:
# Dealing with missing values
# A missing casualty count is replaced by zero and both columns are stored as
# integers.
for casualty_column in ('Wounded', 'Killed'):
    df[casualty_column] = df[casualty_column].fillna(0).astype(int)

In [None]:
# Preview the first five rows after renaming, selection and cleaning.
df.head(n=5)

# Exploratory Data Analysis (EDA)


In [None]:
# Bar chart of the number of recorded incidents per year.
plt.figure(figsize=(20, 10))
# `x` is passed by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so the positional form raises a TypeError there.
sns.countplot(x='Year', data=df, edgecolor='black')
plt.xticks(rotation=90)
plt.title('No. of Terrorist Activities by Year', fontsize=20)
plt.xlabel('Year', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that terrorist activities have increased in the 2010s (as compared to earlier decades) and hit their peak in 2014.



In [None]:
# Bar chart of the number of recorded incidents per calendar month.
plt.figure(figsize=(20, 10))
# Rows with Month == 0 are left out, consistent with the per-month casualty
# table in this notebook, which drops the same value (presumably the code for
# an unknown month -- confirm against the dataset codebook).
# `x` is passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x='Month', data=df[df['Month'] != 0], edgecolor='black')
plt.title('No. of Terrorist Activities by Month', fontsize=20)
plt.xlabel('Month', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()



The distribution for terrorist activities is pretty even across all months, although the month of May reports the highest no. of terrorist activities.



In [None]:
# Bar chart of the number of recorded incidents per day of the month.
plt.figure(figsize=(20, 10))
# Rows with Day == 0 are left out, consistent with the per-day casualty table
# in this notebook, which drops the same value (presumably the code for an
# unknown day -- confirm against the dataset codebook).
# `x` is passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x='Day', data=df[df['Day'] != 0], edgecolor='black')
plt.title('No. of Terrorist Activities by Day', fontsize=20)
plt.xlabel('Day', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

The distribution is very even across all days except for 31st, which is probably due to the fact that not every month has 31 days. It is observed that the most terrorist activities are reported on the 15th of a month.

In [None]:
# Incident counts for the ten most frequent regions (computed once, reused
# for both axes).
top_regions = df['Region'].value_counts().head(10)
plt.figure(figsize=(20, 10))
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.barplot(x=top_regions.index, y=top_regions.values)
plt.title('No. of Terrorist Activities by Region', fontsize=20)
plt.xlabel('Regions', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that terrorist activities are more frequent in Middle Eastern and North African regions, followed by South Asia ranging above the 40000 mark. These regions can be identified as the hotzones for terrorism.

In [None]:
# Incident counts for the ten most frequent countries (computed once, reused
# for both axes).
top_countries = df['Country'].value_counts().head(10)
plt.figure(figsize=(20, 10))
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.barplot(x=top_countries.index, y=top_countries.values)
plt.title('No. of Terrorist Activities by Country', fontsize=20)
plt.xlabel('Countries', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that Iraq is the most prone to terrorist activities, followed by Pakistan, Afghanistan and India, each having suffered over 10000 attacks in the past few decades.



In [None]:
# Incident counts for the ten most frequent states/provinces (computed once,
# reused for both axes).
top_states = df['State'].value_counts().head(10)
plt.figure(figsize=(20, 10))
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.barplot(x=top_states.index, y=top_states.values)
plt.title('No. of Terrorist Activities by State', fontsize=20)
plt.xlabel('States', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that Baghdad reports the highest number of terrorist activities (over 7500) among other major states and provinces of the world, distantly followed by Northern Ireland.



In [None]:
# Incident counts for cities ranked 2nd to 11th by frequency. The top-ranked
# entry is skipped on purpose -- presumably the 'Unknown' placeholder; confirm
# before relying on the positional slice.
top_cities = df['City'].value_counts().iloc[1:11]
plt.figure(figsize=(20, 10))
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.barplot(x=top_cities.index, y=top_cities.values)
plt.title('No. of Terrorist Activities by City', fontsize=20)
plt.xlabel('Cities', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that the city of Baghdad reports the highest number of terrorist activities (around 7500) among other major cities of the world, distantly followed by Karachi in Pakistan.

In [None]:
# Bar chart of incidents per attack type, most frequent first.
attack_type_order = df['Attack_Type'].value_counts().index
plt.figure(figsize=(20, 10))
# `x` is passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x='Attack_Type', data=df, order=attack_type_order)
plt.xticks(rotation=15)
plt.title('Terrorist Activities by Attack Type', fontsize=20)
plt.xlabel('Attack Type', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that among the primary types of terrorist attacks, Bombings/Explosions are the most common, followed by Armed Assaults and Assassinations, albeit not very closely.

In [None]:
# Bar chart of incidents for the ten most frequent target types, most
# frequent first; `order` restricts the plot to those ten categories.
top_target_types = df['Target_Type'].value_counts().head(10).index
plt.figure(figsize=(20, 10))
# `x` is passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.countplot(x='Target_Type', data=df, order=top_target_types)
plt.xticks(rotation=15)
plt.title('Terrorist Activities by Target Type', fontsize=20)
plt.xlabel('Target Type', fontsize=16)
plt.ylabel('Count', fontsize=16)
plt.show()

It is observed that the most frequent targets of terrorist attacks are Private Citizens (or civilians) and Property, followed by the military and police forces, as well as government and business bodies.

In [None]:
# Horizontal bar chart of incident counts for groups ranked 2nd to 16th by
# frequency. The top-ranked entry is skipped on purpose -- presumably the
# 'Unknown' placeholder; confirm before relying on the positional slice.
top_groups = df['Group'].value_counts().iloc[1:16]
plt.figure(figsize=(20, 10))
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.barplot(x=top_groups.values, y=top_groups.index, orient='h')
plt.title('No. of Terrorist Activities by Terrorist Group', fontsize=20)
# The bars are horizontal, so counts run along x and group names along y
# (the two labels were previously swapped).
plt.xlabel('Count', fontsize=16)
plt.ylabel('Groups', fontsize=16)
plt.show()

It is observed that acts of terrorism have most frequently been orchestrated by the Taliban (ranging over 7000) followed by the ISIL, the SL, the FMLN, and Al-Shabaab.



In [None]:
# Recording Casualties
# Ten (group, country) pairs with the highest totals, ranked by people killed
# and then by people wounded; entries whose group is 'Unknown' are removed.
data = (
    df[['Group', 'Country', 'Killed', 'Wounded']]
    # The `axis` keyword is omitted: it is deprecated in recent pandas, and
    # row-wise grouping is the default anyway.
    .groupby(['Group', 'Country'])
    .sum()
    .sort_values(['Killed', 'Wounded'], ascending=False)
    # Name the index level explicitly instead of relying on partial matching
    # against the first level of the MultiIndex.
    .drop('Unknown', level='Group')
    .reset_index()
    .head(10)
)
data

The ISIL has been most actively terrorising Iraq with over 31000 people killed and 23000 wounded. The Taliban has been committing its equal share of terrorism in Afghanistan with over 29000 killed and 27000 wounded.

In [None]:
# Total people killed and wounded per year (one column per year).
casualties_by_year = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Year',
    aggfunc='sum',
)
casualties_by_year

Most deaths due to terrorism occurred in 2014, numbering at around 44000.



In [None]:
# Total people killed and wounded per month (one column per month).
casualties_by_month = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Month',
    aggfunc='sum',
)
# Display the table without the month-0 column; the stored frame keeps it.
casualties_by_month.drop(columns=0)

Most deaths due to terrorism have occurred in the month of July, numbering at around 38000.



In [None]:
# Total people killed and wounded per day of the month (one column per day).
casualties_by_day = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Day',
    aggfunc='sum',
)
# Display the table without the day-0 column; the stored frame keeps it.
casualties_by_day.drop(columns=0)

Most deaths due to terrorism have occurred on the 11th of a month, numbering at around 17000.



In [None]:
# Total people killed and wounded per region (one column per region).
casualties_by_region = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Region',
    aggfunc='sum',
)
casualties_by_region

Most deaths due to terrorism have occurred in the Middle East and North Africa, numbering at around 137000.



In [None]:
# Total people killed and wounded per country (one column per country).
casualties_by_country = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Country',
    aggfunc='sum',
)
casualties_by_country

Most deaths due to terrorism have occurred in Iraq, numbering at around 78000.



In [None]:
# Total people killed and wounded per state/province (one column per state).
casualties_by_state = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='State',
    aggfunc='sum',
)
casualties_by_state

Most deaths due to terrorism have occurred in the province of Baghdad, numbering at around 21000.



In [None]:
# Total people killed and wounded per city (one column per city).
casualties_by_city = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='City',
    aggfunc='sum',
)
casualties_by_city

Most deaths due to terrorism have occurred in the city of Baghdad, numbering at around 21000.



In [None]:
# Total people killed and wounded per attack type (one column per type).
casualties_by_attack_type = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Attack_Type',
    aggfunc='sum',
)
casualties_by_attack_type

Most deaths due to terrorism have occurred from armed assaults, numbering at around 160000.



In [None]:
# Total people killed and wounded per target type (one column per type).
casualties_by_target_type = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Target_Type',
    aggfunc='sum',
)
casualties_by_target_type

Most deaths due to terrorism have occurred among civilians and private citizens, numbering at around 140000.



In [None]:
# Total people killed and wounded per group (one column per group).
casualties_by_group = pd.pivot_table(
    df,
    values=['Killed', 'Wounded'],
    columns='Group',
    aggfunc='sum',
)
casualties_by_group

Most deaths due to terrorism have been caused by the terrorist activities committed by Islamic State of Iraq and the Levant (ISIL) numbering at around 39000.

