#  LGM-VIP Data Science Internship
## Task 3: Exploratory Data Analysis on Dataset - Terrorism(Level-Intermediate)
## Batch: August 2022
## Author: Bhakti Pawar

# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Import the dataset

In [2]:
# Load the terrorism CSV into a DataFrame; the file is decoded as latin-1.
csv_path="../input/terrorism/globalterrorismdb_0718dist.csv"
data=pd.read_csv(csv_path,encoding="latin-1")

In [3]:
# Preview the first five rows of the raw data (head() defaults to 5 rows)
data.head()

In [4]:
# (number of rows, number of columns) of the raw data
data.shape

In [5]:
# Column index of the raw data
data.columns

In [6]:
# Print every column name of the raw data on one line, each followed by ", "
print("".join(f"{column_name}, " for column_name in data.columns),end="")

# Data Cleaning

In [7]:
# Check the null values: print the number of missing entries in each column.
# The per-column totals are computed once up front; evaluating
# data.isnull().sum() inside the loop would rescan the whole frame for
# every single column.
null_counts=data.isnull().sum()
for column_name,missing in null_counts.items():
    print(column_name,":", missing)

In [8]:
# Keep only the columns used in the rest of the analysis
selected_columns=[
    'iyear','imonth','iday',
    'country_txt','provstate','location','summary',
    'success','suicide','region_txt',
    'latitude','longitude',
    'attacktype1_txt','targtype1_txt','gname','weaptype1_txt',
    'nkill','nwound',
]
df=data[selected_columns]

In [9]:
# Preview the first five rows of the selected columns
df.head()

In [10]:
# (number of rows, number of columns) after the column selection
df.shape

In [11]:
# Map the original column codes to readable names, then apply the mapping
readable_names={
    'iyear':'Year',
    'imonth':'Month',
    'iday':'Day',
    'country_txt':'Country',
    'provstate':'State',
    'location':'Location',
    'summary':'Summary',
    'success':'Success',
    'suicide':'Suicide',
    'region_txt':'Region',
    'latitude':'Latitude',
    'longitude':'Longitude',
    'attacktype1_txt':'Attack_type',
    'targtype1_txt':'Target_type',
    'gname':'Group_name',
    'weaptype1_txt':'Weapon_type',
    'nkill':'Killed',
    'nwound':'Wounded',
}
df=df.rename(columns=readable_names)

# Data Exploration

In [12]:
# Column dtypes and non-null counts of the working frame
df.info()

In [13]:
# Display the working frame with its renamed columns
df

In [14]:
# Basic statistical data (count, mean, std, min, quartiles, max) per column
df.describe()

**Observation:**
1. The data consist of terrorist activities ranging from the year 1970-2017.
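2. The `Suicide` column has 1,81,691 non-null entries; this `count` from `describe()` is the number of recorded incidents, not the number of suicide attacks.
3. The highest number of people killed in a single incident is 1570, and the highest number wounded in a single incident is 8191.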

In [15]:
# Number of records for each attack type
df['Attack_type'].value_counts()

**Observation:** Bombing/explosion is the most common attack type.

In [16]:
# Number of records for each country
df['Country'].value_counts()

**Observation:** Iraq has the highest number of attacks.

In [17]:
# Print every distinct country on one line, each followed by ", "
print("".join(f"{country_name}, " for country_name in df['Country'].unique()),end="")

In [18]:
# Printing unique target types along with the number of records for each
df['Target_type'].value_counts()

In [19]:
# Printing unique weapon types along with the number of records for each
df['Weapon_type'].value_counts()

In [20]:
# Printing unique terrorist groups along with the number of records for each
df['Group_name'].value_counts()

In [21]:
# Printing the count of each value in the Suicide column
# (this gives the total no. of suicide attacks)
df['Suicide'].value_counts()

**Observation:** The no.of suicide attacks: 6633

In [22]:
# Count of each value in the Success column
df['Success'].value_counts()

**Observation:** The no. of successful attacks: 161632

In [23]:
# Make every subsequent figure 16 x 6 inches by default
import matplotlib
matplotlib.rcParams.update({'figure.figsize':[16,6]})

# Data Visualization

In [24]:
# Correlation between the numeric columns.
# df also holds text columns (Country, State, Summary, ...). Older pandas
# dropped them silently inside corr(); pandas 2.0+ raises instead, so the
# numeric columns are selected explicitly to work on every version.
df.select_dtypes(include='number').corr()

In [25]:
# Visualizing Correlation as an annotated heatmap.
# Only numeric columns are correlated: df contains text columns, which
# pandas 2.0+ no longer drops silently inside corr().
sns.heatmap(df.select_dtypes(include='number').corr(),annot=True)

In [26]:
# Use the dark grid background for the plots
sns.set_style(style='darkgrid')

# Sum the Killed column within each year
y=(
    df.groupby(['Year'])[['Killed']]
    .sum()
)
y.head()


In [27]:
# Bar chart of the number of people killed per year.
# x and y are passed by keyword: seaborn deprecated positional data vectors
# in 0.11, and from 0.12 onwards only `data` may be passed positionally.
sns.barplot(x=y.index,y=y['Killed'])
plt.xticks(rotation=90)
plt.xlabel('Year',size=14)
plt.ylabel('No. of people killed',size=14)
plt.title('No. of people killed in every year',size=16)

**Observation:**
1. Maximum no.of people killed in the year 2014 
2. The number of people killed rises and falls gradually between 1978 and 2011
3. The no. of killed people increased rapidly from the year 2011
4. 40000+ people were killed in 2014

In [28]:
# Number of attacks (rows) per year.
# The column is passed as x=...: from seaborn 0.12 the only positional
# argument of countplot is `data`, so a bare positional Series is no longer
# interpreted as the x variable.
sns.countplot(x=df['Year'])
plt.xticks(rotation=90)
plt.xlabel('Year',size=14)
plt.ylabel('No.of Attacks',size=14)
plt.title('No.of attacks in each year',size=16)


**Observation:**
1. The number of attacks shows a gradually increasing trend from 1970 to 1992
2. Most no. of attacks observed in the year 2014

In [29]:
# Sum the Wounded column within each year
wounded_df=(
    df.groupby(['Year'])[['Wounded']]
    .sum()
)
wounded_df.head()

In [30]:
# Bar chart of the number of people wounded per year.
# x and y are passed by keyword: seaborn deprecated positional data vectors
# in 0.11, and from 0.12 onwards only `data` may be passed positionally.
sns.barplot(x=wounded_df.index,y=wounded_df['Wounded'])
plt.xticks(rotation=90)
plt.xlabel('Year',size=14)
plt.ylabel('No. of people wounded',size=14)
plt.title('No. of people wounded in every year',size=16)

**Observation:**
1. Most no.of people wounded: 44043 in 2015
2. Least no.of people wounded: 82 in 1971

In [31]:
# Sum the Killed column per region, largest total first
killed_per_region=df.groupby(['Region'])[['Killed']].sum()
r=killed_per_region.sort_values('Killed',ascending=False)
print(r)

# Green bar chart of those totals
plt.bar(x=r.index,height=r['Killed'],color='green')
plt.xticks(rotation=90)
plt.xlabel('Region',fontsize=14)
plt.ylabel('No. of people killed',fontsize=14)
plt.title('No.of people killed in each region',fontsize=16)

**Observation:**
1. Most no.of people killed: 137642 in Middle East & North Africa  
2. Least no.of people killed: 1000 in Central Asia                  

In [32]:
# Number of attacks (rows) per region.
# The column is passed as x=...: from seaborn 0.12 the only positional
# argument of countplot is `data`, so a bare positional Series is no longer
# interpreted as the x variable.
sns.countplot(x=df['Region'])
plt.xticks(rotation=90)
plt.xlabel('Region',size=14)
plt.ylabel('No.of Attacks',size=14)
plt.title('No.of attacks in each Region',size=16)

**Observation:**
1. Most no.of attacks observed in the Middle East and North Africa
2. Least no.of attacks observed in Australasia and Oceania region

In [33]:
# Cross-tabulate attacks: one row per year, one column per region
region_with_year=pd.crosstab(index=df['Year'],columns=df['Region'])
region_with_year.head()

In [34]:
# Area chart of the year-by-region attack counts
region_with_year.plot.area()
plt.xlabel('Year',fontsize=14)
plt.ylabel('No.of Attacks',fontsize=14)
plt.title('Regionwise no.of Attacks in each year',fontsize=16)

In [35]:
# Sum the Wounded column per region, largest total first
wounded_per_region=df.groupby(['Region'])[['Wounded']].sum()
wound_region=wounded_per_region.sort_values('Wounded',ascending=False)
print(wound_region)

# Bar chart of those totals
plt.bar(x=wound_region.index,height=wound_region['Wounded'])
plt.xticks(rotation=90)
plt.xlabel('Region',fontsize=14)
plt.ylabel('No. of people wounded',fontsize=14)
plt.title('No.of people wounded in each region',fontsize=16)

**Observation:**
1. Most no.of people wounded: 214308 in Middle East and North Africa
2. Least no.of people wounded: 260 in Australasia and Oceania

In [36]:
# Ten countries with the most records, in descending order
country_counts=df['Country'].value_counts()
country=country_counts.iloc[:10].sort_values(ascending=False)
print(country)

# Green bar chart of those ten countries
country.plot.bar(color='green')
plt.xlabel('Country',fontsize=14)
plt.ylabel('No.of Attacks',fontsize=14)
plt.title('Top 10 countries affected by attack',fontsize=16)

In [37]:
# As we observed, most of the attacks took place in Iraq,
# so let us analyze the attacks in the states of Iraq.
country_df=df[df['Country']=='Iraq']
# The column is passed as x=...: from seaborn 0.12 the only positional
# argument of countplot is `data`, so a bare positional Series is no longer
# interpreted as the x variable.
sns.countplot(x=country_df['State'])
plt.xticks(rotation=90)
plt.xlabel('State',size=14)
plt.ylabel('No.of Attacks',size=14)
plt.title('No.of attacks in each State of Iraq country',size=16)

**Observation:** Most of the attacks in Iraq took place in the state of Baghdad, which has become the main target for attacks in the country.

In [38]:
# Ten states with the most records, in descending order
state_counts=df['State'].value_counts()
state=state_counts.iloc[:10].sort_values(ascending=False)
print(state)

# Red bar chart of those ten states
state.plot.bar(color='red')
plt.xlabel('State',fontsize=14)
plt.ylabel('No.of Attacks',fontsize=14)
plt.title('Top 10 States affected by attack',fontsize=16)

**Observation:** Most no.of attacks: 7645 in Baghdad state


In [39]:
# Cross-tabulate attacks: one row per year, one column per Success value
success_df=pd.crosstab(index=df['Year'],columns=df['Success'])
success_df.head()

In [40]:
# Line chart with one line per Success value across the years
success_df.plot.line()
plt.xlabel('Year',fontsize=14)
plt.ylabel('No.of attacks',fontsize=14)
plt.title('Trend of successful attacks in each year',fontsize=16)

In [41]:
# Cross-tabulate attacks: one row per year, one column per Suicide value
suicide_df=pd.crosstab(index=df['Year'],columns=df['Suicide'])
suicide_df.head()

In [42]:
# Line chart with one line per Suicide value across the years
suicide_df.plot.line()
plt.xlabel('Year',fontsize=14)
plt.ylabel('No.of attacks',fontsize=14)
plt.title('Trend of Suicide attacks in each year',fontsize=16)

In [43]:
# Pie chart of the share of records per attack type
attack_df=df['Attack_type'].value_counts()
attack_df.plot.pie()

In [44]:
# Group counts ranked by frequency; positions 1-10 are kept, i.e. the most
# frequent entry is skipped.
# NOTE(review): presumably the skipped top entry is the "Unknown" group - confirm.
group_counts=df['Group_name'].value_counts()
group_df=group_counts.iloc[1:11].sort_values(ascending=False)
print(group_df)

# Purple bar chart of those ten groups
group_df.plot.bar(color='purple')
plt.xticks(rotation=90)
plt.xlabel('Terrorist Groups',fontsize=14)
plt.ylabel('No.of attacks',fontsize=14)
plt.title('Top 10 Terrorist Groups',fontsize=16)

**Observation:** The Taliban carried out the highest number of attacks among the groups shown


In [45]:
# Number of records per target type, in descending order
target_counts=df['Target_type'].value_counts()
target_df=target_counts.sort_values(ascending=False)
print(target_df)

# Orange bar chart of the target-type counts
target_df.plot.bar(color='orange')
plt.xticks(rotation=90)
plt.xlabel('Target types',fontsize=14)
plt.ylabel('No.of attacks',fontsize=14)
plt.title('Most frequent Target types',fontsize=16)

**Observation:** Private citizens and Property were the main targets of terrorists