## <b> The Global Terrorism Database (GTD) is an open-source database including information on terrorist attacks around the world from 1970 through 2017. The GTD includes systematic data on domestic as well as international terrorist incidents that have occurred during this time period and now includes more than 180,000 attacks. The database is maintained by researchers at the National Consortium for the Study of Terrorism and Responses to Terrorism (START), headquartered at the University of Maryland.</b>

# <b> Explore and analyze the data to discover key findings pertaining to terrorist activities. </b>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Mount Google Drive at /content/drive; the dataset path used when loading
# the CSV lives under this mount point. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')





Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [47]:
# Load the Global Terrorism Database CSV from the mounted Drive folder.
file_path = '/content/drive/My Drive/AlmaBetter/Capstone_project/Global Terrorism Data .csv'

# The file is decoded as ISO-8859-1 (Latin-1).
df = pd.read_csv(file_path, encoding='ISO-8859-1')

# Quick structural overview: column names, (rows, columns), total cell count.
print(df.columns.to_list())
print(df.shape)
print(df.size)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

  exec(code_obj, self.user_global_ns, self.user_ns)


['eventid', 'iyear', 'imonth', 'iday', 'approxdate', 'extended', 'resolution', 'country', 'country_txt', 'region', 'region_txt', 'provstate', 'city', 'latitude', 'longitude', 'specificity', 'vicinity', 'location', 'summary', 'crit1', 'crit2', 'crit3', 'doubtterr', 'alternative', 'alternative_txt', 'multiple', 'success', 'suicide', 'attacktype1', 'attacktype1_txt', 'attacktype2', 'attacktype2_txt', 'attacktype3', 'attacktype3_txt', 'targtype1', 'targtype1_txt', 'targsubtype1', 'targsubtype1_txt', 'corp1', 'target1', 'natlty1', 'natlty1_txt', 'targtype2', 'targtype2_txt', 'targsubtype2', 'targsubtype2_txt', 'corp2', 'target2', 'natlty2', 'natlty2_txt', 'targtype3', 'targtype3_txt', 'targsubtype3', 'targsubtype3_txt', 'corp3', 'target3', 'natlty3', 'natlty3_txt', 'gname', 'gsubname', 'gname2', 'gsubname2', 'gname3', 'gsubname3', 'motive', 'guncertain1', 'guncertain2', 'guncertain3', 'individual', 'nperps', 'nperpcap', 'claimed', 'claimmode', 'claimmode_txt', 'claim2', 'claimmode2', 'claim

In [49]:
# Give the most frequently used columns shorter, readable names.
df = df.rename(
    columns={
        'iyear': 'year',
        'imonth': 'month',
        'iday': 'day',
        'region_txt': 'region_name',
        'country_txt': 'country_name',
        'attacktype1_txt': 'attacktype',
        'weaptype1_txt': 'weapon_name',
        'targtype1_txt': 'targtype',
    }
)
print(df.columns.to_list())

['eventid', 'year', 'month', 'day', 'approxdate', 'extended', 'resolution', 'country', 'country_name', 'region', 'region_name', 'provstate', 'city', 'latitude', 'longitude', 'specificity', 'vicinity', 'location', 'summary', 'crit1', 'crit2', 'crit3', 'doubtterr', 'alternative', 'alternative_txt', 'multiple', 'success', 'suicide', 'attacktype1', 'attacktype', 'attacktype2', 'attacktype2_txt', 'attacktype3', 'attacktype3_txt', 'targtype1', 'targtype', 'targsubtype1', 'targsubtype1_txt', 'corp1', 'target1', 'natlty1', 'natlty1_txt', 'targtype2', 'targtype2_txt', 'targsubtype2', 'targsubtype2_txt', 'corp2', 'target2', 'natlty2', 'natlty2_txt', 'targtype3', 'targtype3_txt', 'targsubtype3', 'targsubtype3_txt', 'corp3', 'target3', 'natlty3', 'natlty3_txt', 'gname', 'gsubname', 'gname2', 'gsubname2', 'gname3', 'gsubname3', 'motive', 'guncertain1', 'guncertain2', 'guncertain3', 'individual', 'nperps', 'nperpcap', 'claimed', 'claimmode', 'claimmode_txt', 'claim2', 'claimmode2', 'claimmode2_txt',

In [18]:
# Number of global terrorist attacks per year:
# count the rows in each 'year' group and expose the count as a column.
Year_wise_attacks = (
    df.groupby(by=['year'])['year']
    .count()
    .reset_index(name='attacks_year_wise')
)
print(Year_wise_attacks)



    year  attacks_year_wise
0   1970                651
1   1971                471
2   1972                568
3   1973                473
4   1974                581
5   1975                740
6   1976                923
7   1977               1319
8   1978               1526
9   1979               2662
10  1980               2662
11  1981               2586
12  1982               2544
13  1983               2870
14  1984               3495
15  1985               2915
16  1986               2860
17  1987               3183
18  1988               3721
19  1989               4324
20  1990               3887
21  1991               4683
22  1992               5071
23  1994               3456
24  1995               3081
25  1996               3058
26  1997               3197
27  1998                934
28  1999               1395
29  2000               1814
30  2001               1906
31  2002               1333
32  2003               1278
33  2004               1166
34  2005            

In [19]:
# Split the attacks into successful (success == 1) and failed (success == 0).
# Total number of attacks = non-null entries in the 'year' column.
attacks = df['year'].count()

# Rows flagged as successful, and how many there are.
success_attacks = df.loc[df['success'] == 1]
no_of_success_attacks = success_attacks['success'].count()

# Rows flagged as failed, shown for inspection, and how many there are.
failure_attacks = df.loc[df['success'] == 0]
print(failure_attacks)
no_of_failure_attacks = failure_attacks['success'].count()




             eventid  year  month  day approxdate  extended resolution  \
6       197001020001  1970      1    2        NaN         0        NaN   
10      197001050001  1970      1    1        NaN         0        NaN   
23      197001190003  1970      1   19        NaN         0        NaN   
39      197001310001  1970      1   31        NaN         0        NaN   
43      197002020001  1970      2    2        NaN         0        NaN   
...              ...   ...    ...  ...        ...       ...        ...   
181650  201712300007  2017     12   30        NaN         0        NaN   
181674  201712310007  2017     12   31        NaN         0        NaN   
181679  201712310012  2017     12   31        NaN         0        NaN   
181689  201712310031  2017     12   31        NaN         0        NaN   
181690  201712310032  2017     12   31        NaN         0        NaN   

        country   country_name  region  ...  \
6           218        Uruguay       3  ...   
10          217  

In [None]:
# Year-by-year comparison of successful and failed attacks.
#
# Merging the raw, row-level frames on 'year' is a many-to-many join: every
# successful attack of a year is paired with every failed attack of the same
# year, which multiplies the row count and can exhaust memory on this
# dataset. Reduce each side to one row per year first, then merge the counts.
loj1 = (
    pd.merge(
        success_attacks.groupby('year').size().reset_index(name='success_attacks'),
        failure_attacks.groupby('year').size().reset_index(name='failure_attacks'),
        how='outer',
        on='year',
    )
    # A year present on only one side has no attacks of the other kind.
    .fillna(0)
    .astype(int)
)
print(loj1.shape)
print(df.shape)


In [None]:
# Overall totals: all attacks, successful attacks, failed attacks (one per line).
print(attacks, no_of_success_attacks, no_of_failure_attacks, sep='\n')

# Share of attacks that succeeded, as a percentage.
Attacks_accuracy = no_of_success_attacks / attacks * 100
print(Attacks_accuracy)

# Years ordered from the most to the fewest attacks.
Year_wise_top_attacks = Year_wise_attacks.sort_values(
    by='attacks_year_wise',
    ascending=False,
)
print(Year_wise_top_attacks)

In [None]:
# Highest number of attacks recorded in any single year.
max_num_attacks = Year_wise_attacks.loc[:, "attacks_year_wise"].max()
print(max_num_attacks)

In [None]:
# Bar chart: number of global terrorist attacks per year.
ax = Year_wise_attacks.plot.bar(x='year', y='attacks_year_wise', figsize=(14, 4))
ax.set_title("Year wise Attacks", fontsize=30)
ax.set_xlabel("Year", fontsize=20)
ax.set_ylabel("No.of Attacks", fontsize=20)
plt.show()

In [None]:
# Number of attacks for every (country, year) pair.
country_year_wise_attack = (
    df.groupby(by=['country_name', 'year'])['country_name']
    .count()
    .reset_index(name='no_attacks_year')
)
print(country_year_wise_attack)

In [None]:
# Total attacks per country over all years: sum the per-year counts.
attacks_in_country_wise = (
    country_year_wise_attack
    .groupby(by=['country_name'])['no_attacks_year']
    .sum()
    .reset_index(name='no_attacks_country')
)
print(attacks_in_country_wise)

In [None]:
# Bar chart: total number of attacks for every country in the dataset.
ax = attacks_in_country_wise.plot.bar(
    x='country_name',
    y='no_attacks_country',
    figsize=(18, 6),
)
ax.set_title("Attacks in country wise", fontsize=30)
ax.set_xlabel("Country Name", fontsize=20)
ax.set_ylabel("No.of Attacks in a country", fontsize=20)
plt.show()

In [None]:
# Top 10 most affected countries: order by total attacks, keep the first ten rows.
df2 = attacks_in_country_wise.sort_values('no_attacks_country', ascending=False)
most_affeted_countrys = df2.head(10)
print(most_affeted_countrys)

In [None]:
# Horizontal bar chart of the ten countries with the most attacks.
most_affeted_countrys.plot(x='country_name', y='no_attacks_country', kind='barh', figsize=(16, 6))
# The title is set once; an earlier garbled title call was dead code because
# it was overwritten immediately.
plt.title("Top 10 Affected Countrys", fontsize=30)
# With kind='barh' the bars run horizontally: the x-axis carries the attack
# counts and the y-axis carries the country names, so the labels go this way.
plt.xlabel("No.of Attacks in a country", fontsize=20)
plt.ylabel("Country Name", fontsize=20)
plt.show()


In [None]:
# Country (or countries, on a tie) with the highest total number of attacks.
max_num_country_attacks = attacks_in_country_wise.loc[
    attacks_in_country_wise['no_attacks_country']
    == attacks_in_country_wise['no_attacks_country'].max()
]
print(max_num_country_attacks)

In [None]:
# Top 10 most affected countries, listed from most to fewest attacks.
country_wise_top_attacks = most_affeted_countrys.sort_values(
    'no_attacks_country',
    ascending=False,
)
print(country_wise_top_attacks)

In [None]:

# Number of attacks per province/state.
most_affected_states = (
    df.groupby(by=['provstate'])['provstate']
    .count()
    .reset_index(name='state_attacks_year')
)
print(most_affected_states)

In [None]:
# Top 10 most affected states over all years.
# Sort by attack count FIRST and only then keep ten rows. Slicing before
# sorting returned the first ten states in group (alphabetical) order rather
# than the ten with the most attacks.
# The column keeps the name 'state_attacks_year' (the plotting cell reads it);
# a rename whose result was discarded has been dropped as a no-op.
top_affected_states = (
    most_affected_states
    .sort_values(by='state_attacks_year', ascending=False)
    .head(10)
)
print(top_affected_states)

In [None]:
# Bar chart of the selected states and their attack counts.
ax = top_affected_states.plot.bar(x='provstate', y='state_attacks_year', figsize=(16, 6))
ax.set_title("Top10 States Most Affected in past years", fontsize=30)
ax.set_xlabel("State Name", fontsize=20)
ax.set_ylabel("No.of Attacks in a State", fontsize=20)
plt.show()

In [None]:
# Attack counts per city, most attacked first.
#
# Name the index explicitly before reset_index(). The column that
# value_counts().reset_index() produces is called 'index' on older pandas but
# takes the Series name ('city') on pandas >= 2.0, so renaming 'index'
# afterwards silently did nothing there and later lookups of 'cityname' failed.
most_affected_city = (
    df['city']
    .value_counts()
    .rename_axis('cityname')
    .reset_index(name='attacks_city_wise')
)
most_affected_cities = most_affected_city.sort_values(by='attacks_city_wise', ascending=False)
print(most_affected_cities)

In [None]:
# Keep the first ten rows of the city ranking.
highly_most_affected_cities = most_affected_cities.head(10)
print(highly_most_affected_cities)

In [None]:
# Bar chart of the ten most attacked cities.
ax = highly_most_affected_cities.plot.bar(
    x='cityname',
    y='attacks_city_wise',
    figsize=(16, 6),
)
ax.set_title("Top10 Cities Most Affected in past years", fontsize=30)
ax.set_xlabel("City Name", fontsize=20)
ax.set_ylabel("No.of Attacks in a City", fontsize=20)
plt.show()

In [None]:
# Number of people killed in attacks, summed per year ('nkill' column).
no_killed_attacks = df.groupby('year', as_index=False).agg(nkill=('nkill', 'sum'))
print(no_killed_attacks)

In [None]:
# Bar chart: people killed in attacks, per year.
ax = no_killed_attacks.plot.bar(x='year', y='nkill', figsize=(16, 6))
ax.set_title("No.of Persons killed in Attacks", fontsize=30)
ax.set_xlabel("Year", fontsize=20)
ax.set_ylabel("No.of Persons Killed", fontsize=20)
plt.show()

In [None]:
# Number of attacks for each attack type.
attack_type = (
    df.groupby(by=['attacktype'])['attacktype']
    .count()
    .reset_index(name='num_attacks')
)
print(attack_type)

In [None]:
# Bar chart: number of attacks per attack type.
ax = attack_type.plot.bar(x='attacktype', y='num_attacks', figsize=(16, 6))
ax.set_title("Attack Types", fontsize=30)
ax.set_xlabel("attacktype", fontsize=20)
ax.set_ylabel("num_attacks", fontsize=20)
plt.show()

In [None]:
# How many times each weapon category was used.
most_used_weapons = (
    df.groupby(by=['weapon_name'])['weapon_name']
    .count()
    .reset_index(name='no_times_used_weapons')
)
print(most_used_weapons)

In [None]:
# Bar chart: usage count per weapon category.
ax = most_used_weapons.plot.bar(
    x='weapon_name',
    y='no_times_used_weapons',
    figsize=(16, 6),
)
ax.set_title("Most Used Weapons", fontsize=30)
ax.set_xlabel("Weapon Names  ", fontsize=20)
ax.set_ylabel("No.of Times Used Weapons", fontsize=20)
plt.show()

In [23]:
# Number of attacks attributed to each group name ('gname').
terrorist_groups = (
    df.groupby(by=['gname'])['gname']
    .count()
    .reset_index(name='no_times_attacked')
)
# Most active groups first.
no_terrorist_groups = terrorist_groups.sort_values('no_times_attacked', ascending=False)
print(no_terrorist_groups)

                                                 gname  no_times_attacked
3408                                           Unknown              82782
3138                                           Taliban               7478
1535       Islamic State of Iraq and the Levant (ISIL)               5613
2948                                 Shining Path (SL)               4555
1120  Farabundo Marti National Liberation Front (FMLN)               3351
...                                                ...                ...
1570   Jaish al-Muhajireen wal-Ansar (Muhajireen Army)                  1
1569                            Jaish al-Islam (Libya)                  1
1565                               Jaish Tahkim al-Din                  1
1564                Jaish Al-Umma (Army of the Nation)                  1
3536             leftist guerrillas-Bolivarian militia                  1

[3537 rows x 2 columns]


In [22]:
# How often each recorded motive text occurs.
terrorist_motivates = (
    df.groupby(by=['motive'])['year']
    .count()
    .reset_index(name='no_times_motive')
)
# Most frequent motives first.
no_terrorist_motivates = terrorist_motivates.sort_values('no_times_motive', ascending=False)
print(no_terrorist_motivates)
# Total number of attacks that have any motive text recorded.
print(no_terrorist_motivates['no_times_motive'].sum())

                                                  motive  no_times_motive
14279                                            Unknown            14889
6290      The specific motive for the attack is unknown.            14430
9128   The specific motive is unknown; however, sourc...              297
6292     The specific motive for the attack is unknown..              148
6277   The specific motive for the attack is unknown ...              144
...                                                  ...              ...
5130   The attack was carried out because the victims...                1
5131   The attack was carried out because the victims...                1
5132   The attack was carried out because the victims...                1
5133   The attack was carried out because the victims...                1
7245   The specific motive is unknown; however, Tehri...                1

[14490 rows x 2 columns]
50561


In [None]:
# Number of attacks per target type.
#
# Name the index explicitly before reset_index(). The column that
# value_counts().reset_index() produces is called 'index' on older pandas but
# takes the Series name ('targtype') on pandas >= 2.0, so renaming 'index'
# afterwards silently did nothing there and the plot's
# x='targets_of_terror_attacks' lookup failed.
TerrorTarget = (
    df['targtype']
    .value_counts()
    .rename_axis('targets_of_terror_attacks')
    .reset_index(name="target_count")
)
TerrorTargets = TerrorTarget
print(TerrorTargets)

In [None]:
# Bar chart: number of attacks per target type.
ax = TerrorTargets.plot.bar(
    x='targets_of_terror_attacks',
    y='target_count',
    figsize=(16, 6),
)
ax.set_title("TerrorTarget Types", fontsize=30)
ax.set_xlabel("Targets of Terror Attacks  ", fontsize=20)
ax.set_ylabel("Target Count", fontsize=20)
plt.show()

In [41]:
# Number of attacks per weapon category.
# The rename cell maps 'weaptype1_txt' to 'weapon_name'; no 'weaptype' column
# exists after a fresh top-to-bottom run, so grouping by 'weaptype' raised a
# KeyError. Group by the column name the notebook actually creates.
weapons_used_attacks = (
    df.groupby(['weapon_name'])['year']
    .count()
    .reset_index(name='weapon_attacks')
)
print(weapons_used_attacks)

                                             weaptype  weapon_attacks
0                                          Biological              35
1                                            Chemical             321
2                                          Explosives           92426
3                                        Fake Weapons              33
4                                            Firearms           58524
5                                          Incendiary           11135
6                                               Melee            3655
7                                               Other             114
8                                        Radiological              14
9                                  Sabotage Equipment             141
10                                            Unknown           15157
11  Vehicle (not to include vehicle-borne explosiv...             136


In [50]:
# Number of attacks per world region.
region_wise_attacks = (
    df.groupby(by=['region_name'])['year']
    .count()
    .reset_index(name='region_attacks')
)
print(region_wise_attacks)

                    region_name  region_attacks
0         Australasia & Oceania             282
1   Central America & Caribbean           10344
2                  Central Asia             563
3                     East Asia             802
4                Eastern Europe            5144
5    Middle East & North Africa           50474
6                 North America            3456
7                 South America           18978
8                    South Asia           44974
9                Southeast Asia           12485
10           Sub-Saharan Africa           17550
11               Western Europe           16639
