In [363]:
import pandas as pd
import numpy as np
import re

In [364]:
# read the raw CSV file into the working dataframe

df = pd.read_csv(filepath_or_buffer="../data_raw/global_terrorism.csv")

In [365]:
# notebook display limits: render up to 900 rows and 500 columns before pandas truncates

pd.set_option("display.max_rows", 900)
pd.set_option("display.max_columns", 500)

In [366]:
# keep only the events from the years we are interested in (2015 and 2016, both
# inclusive) and renumber the remaining rows starting at 0

df = df[df["iyear"].between(2015, 2016)].reset_index(drop=True)

In [367]:
# raw columns that are carried over into the clean data set, in the order they
# should appear

columns_name = [
    # identification and date parts
    "eventid", "iyear", "imonth", "iday", "approxdate",
    # where the event happened
    "country_txt", "provstate", "city", "location",
    # free-text description and yes/no flags
    "summary", "multiple", "success", "suicide",
    # attack and target classification
    "attacktype1_txt", "targtype1_txt", "targsubtype1_txt",
    # perpetrator information
    "gname", "individual", "nperps",
    # casualties, property value and weapon
    "nkill", "nwound", "propvalue", "weaptype1_txt",
]

In [368]:
# narrow the dataframe down to the selected columns (all rows, listed column order)

df = df.loc[:, columns_name]

In [369]:
# -99 is used as a placeholder value in these integer-like columns: turn it into
# NaN, then store the column with pandas' nullable integer dtype so that missing
# values and whole numbers can live side by side

col_int_to_nan = ["nperps"]

for column in col_int_to_nan:
    df[column] = df[column].replace(-99, np.nan).astype("Int64")

In [370]:
# in these text columns the literal string "Unknown" stands for a missing value,
# so swap it for np.nan

col_str_to_nan = ["attacktype1_txt", "targtype1_txt", "gname", "weaptype1_txt"]

for text_column in col_str_to_nan:
    df[text_column] = df[text_column].replace(to_replace="Unknown", value=np.nan)

In [371]:
# cast the killed / wounded counts to pandas' nullable integer dtype ("Int64"),
# which can hold whole numbers alongside missing values

col_to_int = ["nkill", "nwound"]

df = df.astype({count_column: "Int64" for count_column in col_to_int})

In [372]:
# turn the yes/no indicator columns into real booleans
# NOTE(review): astype("bool") maps every non-zero value -- NaN included -- to
# True; presumably these columns only contain 0 and 1, confirm against the raw data

col_to_bool = ["multiple", "individual", "success", "suicide"]

df = df.astype(dict.fromkeys(col_to_bool, "bool"))

In [373]:
# flip the individual flag (True <-> False): the column is meant to express
# affiliation, so the inverted value is what we want to keep
# (the column itself is renamed further down in the notebook)

df["individual"] = ~df["individual"]

In [374]:
# build a single date series out of the year, month and day columns
#
# The temporary frame is sliced straight from df (instead of being rebuilt from
# plain lists), so it -- and the resulting series -- carries df's own index.
# Any later index-based merge/join therefore lines the dates up with the right
# rows even if df's index is not a fresh 0..n-1 range.

date = df[["iyear", "imonth", "iday"]].rename(
    columns={"iyear": "year", "imonth": "month", "iday": "day"}
)

# pd.to_datetime assembles datetimes from columns named year / month / day;
# it raises if a row does not form a valid calendar date
date_col = pd.to_datetime(date).rename("date")

In [375]:
# attach the date series to the main dataframe as a new "date" column,
# matching rows on their index labels

df = df.merge(date_col, left_index=True, right_index=True)

In [376]:
# remove columns that are redundant or not useful for our purposes:
# - iyear, imonth, iday: already combined into the date column
# - approxdate: not needed, every remaining event has a concrete date
# - country_txt: not needed, our raw data set only contains events that happened in the UK
# - propvalue: holds only one non-NaN value, so we decided to drop the column

df.drop(
    columns=["iyear", "imonth", "iday", "approxdate", "country_txt", "propvalue"],
    inplace=True,
)

In [377]:
# renaming columns for better understanding
#
# The rename is written as an explicit old-name -> new-name mapping rather than
# a positional list, so a change to the selected/dropped columns or to their
# order can no longer silently attach a label to the wrong column.
#
# NOTE(review): "preps_group_name" looks like a typo for "perps_group_name"; it
# is kept unchanged because the name ends up in the exported CSV.
# NOTE(review): presumably `multiple` flags events that belong to a multi-part
# incident rather than events with several perpetrators -- confirm against the
# data set's codebook before relying on the "multiple_perps(y/n)" label.

column_renames = {
    "eventid": "event_id",
    "provstate": "state",
    "city": "city",
    "location": "location",
    "summary": "event_summary",
    "multiple": "multiple_perps(y/n)",
    "success": "attack_succeeded(y/n)",
    "suicide": "suicide_attack(y/n)",
    "attacktype1_txt": "attack_type",
    "targtype1_txt": "target_type",
    "targsubtype1_txt": "target_subtype",
    "gname": "preps_group_name",
    "individual": "affiliated(y/n)",
    "nperps": "no_of_perps",
    "nkill": "people_killed",
    "nwound": "people_wounded",
    "weaptype1_txt": "weapon_type",
    "date": "date",
}

# kept for backward compatibility: the final column names as a plain list
new_col_names = list(column_renames.values())

# errors="raise" turns a missing source column into a KeyError instead of
# letting the rename skip it quietly
df = df.rename(columns=column_renames, errors="raise")

In [379]:
# save the clean, condensed dataframe as CSV and preview its first five rows
# (with the default settings the dataframe index is written too, as the first,
# unnamed CSV column)

df.to_csv(path_or_buf="../data_clean/global_terrorism_clean_uk_only.csv")
df.head(5)

Unnamed: 0,event_id,state,city,location,event_summary,multiple_perps(y/n),attack_succeeded(y/n),suicide_attack(y/n),attack_type,target_type,target_subtype,preps_group_name,affiliated(y/n),no_of_perps,people_killed,people_wounded,weapon_type,date
0,201501050067,Northern Ireland,Balleymoney,,01/05/2015: Assailants attacked Brian McIlhagg...,False,True,False,Armed Assault,Private Citizens & Property,Named Civilian,Loyalists,True,4.0,1,1,Firearms,2015-01-05
1,201501080057,Northern Ireland,Belfast,,01/08/2015: An explosive device hidden inside ...,False,False,False,Bombing/Explosion,Police,Police Security Forces/Officers,,True,,0,0,Explosives,2015-01-08
2,201501170069,Northern Ireland,Londonderry,,01/17/2015: Assailants opened fire on patrons ...,False,True,False,Armed Assault,Business,Restaurant/Bar/Café,Dissident Republicans,True,,0,0,Firearms,2015-01-17
3,201501200046,Northern Ireland,Belfast,,01/20/2015: An explosive device was discovered...,False,False,False,Bombing/Explosion,,,,True,,0,0,Explosives,2015-01-20
4,201501300075,Northern Ireland,Belfast,,01/30/2015: Assailants opened fire on a civili...,False,True,False,Armed Assault,Private Citizens & Property,Named Civilian,Dissident Republicans,True,,0,1,Firearms,2015-01-30
