In [186]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

# Notebook-wide pandas display settings: very high column limit, long cell
# text allowed, and floats rendered with three decimal places.
pd.set_option("display.float_format", lambda value: "%.3f" % value)
pd.set_option("display.max_colwidth", 500)
pd.set_option("display.max_columns", 5400)

In [187]:
# Read the two input CSVs from ../data_clean. The bike file is decoded as
# ISO-8859-1 and the terrorism file as UTF-8 (the original cell kept
# "ISO-8859-1" as a commented-out alternative for the latter).
bike_data = pd.read_csv(
    "../data_clean/london_merged_clean.csv",
    encoding="ISO-8859-1",
)
terrorism_data = pd.read_csv(
    "../data_clean/global_terrorism_clean_uk_only.csv",
    encoding="utf-8",
)

In [188]:
# Remove the "Unnamed: 0" column from the terrorism frame.
# errors="ignore" makes the cell idempotent: a second run (when the column
# has already been dropped) no longer raises KeyError.
terrorism_data = terrorism_data.drop(columns=["Unnamed: 0"], errors="ignore")

In [189]:
# Left join on "date": every bike row is kept; rows without a matching
# terrorism record get NaN in the terrorism columns.
df = pd.merge(bike_data, terrorism_data, on="date", how="left")

In [190]:
# Mark rows without a terrorism record with the sentinel string 'NAN'.
df["event_id"] = df["event_id"].replace(to_replace=np.nan, value="NAN")

In [191]:
# Turn event_id into a boolean flag: True wherever the value is not the
# 'NAN' sentinel.
df["event_id"] = np.where(df["event_id"].ne("NAN"), True, False)

In [192]:
# The boolean event_id column now means "was there an attack" - rename it.
df = df.rename(columns={"event_id": "attack(y/n)"})

In [193]:
# Parse the date column into pandas datetimes.
df["date"] = df["date"].pipe(pd.to_datetime)

In [194]:
# Use the parsed dates as the row index.
df = df.set_index(keys="date")

In [195]:
# Independent copy holding only the rental counts and the attack flag.
df_base = df.loc[:, ["bike_rents", "attack(y/n)"]].copy()

In [196]:
# Keep only the first row for each date so that the date index is unique.
# (Presumably duplicates come from the left merge producing one row per
# terrorism record on the same date - verify against the input data.)
df_base = df_base.loc[~df_base.index.duplicated(keep='first')]

In [197]:
def get_dates_of_attack_lst(df):
    """Return the distinct index labels of rows flagged as attacks.

    Rows are selected where the ``attack(y/n)`` column equals True. The
    labels are returned as a list in order of first appearance, with
    repeated labels collapsed to a single entry.
    """
    attack_rows = df.loc[df["attack(y/n)"].eq(True)]
    return list(dict.fromkeys(attack_rows.index))
    

In [198]:
def get_date_index(date, days = 8):
    """Build the window of daily timestamps centred on ``date``.

    The window is ``days`` consecutive dates ending on ``date`` followed by
    ``days`` consecutive dates starting on ``date``; the shared ``date`` is
    kept once, so for ``days >= 1`` the result has ``2 * days - 1`` entries
    with ``date`` at position ``days - 1``.

    Returns the window as a ``DatetimeIndex`` in chronological order.
    """
    leading = pd.date_range(end=date, periods=days)
    trailing = pd.date_range(start=date, periods=days)
    # dict.fromkeys removes the duplicated centre date while keeping order.
    ordered_unique = dict.fromkeys([*leading, *trailing])
    return pd.to_datetime(list(ordered_unique))

In [199]:
def get_rents_counts(df, date_index, days):
    """Return the ``bike_rents`` values for the date window around ``date_index``.

    The window is produced by ``get_date_index(date_index, days)``. Window
    dates that are not present in ``df``'s index appear in the result as NaN,
    which is what the callers' ``isnull()`` checks rely on to skip incomplete
    windows.

    ``df`` must have a unique index; ``reindex`` raises ``ValueError`` on
    duplicate labels.
    """
    window_dates = get_date_index(date_index, days)
    # Use reindex rather than ``series[labels]``: since pandas 1.0, list-like
    # indexing with any missing label raises KeyError instead of yielding NaN.
    return df['bike_rents'].reindex(window_dates)

In [200]:
def get_diff(ser):
    """Return (sum of the trailing part) minus (sum of the leading part) of ``ser``.

    With ``half = len(ser) // 2``, the leading part is the first ``half``
    elements and the trailing part is everything after position ``half``;
    the element at position ``half`` itself is left out of both sums. For an
    odd-length input this compares two equally sized halves around the
    middle element.
    """
    half = len(ser) // 2
    total_before = sum(ser[:half])
    total_after = sum(ser[half + 1:])
    return total_after - total_before

In [201]:
def get_diff_df(df, days = 8):
    """Average after-minus-before rental difference over all attack dates.

    For every distinct attack date in ``df`` (see ``get_dates_of_attack_lst``)
    the rental counts for the surrounding window are fetched with
    ``get_rents_counts`` and reduced to a single difference by ``get_diff``.
    Windows containing any missing value (e.g. windows reaching past the
    beginning or end of the data) are skipped.

    Parameters
    ----------
    df : DataFrame with ``bike_rents`` and ``attack(y/n)`` columns.
    days : window size forwarded to ``get_rents_counts``.

    Returns
    -------
    The mean of the per-attack differences, or NaN when no attack has a
    complete window (previously this case raised ZeroDivisionError).
    """
    diffs = []
    for attack_date in get_dates_of_attack_lst(df):
        window = get_rents_counts(df, attack_date, days)
        # An incomplete window would bias the before/after sums - skip it.
        if window.isnull().any():
            continue
        diffs.append(get_diff(window))
    if not diffs:
        return float("nan")
    return sum(diffs) / len(diffs)


In [209]:
# days=12: the 11 dates before each attack vs. the 11 dates after it.
get_diff_df(df_base, days=12)

-226.20118343195267

In [202]:
# days=8: the 7 dates before each attack vs. the 7 dates after it.
get_diff_df(df_base, days=8)

-1360.7882352941176

In [203]:
# days=4: the 3 dates before each attack vs. the 3 dates after it.
get_diff_df(df_base, days=4)

-1310.3139534883721

In [207]:
# days=3: the 2 dates before each attack vs. the 2 dates after it.
get_diff_df(df_base, days=3)

181.27167630057804

In [208]:
# days=2: the single date before each attack vs. the single date after it.
get_diff_df(df_base, days=2)

504.7586206896552

In [204]:
def get_before_after_tup(ser):
    """Return ``(leading_sum, trailing_sum)`` for ``ser``.

    With ``half = len(ser) // 2``, ``leading_sum`` adds up the first ``half``
    elements and ``trailing_sum`` adds up everything after position ``half``;
    the element at position ``half`` is excluded from both.
    """
    half = len(ser) // 2
    return sum(ser[:half]), sum(ser[half + 1:])

In [211]:
def get_befre_after_df(df, days = 8):
    """Collect the before/after rental sums for every attack date.

    For each distinct attack date in ``df`` the surrounding window of rental
    counts is fetched via ``get_rents_counts``; windows containing any
    missing value are skipped. Each remaining window is reduced with
    ``get_before_after_tup``.

    Returns a DataFrame with one row per usable attack date and the columns
    ``before_sum`` and ``after_sum``.
    """
    windows = (
        get_rents_counts(df, attack_date, days)
        for attack_date in get_dates_of_attack_lst(df)
    )
    # Only windows without gaps contribute a row.
    rows = [
        get_before_after_tup(window)
        for window in windows
        if window.isnull().sum() == 0
    ]
    return pd.DataFrame(rows, columns=["before_sum", "after_sum"])

In [229]:
# Before/after sums per attack, using the 2 dates on either side (days=3).
df_corr = get_befre_after_df(df_base, days=3)

In [230]:
# Pearson correlation between the before and after sums.
df_corr.corr("pearson")

Unnamed: 0,before_sum,after_sum
before_sum,1.0,0.574
after_sum,0.574,1.0


In [214]:
# Show the first row to recall which columns are available.
df.iloc[:1]

Unnamed: 0_level_0,bike_rents,temp_actual(C),temp_feeling(C),humidity,wind_speed,weather_condition,holiday(y/n),weekend(y/n),season,attack(y/n),state,city,location,event_summary,multiple_perps(y/n),attack_succeeded(y/n),suicide_attack(y/n),attack_type,target_type,target_subtype,preps_group_name,affiliated(y/n),no_of_perps,people_killed,people_wounded,weapon_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
2015-01-04,9234,2.48,0.65,94.27,7.5,partly cloudy,False,True,winter,False,,,,,,,,,,,,,,,,


## No Northern Ireland

In [None]:
# Independent copy with the rental counts, the attack flag and the state,
# so attacks can be filtered by state.
df_no_NI = df.loc[:, ["bike_rents", "attack(y/n)", "state"]].copy()

In [None]:
# Preview only the first rows instead of rendering the whole frame.
df_no_NI.head()

In [215]:
def get_diff_df_1(df, days = 8):
    """Average after-minus-before rental difference, ignoring Northern Ireland.

    Works like the all-attacks average but only considers rows where
    ``attack(y/n)`` is True and ``state`` is not "Northern Ireland". For each
    distinct such date the window of rental counts is fetched with
    ``get_rents_counts`` and reduced by ``get_diff``; windows containing any
    missing value are skipped.

    Parameters
    ----------
    df : DataFrame with ``bike_rents``, ``attack(y/n)`` and ``state`` columns.
    days : window size forwarded to ``get_rents_counts``.

    Returns
    -------
    The mean of the per-attack differences, or NaN when no attack has a
    complete window (previously this case raised ZeroDivisionError).
    """
    is_attack = df["attack(y/n)"] == True
    outside_ni = df["state"] != "Northern Ireland"
    # Distinct attack dates, in order of first appearance.
    attack_dates = list(dict.fromkeys(df.loc[is_attack & outside_ni].index))

    diffs = []
    for attack_date in attack_dates:
        window = get_rents_counts(df, attack_date, days)
        # An incomplete window would bias the before/after sums - skip it.
        if window.isnull().any():
            continue
        diffs.append(get_diff(window))
    if not diffs:
        return float("nan")
    return sum(diffs) / len(diffs)
# Keep only the first row per date so the date index is unique.
# NOTE(review): keeping the first row also keeps only that row's `state`; a
# date whose first record is in Northern Ireland would hide any other attack
# on the same date from the state filter - confirm this is intended.
df_no_NI = df_no_NI[~df_no_NI.index.duplicated(keep="first")]

In [216]:
# days=8: the 7 dates before each non-NI attack vs. the 7 dates after it.
get_diff_df_1(df_no_NI, days=8)

919.5

In [217]:
# days=4: the 3 dates before each non-NI attack vs. the 3 dates after it.
get_diff_df_1(df_no_NI, days=4)

-1929.423076923077

In [218]:
# days=3: the 2 dates before each non-NI attack vs. the 2 dates after it.
get_diff_df_1(df_no_NI, days=3)

-569.1538461538462

In [232]:
# Show the first row to recall which columns are available.
df.iloc[:1]

Unnamed: 0_level_0,bike_rents,temp_actual(C),temp_feeling(C),humidity,wind_speed,weather_condition,holiday(y/n),weekend(y/n),season,attack(y/n),state,city,location,event_summary,multiple_perps(y/n),attack_succeeded(y/n),suicide_attack(y/n),attack_type,target_type,target_subtype,preps_group_name,affiliated(y/n),no_of_perps,people_killed,people_wounded,weapon_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
2015-01-04,9234,2.48,0.65,94.27,7.5,partly cloudy,False,True,winter,False,,,,,,,,,,,,,,,,


In [233]:
# Index labels of all attack rows that are not in Northern Ireland
# (taken from the full frame, so a date can appear more than once).
test = df.loc[
    df["attack(y/n)"].eq(True) & df["state"].ne("Northern Ireland")
].index

In [234]:
# Number of attack rows outside Northern Ireland.
test.size

30

In [235]:
# Frequency of each weapon type across the merged rows.
df.loc[:, "weapon_type"].value_counts()

Explosives    86
Incendiary    85
Firearms      31
Melee         12
Other          2
Name: weapon_type, dtype: int64

In [241]:
# Same as the non-NI attack selection, additionally leaving out rows whose
# weather condition is "rain" or "snow".
test1 = df.loc[
    df["attack(y/n)"].eq(True)
    & df["state"].ne("Northern Ireland")
    & ~df["weather_condition"].isin(["rain", "snow"])
].index

In [242]:
# Number of non-NI attack rows on days without rain or snow.
test1.size

26

In [243]:
# Frequency of each weather condition across the merged rows.
df.loc[:, "weather_condition"].value_counts()

clear            317
few clouds       173
partly cloudy    171
rain              82
cloudy            31
snow               1
Name: weather_condition, dtype: int64