In [1]:
import pandas as pd
# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
# Never truncate cell text. None is the supported "no limit" value; the old -1
# sentinel has been deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

import json

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
matplotlib.rcParams['pdf.fonttype'] = 42
%matplotlib inline
plt.style.use('fivethirtyeight')

import seaborn as sns


## Fixing the million columns

In [2]:
# Column name -> dtype string passed to the reader, declared grouped by dtype.
dtypes = {
    column: kind
    for kind, columns in (
        ('int', ('eventid', 'iyear', 'crit1', 'crit2', 'crit3')),
        ('str', (
            'imonth', 'iday', 'city', 'summary',
            'attacktype1_txt', 'attacktype2_txt', 'attacktype3_txt',
            'targtype1_txt', 'targsubtype1_txt', 'natlty1_txt',
            'motive', 'propextent_txt',
        )),
        ('category', (
            'country_txt', 'region_txt', 'provstate', 'gname',
            'weaptype1_txt', 'weaptype2_txt',
        )),
        ('float', (
            'latitude', 'longitude', 'doubtterr', 'success', 'suicide',
            'nperps', 'nperpcap', 'claimed',
            'nkill', 'nkillter', 'nwound', 'nhostkid',
        )),
    )
    for column in columns
}

In [None]:
# Load only the listed columns from the workbook, applying the dtype mapping
# defined above and treating the literal string 'nan' as missing.
df = pd.read_excel(
    'data/globalterrorismdb_0718dist.xlsx',
    na_values=['nan'],
    dtype=dtypes,
    usecols=[
        'eventid', 'iyear', 'imonth', 'iday',
        'country', 'country_txt', 'region_txt', 'provstate', 'city',
        'latitude', 'longitude', 'summary',
        'crit1', 'crit2', 'crit3',
        'doubtterr', 'success', 'suicide',
        'attacktype1_txt', 'attacktype2_txt', 'attacktype3_txt',
        'targtype1_txt', 'targsubtype1_txt', 'natlty1_txt',
        'gname', 'motive', 'nperps', 'nperpcap', 'claimed',
        'weaptype1_txt', 'weaptype2_txt', 'weapdetail',
        'nkill', 'nkillter', 'nwound', 'propextent_txt', 'nhostkid',
    ],
)
df

## Reading in Census Bureau populations
Using 2018 population estimates.

In [None]:
# Population table; its NAME and POPESTIMATE2018 columns are used further down
# to join against per-state incident counts.
pops = pd.read_csv("data/scprc-est2018-18+pop-res.csv")

In [None]:
# Preview the first rows of the population table.
pops.head()

## I only want to look at U.S. events

In [None]:
# Keep U.S. events from 2002 onward. The explicit .copy() makes the result an
# independent frame, so later column assignments on `df` do not write into a
# slice of the full dataset (which raises SettingWithCopyWarning).
df = df[(df['country_txt'] == 'United States') & (df.iyear > 2001)].copy()

In [None]:
# Shape of the gname column, i.e. how many rows df currently holds.
df.gname.shape

In [None]:
# Horizontal bar chart of the 25 most frequent group names, leaving out rows
# whose group is "Unknown"; re-sorted ascending so the largest bar is on top.
(
    df[df.gname != "Unknown"]
    .gname
    .value_counts()
    .head(25)
    .sort_values()
    .plot(kind='barh', figsize=(5,12))
)

In [None]:
# Store the kill count as text from here on (missing values become 'nan').
df['nkill'] = df['nkill'].astype(str)

In [None]:
# Export the current frame to CSV (without the index column) for use in QGIS.
df.to_csv("data/terrorism_us.csv", index=False)

## Mapbox?

In [None]:
def get_coords(x):
    """Add a place label and GeoJSON-style point fields to one event row.

    Parameters
    ----------
    x : pandas.Series
        A row exposing ``city``, ``longitude`` and ``latitude`` (and,
        optionally, ``provstate``). The row is modified in place.

    Returns
    -------
    pandas.Series or float
        The same row with ``place``, ``geometry.type`` and
        ``geometry.coordinates`` added, or ``np.nan`` when the row lacks one
        of the required fields.
    """
    try:
        city = x.city
        # GeoJSON order: longitude first, then latitude.
        coords = [x.longitude, x.latitude]
    except AttributeError:
        # The original fallback returned the undefined name `nan`, which
        # raised NameError instead of yielding a missing value.
        return np.nan

    state = x.get('provstate')
    # Only join when both parts are real strings; a missing city or state
    # (NaN) falls back to the bare city value.
    if isinstance(city, str) and isinstance(state, str):
        x['place'] = city + ", " + state
    else:
        x['place'] = city

    x['geometry.type'] = 'Point'
    x['geometry.coordinates'] = coords
    return x

In [None]:
# Run get_coords on every row (axis=1) and rebind df to the result.
df = df.apply(get_coords, axis=1)

In [None]:
def prop_ids(x):
    """Attach map-styling properties to one event row, keyed on attack type.

    Reads ``attacktype1_txt`` and ``nkill`` from the row and writes
    ``properties.name``, ``properties.color``, ``properties_color``,
    ``properties.group_name``, ``properties.group_id`` and
    ``properties.radius``. Attack types not listed below get group id 9.
    The row is modified in place and returned.
    """
    # attack type -> (marker colour, group id)
    known_styles = {
        "Facility/Infrastructure Attack": ("#20365E", 1),
        "Armed Assault": ("#E84C3D", 2),
        "Bombing/Explosion": ("#D25302", 3),
        "Unarmed Assault": ("#FDDBB0", 4),
        "Hostage Taking (Barricade Incident)": ("#354A5F", 5),
        "Assassination": ("#9A59B5", 6),
        "Hostage Taking (Kidnapping)": ("#3598DB", 7),
        "Hijacking": ("#1CBB9B", 8),
    }
    attack_type = x['attacktype1_txt']
    # The fallback shares its colour with "Hijacking" but has its own id.
    color, group_id = known_styles.get(attack_type, ("#1CBB9B", 9))

    x['properties.name'] = attack_type
    x['properties.color'] = color
    x['properties_color'] = x['properties.color']
    x['properties.group_name'] = x['properties.name']
    x['properties.group_id'] = group_id
    x['properties.radius'] = x.nkill
    return x

In [None]:
# Run prop_ids on every row (axis=1) and rebind df to the result.
df = df.apply(prop_ids, axis=1)

In [None]:
def prop_headline(x):
    """Return the given value unchanged (currently an identity function)."""
    return x

In [None]:
# The headline of each feature is derived from the group name.
df['properties.headline'] = df['gname'].apply(prop_headline)

In [None]:
# Preview the first rows, including the columns added above.
df.head()

In [None]:
def prop_article(row):
    """Build the HTML snippet describing one event.

    Parameters
    ----------
    row : object
        Anything exposing ``gname``, ``properties_color``,
        ``attacktype1_txt``, ``summary``, ``nkill``, ``nwound`` and
        ``targtype1_txt`` as attributes (e.g. a DataFrame row).

    Returns
    -------
    str
        A single-line HTML fragment wrapped in ``<div id='article_container'>``.

    Every interpolated value is HTML-escaped, because the text comes straight
    from the dataset and would otherwise be able to break or inject markup.
    Each opening tag is closed by its matching tag.
    """
    from html import escape

    def text(value):
        # str() first: fields may be floats/NaN rather than strings.
        return escape(str(value))

    return (
        "    <div id='article_container'>"
        f"    <h2>{text(row.gname)}</h2>"
        f"    <div id='headline'><h4 style='border-bottom: 5px solid {text(row.properties_color)}'>{text(row.attacktype1_txt)}</h4></div>"
        f"    <p >{text(row.summary)}</p>"
        f"    <p><b>People killed:</b> {text(row.nkill)} people</p>"
        f"    <p><b>People Wounded:</b> {text(row.nwound)}.</p>"
        f"    <p><b>Target:</b> {text(row.targtype1_txt)}</p>"
        "    </div>"
    )


In [None]:
# Build the HTML snippet for every row (axis=1) with prop_article.
df['properties.article'] = df.apply(prop_article, axis=1)

In [None]:
# Write the enriched frame to CSV without the index column. Note that this
# path is relative to the working directory, not the data/ folder.
df.to_csv("terrorism_us_map.csv", index=False)

In [None]:
# Read back only the geometry.* / properties.* columns that feed the GeoJSON
# export.
map_df = pd.read_csv(
    "terrorism_us_map.csv",
    thousands=",",
    usecols=[
        'geometry.coordinates',
        'geometry.type',
        'properties.name',
        'properties.headline',
        'properties.article',
        'properties.color',
        'properties.group_id',
        'properties.group_name',
        'properties.radius',
    ],
)

In [None]:
# The coordinates column comes back from CSV as text; parse each cell into a list.
# NOTE(review): json.loads rejects a lowercase `nan` token, so a row whose
# coordinates were written as "[nan, nan]" would fail here — confirm that every
# row has coordinates.
map_df['geometry.coordinates'] = map_df['geometry.coordinates'].apply(json.loads)

In [None]:
# Serialise the frame to a JSON string (one object per row) and parse it back,
# which yields a list of plain Python dicts.
ok_json = json.loads(map_df.to_json(orient='records'))

In [None]:
def process_to_geojson(file):
    """Convert flat records into a GeoJSON-style FeatureCollection dict.

    ``file`` is an iterable of dicts whose keys look like ``geometry.<field>``
    or ``properties.<field>``. Each record becomes one feature; the part after
    the first dot is used as the key inside the feature's ``geometry`` or
    ``properties`` mapping. Keys with any other prefix are ignored.
    """
    features = []
    for record in file:
        feature = {"type": "Feature", "properties": {}, "geometry": {}}
        for column, cell in record.items():
            parts = column.split('.')
            section = parts[0]
            if section in ('geometry', 'properties'):
                feature[section][parts[1]] = cell
        features.append(feature)
    return {"type": "FeatureCollection", "features": features}

In [None]:
# Turn the list of flat records into a FeatureCollection dict.
geo_format = process_to_geojson(ok_json)

In [None]:
# Emit the feature collection as a JavaScript file that defines `infoData`.
with open('geo-data.js', 'w') as js_file:
    js_file.write(f"var infoData = {json.dumps(geo_format)}")

## Zooming out to look at each state

In [None]:
# Incidents per state. The column names are set explicitly ('index' = state
# name, 'provstate' = incident count) because the names produced implicitly by
# value_counts().reset_index() differ between pandas versions, and the cells
# below refer to exactly these two names.
state_counts = (
    df['provstate']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='provstate')
)

In [None]:
# Preview the first rows of the per-state counts.
state_counts.head()

In [None]:
# Attach the population columns by matching the state name ('index') to NAME.
# This is a default (inner) merge, so states without a match are dropped.
state_counts = pd.merge(state_counts, pops, left_on='index', right_on='NAME')

In [None]:
# Despite the column name, this is a rate per 10,000,000 residents: the
# 'provstate' value divided by the 2018 population estimate, scaled up.
# NOTE(review): 'provstate' is expected to hold the per-state count here — confirm.
state_counts['per_capita'] = (
    state_counts['provstate']
    .div(state_counts['POPESTIMATE2018'])
    .mul(10000000)
)
state_counts

In [None]:
# Count of each target subtype within each state.
df.groupby(by='provstate').targsubtype1_txt.value_counts()

In [None]:
# Display the motive column.
df.motive

In [None]:
# Stacked horizontal bars: one bar per state, one segment per group name.
# The counts are indexed by (provstate, gname), so the innermost level is
# pivoted into columns. The previous level name 'targsubtype1_txt' is not part
# of this index and made unstack() raise KeyError.
(
    df.groupby(by='provstate')
    .gname
    .value_counts()
    .unstack(level=-1)
    .plot(
        kind='barh',
        stacked=True,
        figsize=(10,15),
        legend=True,
        colormap=ListedColormap(sns.color_palette("deep", 50)),
    )
)

In [None]:
# States ordered by the per_capita rate, drawn as horizontal bars.
(
    state_counts
    .sort_values(by='per_capita')
    .plot(kind="barh", x='index', y='per_capita', figsize=(7,15), legend=False)
)

In [None]:
# States ordered by the 'provstate' value, drawn as horizontal bars.
(
    state_counts
    .sort_values(by='provstate')
    .plot(kind="barh", x='index', y='provstate', figsize=(7,15), legend=False)
)