# Dealing with Structured Data - Global Terrorism Database

### Thanks to Samuel Capps (6980 2020 QTR 1) for identifying this dynamic

In [None]:
# Import libraries to aid your analysis
import pandas as pd # pandas holds data in a tabular (DataFrame) format
from bokeh.plotting import figure, output_notebook, show # builds interactive graphs for Python
from bokeh.models import Range1d
import math # used in the plotting section for the irrational number pi
output_notebook() # allows inline plotting in the Jupyter notebook

### Let's look at the data

In [None]:
# The dataset was downloaded from the GTD and is read in from a local .csv file.
# Point this path at wherever your own copy of the file is stored.
gtd_csv_path = r"C:\Users\ymamo\Documents\Classes\688\Fall_2021\Lesson 2\Code\globalterrorismdb_0919dist.csv"

# These reader options were settled on after running into problems with the
# data types when reading the file.
GTD = pd.read_csv(gtd_csv_path, engine='python', encoding="ISO-8859-1")

# Look at the structure of the table (first five rows, which is also the default)
GTD.head(5)

In [None]:
# The pandas library has a describe function that computes summary statistics
# for the data... whether or not they are appropriate for a given column
GTD.describe()

In [None]:
# Total number of rows in the dataframe (first entry of its shape)
GTD.shape[0]

In [None]:
# Prints summary information about the dataframe
GTD.info()

In [None]:
# Print the title of every column in the dataframe, one per line
for column_name in GTD.columns.tolist():
    print(column_name)
# !!!! Look in the GTD_Codebook to get more information about each column !!!!

# Problem 1: Change the Country (do after going through the whole notebook)

In [None]:
# Narrow the dataframe down to a single country (here the Philippines).
# Swap "Philippines" for any other country in the dataset to change it.
in_country = GTD["country_txt"] == "Philippines"
country = GTD.loc[in_country]  # keep the matching rows in an object called country
country.head(n=10)  # passing 10 overrides the default of the first 5 rows

### Let's take a deep dive into a country

In [None]:
# Count the number of recorded attacks per group for the selected country.
#
# One vectorised groupby replaces a row-by-row iterrows() loop, which is very
# slow on a frame of this size. sort=False keeps the groups in order of first
# appearance, i.e. the same key order the loop would have produced.
# Rows with a missing "gname" are left out of the tally (groupby's default).
attacks_by_group = country.groupby("gname", sort=False).size().to_dict()

attacks_by_group  # show the {group name: number of attacks} dictionary

In [None]:
# Build the X axis (group names) and Y axis (attack counts) for the bar chart.
# Both lists follow the dictionary's key order, so position i in one matches
# position i in the other.
groups = [*attacks_by_group]
attacks = [*attacks_by_group.values()]

In [None]:
# Sort from the smallest to the largest number of attacks.
#
# Each group is paired with its count and the pairs are sorted once. Looking up
# groups.index(x) inside the sort key would rescan the whole list for every
# element (quadratic in the number of groups). sorted() is stable, so groups
# with equal counts keep their original relative order.
ranked_pairs = sorted(zip(groups, attacks), key=lambda pair: pair[1])
sorted_groups = [group for group, _ in ranked_pairs]
sorted_attacks = [count for _, count in ranked_pairs]


# Problem 2: For any country, plot only the top 5 groups (Hint: use list slicing, e.g. sorted_attacks[-10:])

In [None]:
# Plot an interactive bar chart with the bokeh library --- this is only a very
# basic view of what it can do.
# NOTE(review): plot_width/plot_height are the Bokeh 2.x names; newer Bokeh
# releases use width/height instead -- confirm against the installed version.

# Create the figure, with the group names as the categories on the x axis
p = figure(plot_height=1200, plot_width=800, x_range=sorted_groups)

# Draw one vertical bar per group, rising from 0 up to its number of attacks
p.vbar(x=sorted_groups, top=sorted_attacks, bottom=0, width=0.5, color="firebrick")

# Rotate the labels on the x axis by a quarter turn (pi/2 radians)
p.xaxis.major_label_orientation = math.pi / 2



In [None]:
# Display the figure p in the notebook
show(p)

## Now let's plot the attacks on a map

In [None]:
# Imports the parts of Bokeh and pyproj needed for plotting on a map
from bokeh.tile_providers import get_provider, Vendors
from pyproj import Transformer
# Look up the Stamen Terrain tile source; it is added to the map figure later on
tile_provider = get_provider('STAMEN_TERRAIN')

In [None]:
# From the data already reduced to one country, keep only the lat/long of each
# attack and the name of the group responsible
map_columns = ["latitude", 'longitude', 'gname']
country_map = country.loc[:, map_columns]

# Look at the data -- this time the first 7 rows
country_map.head(n=7)

In [None]:
# Create a pyproj transformer that converts from lat/long (EPSG:4326) to
# web mercator (EPSG:3857)
transformer = Transformer.from_crs(crs_from='epsg:4326', crs_to='epsg:3857')

In [None]:
map_dict = {}   # group name -> list of [x, y] web-mercator points, one per attack
nan_count = {}  # group name -> number of attacks dropped because a coordinate is missing

# Walk the three columns in parallel and associate each group with the
# projected location of its attacks.
for gname, lat, lon in zip(country_map["gname"],
                           country_map["latitude"],
                           country_map["longitude"]):
    # Some records have no lat/long. Count them per group so we know what we are
    # losing. The count is always incremented (never reset to 1), whether or not
    # the group already has mapped points, and a missing longitude is treated
    # the same way as a missing latitude.
    if math.isnan(lat) or math.isnan(lon):
        nan_count[gname] = nan_count.get(gname, 0) + 1
        continue

    # Convert the lat/long to a web-mercator point
    x, y = transformer.transform(lat, lon)
    # setdefault adds the group name on its first mapped attack, then appends
    map_dict.setdefault(gname, []).append([x, y])

# This tells how many attacks we are losing
nan_count

# Problem 3: Print the top 5 groups from the map_dict structure

# Problem 4: Change the map to show a new country. 
## You can get the lat/long for different places here: https://www.latlong.net/

In [None]:
# Two corner points, each given as (latitude, longitude), marking the area the
# map should show
pts = [(5.00, 126.53), (19.00, 117.17)]
# Project both corners with the transformer and keep them, in order, as the
# bounding box
bbox = list(transformer.itransform(pts))


In [None]:
# Split the projected attack points recorded for the New People's Army into
# separate x and y lists for plotting
npa_points = map_dict["New People's Army (NPA)"]
NPA_x = [point[0] for point in npa_points]
NPA_y = [point[1] for point in npa_points]

In [None]:
# Work out the visible extent from the two bounding-box corners.
# Each range is ordered low-to-high, so the corners may be given in any order
# (e.g. south-east then north-west). Passing them through unordered can make a
# range start above its end, which reverses that axis of the map.
x_low, x_high = sorted((bbox[0][0], bbox[1][0]))
y_low, y_high = sorted((bbox[0][1], bbox[1][1]))

# Create the figure covering the bounding box, with mercator axes
p = figure(x_range=(x_low, x_high), y_range=(y_low, y_high),
           x_axis_type="mercator", y_axis_type="mercator")
# Add the map from the Bokeh map vendor, in this case Stamen_Terrain --- see documentation
p.add_tile(tile_provider)
# Place a circle for each converted lat/long attack
p.circle(x=NPA_x, y=NPA_y, color="firebrick")

# Show the plot
show(p)