# Year-by-Year Choropleth Map

The year-by-year choropleth map was built with the help of the following references:
1. Understanding Bokeh and adding a slider: https://pythonawesome.com/bokeh-plotting-backend-for-pandas-and-geopandas/
2. Shapefile for Syria: https://data.humdata.org/dataset/356a63e9-90aa-4b9c-a938-58ef24469c00
3. Converting a pandas DataFrame to a GeoDataFrame: https://gis.stackexchange.com/questions/174159/convert-a-pandas-dataframe-to-a-geodataframe

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas_bokeh

import os
import json

# Route Pandas-Bokeh output into the notebook: without this call,
# running plot_bokeh creates a new window instead of showing results inline.
pandas_bokeh.output_notebook()

In [None]:
# locate the admin-level-1 shapefile and load it as a geo data frame
shp_dir = 'syr_admin_shp_utf8_18219'
shp_file = os.path.join(shp_dir, 'syr_admin1.shp')
map_df = gpd.read_file(shp_file)

# uncomment to draw the plain map:
# map_df.plot()

In [None]:
# load the VDC csv into a pandas data frame, keeping every column as text
vdc_csv = 'vdc_data.csv'
dataset = pd.read_csv(vdc_csv, dtype=str, encoding='latin-1')

# uncomment to print out the data frame:
# dataset

In [None]:
# the date text also carries month and day, so keep only its first
# four characters as the year of death
death_dates = dataset['Date of death']
dataset['Year of Death'] = death_dates.str.slice(0, 4)

# for every (province, year) pair, count the non-null entries of each column
by_province_year = dataset.groupby(['Province', 'Year of Death'])
province_count = by_province_year.count()

In [None]:
# keep only the first count column to make the data frame smaller, then
# turn the (Province, Year of Death) index back into ordinary columns
first_count_only = province_count.iloc[:, [0]]
simplified_df = first_count_only.reset_index()

In [None]:
# one row per province, one column per year of death
year_as_column = simplified_df.pivot_table(
    values='Unnamed: 0',
    index='Province',
    columns='Year of Death',
).reset_index()

In [None]:
# discard the year columns that are not relevant to the map
irrelevant_years = ['0000', '1970']
year_as_column = year_as_column.drop(columns=irrelevant_years)

In [None]:
# hand-written mapping from the province spellings in the casualty data
# to the spellings used when joining against the shapefile
name_change = dict([
    ("Damascus Suburbs", "Rural Damascus"),
    ("Daraa", "Dar'a"),
    ("Deir Ezzor", "Deir-ez-Zor"),
    ("Hasakeh", "Al-Hasakeh"),
    ("Idlib", "Idleb"),
    ("Raqqa", "Ar-Raqqa"),
    ("Sweida", "As-Sweida"),
])

# apply the name_change mapping to the values of the data frame
year_as_column = year_as_column.replace(name_change)

In [None]:
# line up casualties (VDC) and shapes (shape file) on the province name,
# then bring the province name back as a regular column
casualties_by_province = year_as_column.set_index('Province')
shapes_by_province = map_df.set_index('NAME_EN')
merged = casualties_by_province.join(shapes_by_province).reset_index()

In [None]:
# dropping irrelevant information
# rows where there were no data for geo data: select them by their missing
# geometry rather than by hard-coded row positions, so the cell keeps working
# if the set or order of provinces in the casualty data changes
merged = merged.dropna(subset=['geometry'])
# columns with information not pertaining to creating choropleth map
# (positional slice kept as-is; it relies on the column order of the join)
merged.drop(merged.columns[9:16], axis=1, inplace=True)

In [None]:
# Pandas dataframe to GeoDataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point

# separate the shapes from the attribute columns so the geometry can be
# handed to the GeoDataFrame constructor explicitly
geometry = merged['geometry']
merged_gdf = merged.drop(['geometry'], axis=1)
# authority-string form of the CRS; the {'init': 'epsg:4326'} dict form is
# deprecated in current geopandas/pyproj
# NOTE(review): this still assumes the shapefile coordinates are EPSG:4326 — confirm
crs = 'EPSG:4326'
gdf = GeoDataFrame(merged_gdf, crs=crs, geometry=geometry)

In [None]:
# specify slider columns: one column label per year from 2011 to 2017
slider_range = range(2011, 2018)
slider_columns = [str(year) for year in slider_range]

# make slider plot: collect the options first, then draw the map with a
# "Year" slider that switches between the yearly columns
year_plot_options = {
    'figsize': (900, 600),
    'slider': slider_columns,
    'slider_range': slider_range,
    'slider_name': "Year",
    'colormap': 'Inferno',
    'hovertool_columns': ["Province"],
    'title': "Deaths in Syria",
}
gdf.plot_bokeh(**year_plot_options)

# Day-by-Day Choropleth Map

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas_bokeh

import os
import json
import pickle

# Route Pandas-Bokeh output into the notebook: without this call,
# running plot_bokeh creates a new window instead of showing results inline.
pandas_bokeh.output_notebook()

In [None]:
# locate the admin-level-1 shapefile and load it as a geo data frame
shp_dir = 'syr_admin_shp_utf8_18219'
shp_file = os.path.join(shp_dir, 'syr_admin1.shp')
map_df = gpd.read_file(shp_file)

# uncomment to draw the plain map:
# map_df.plot()

In [None]:
# load the pickle objects as a pandas data frame
# (the with-block closes the file handle once loading is done)
# NOTE(review): unpickling can execute arbitrary code — only load a trusted file
with open('./death_by_province_by_day.pickle', 'rb') as pickle_file:
    day_df = pickle.load(pickle_file)

# read the VDC csv file specifically for column "geometry"
# NOTE(review): `dataset` is not referenced again in the cells visible here — confirm it is still needed
dataset = pd.read_csv('vdc_data.csv', encoding='latin-1', dtype=str)

# print out the data frame
# day_df

In [None]:
# show the distinct province names present in the daily data
day_df.loc[:, 'province'].unique()

In [None]:
# changing province names by hand
# 'Hasakeh' is included so this mapping agrees with the one used for the
# year-by-year map; without it that province keeps a spelling the other
# section had to rename before joining on NAME_EN
name_change = {
    'Damascus Suburbs': 'Rural Damascus',
    'Daraa': 'Dar\'a',
    'Deir Ezzor': 'Deir-ez-Zor',
    'Hasakeh': 'Al-Hasakeh',
    'Raqqa': 'Ar-Raqqa',
    'Sweida': 'As-Sweida',
    'Idlib': 'Idleb',
}

# apply the name_change mapping to the values of the data frame
day_df = day_df.replace(name_change)

In [None]:
# one row per province, one column per day; combinations without an
# entry are filled with 0
casualties_by_day = day_df.pivot_table(
    values='casualties',
    index='province',
    columns='day',
)
pivoted_df = casualties_by_day.fillna(0)

In [None]:
# attach the geo data by matching the province index against NAME_EN
shapes_by_province = map_df.set_index('NAME_EN')
use = pivoted_df.join(shapes_by_province)

In [None]:
# shapefile attribute columns that the map does not need
unneeded_columns = [
    'NAM_EN_REF',
    'NAME_AR',
    'PCODE',
    'ADM0_EN',
    'ADM0_AR',
    'ADM0_PCODE',
    'UPDATE_DAT',
]
ready = use.drop(columns=unneeded_columns)

In [None]:
# remove the "year-month-day time" and replace it with the "day":
# the first 2687 columns are relabelled "0", "1", ... by position.
# A single rename with the full mapping replaces 2687 separate rename calls,
# each of which copied the whole data frame.
day_labels = {
    old_label: str(position)
    for position, old_label in enumerate(ready.columns[:2687])
}
# index=str is kept from the original call (applies str to every index label)
ready = ready.rename(index=str, columns=day_labels)

In [None]:
# Pandas dataframe to GeoDataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point

# the shapes to use as the active geometry of the GeoDataFrame
geometry = ready['geometry']
# authority-string form of the CRS; the {'init': 'epsg:4326'} dict form is
# deprecated in current geopandas/pyproj
# NOTE(review): this still assumes the shapefile coordinates are EPSG:4326 — confirm
crs = 'EPSG:4326'
day_gdf = GeoDataFrame(ready, crs=crs, geometry=geometry)

In [None]:
# move the index back into the frame as a regular column
# (kept, not dropped, so it becomes the 'province' column)
day_gdf = day_gdf.reset_index(drop=False)

In [None]:
# specify slider columns: one column label ("0" ... "2686") per day
slider_range = range(0, 2687)
slider_columns = [str(day) for day in slider_range]

# make slider plot
# brute force color bar for map: 20 hand-picked shades running from light
# to dark green, followed by runs of repeated dark shades (260 entries total)
green_ramp = [
    '#edf8f3', '#dcf2e8', '#cbebdd', '#b9e5d2', '#a8dfc7',
    '#97d8bc', '#85d2b1', '#74cba6', '#63c59b', '#52bf90',
    '#52bf90', '#49ab81', '#419873', '#398564', '#317256',
    '#295f48', '#204c39', '#18392b', '#18392b', '#18392b',
]
dark_runs = (
    ['#10261c'] * 10
    + ['#0a1812'] * 10
    + ['#08140f'] * 10
    + ['#07110c'] * 10
    + ['#08130e'] * 200
)

day_gdf.plot_bokeh(
    figsize=(900, 600),
    slider=slider_columns,
    slider_range=slider_range,
    slider_name="Day",
    colormap=green_ramp + dark_runs,
    hovertool_columns=["province"],
    title="Deaths in Syria",
)