In [1]:
import requests
import pickle
import time
import tqdm
import glob
import os
import re

import pandas as pd
import numpy  as np

from statistics  import mean
from collections import defaultdict
from bs4         import BeautifulSoup as bs

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

# highest_number  = 202383
# stated_number   = 191776
# recorded_number = 140281

def save(obj, name):
    """Pickle ``obj`` into the file ``<name>.pickle``.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    name : str
        Target path without the ``.pickle`` suffix; the suffix is appended
        here.
    """
    # The context manager flushes and closes the handle even when pickling
    # raises part-way through, instead of leaving that to garbage collection.
    with open(name + '.pickle', 'wb') as handle:
        pickle.dump(obj, handle)

def load(name):
    """Return the object stored in the file ``<name>.pickle``.

    Parameters
    ----------
    name : str
        Source path without the ``.pickle`` suffix; the suffix is appended
        here.

    Returns
    -------
    object
        Whatever object was pickled into the file.
    """
    # NOTE: unpickling can execute arbitrary code, so only use this on files
    # this notebook (or another trusted source) produced.
    # The context manager closes the handle as soon as the object is read.
    with open(name + '.pickle', 'rb') as handle:
        return pickle.load(handle)

In [2]:
def create_death_by_year_day(vdc_df=None):
    """Build the filtered casualty table plus per-day and per-year counts.

    Parameters
    ----------
    vdc_df : pandas.DataFrame, optional
        Raw records containing at least the columns selected below. When
        omitted, the frame is read from ``vdc_df.pickle`` via ``load``.
        The input frame is not modified.

    Returns
    -------
    scratch : pandas.DataFrame
        The selected columns, restricted to the provinces in ``picked`` and
        to rows whose 'Date of death' is neither '0000-00-00' nor
        '1970-01-01'; 'Date of death' is converted with ``pd.to_datetime``.
    death_by_province_by_day : pandas.DataFrame
        Columns ``province``, ``day``, ``casualties``: number of rows per
        province and date of death.
    death_by_province_by_year : pandas.DataFrame
        Columns ``province``, ``year``, ``casualties``: number of rows per
        province and calendar year of death.
    """
    if vdc_df is None:
        vdc_df = load('vdc_df')

    columns = ['Province',
               'Sex',
               'Status',
               'Date of death',
               'Cause of Death',
               'Actors',
               'Rank / Military situation']

    picked = ['Damascus Suburbs',
              'Idlib'     ,
              'Aleppo'    ,
              'Hama'      ,
              'Daraa'     ,
              'Quneitra'  ,
              'Damascus'  ,
              'Homs'      ,
              'Deir Ezzor',
              'Raqqa'     ,
              'Tartous'   ,
              'Lattakia'  ,
              'Sweida'
    ]

    # Presumably placeholders for unknown dates (zero date / Unix epoch) --
    # TODO confirm against the data source.
    excluded_dates = ['0000-00-00', '1970-01-01']

    keep = vdc_df['Province'].isin(picked)
    for excluded in excluded_dates:
        keep = keep & (vdc_df['Date of death'] != excluded)

    # Filter first, then take ONE explicit copy: assigning the converted
    # column to a boolean-filtered frame would otherwise raise
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    scratch = vdc_df.loc[keep, columns].copy()
    scratch['Date of death'] = pd.to_datetime(scratch['Date of death'])

    def count_by(time_key, time_label):
        """Count rows per (province, time_key) as a flat three-column frame."""
        counts = scratch.groupby(['Province', time_key])['Date of death'].count()
        # `name=` avoids a clash between the value column and the index
        # level, both of which are called 'Date of death' at this point.
        counts = counts.reset_index(name='casualties')
        counts.columns = ['province', time_label, 'casualties']
        return counts

    death_by_province_by_day  = count_by(scratch['Date of death'], 'day')
    death_by_province_by_year = count_by(scratch['Date of death'].dt.year, 'year')

    return scratch, death_by_province_by_day, death_by_province_by_year

In [3]:
# Load the three derived tables from their pickle caches, rebuilding them
# when any cache file is missing so the notebook survives a fresh
# "Restart & Run All" without manually un-commenting code.
# Paths are relative to the current working directory, as in save()/load().
cached_names = ['scratch', 'death_by_province_by_day', 'death_by_province_by_year']

if all(os.path.exists(cached_name + '.pickle') for cached_name in cached_names):
    scratch                   = load('scratch')
    death_by_province_by_year = load('death_by_province_by_year')
    death_by_province_by_day  = load('death_by_province_by_day')
else:
    # Rebuild everything from the raw data and persist it for the next run.
    scratch, death_by_province_by_day, death_by_province_by_year = create_death_by_year_day()
    save(scratch, 'scratch')
    save(death_by_province_by_day , 'death_by_province_by_day')
    save(death_by_province_by_year, 'death_by_province_by_year')

# Line Chart

In [4]:
# One line per province: casualties per year.
# `years` is kept as a notebook-level name, but it is NOT used for the x axis
# (see the comment inside the loop).
years     = list(death_by_province_by_year['year'].unique())
provinces = list(death_by_province_by_year['province'].unique())

list_of_traces = []

for province in provinces:
    data = death_by_province_by_year[death_by_province_by_year['province'] == province]

    # x must come from this province's own rows. `years` is the union over
    # all provinces, so pairing it with a province that has no rows for some
    # year would shift its casualty values onto the wrong years.
    trace = go.Scatter(
                name = province,
                x    = data['year'],
                y    = data['casualties']
    )

    list_of_traces.append(trace)

layout = go.Layout(
            title = 'Casualties per year by province',
            xaxis = dict(title='Year'),
            yaxis = dict(title='Casualties')
)

iplot(go.Figure(data=list_of_traces, layout=layout))

# Grouped Bar Chart

In [5]:
# One bar series per province: casualties per year, bars side by side
# (plotly's default bar mode).
# `years` is kept as a notebook-level name, but it is NOT used for the x axis
# (see the comment inside the loop).
years     = list(death_by_province_by_year['year'].unique())
provinces = list(death_by_province_by_year['province'].unique())

list_of_traces = []

for province in provinces:
    data = death_by_province_by_year[death_by_province_by_year['province'] == province]

    # x must come from this province's own rows. `years` is the union over
    # all provinces, so pairing it with a province that has no rows for some
    # year would attach its casualty values to the wrong years.
    trace = go.Bar(
                name = province,
                x    = data['year'],
                y    = data['casualties']
    )

    list_of_traces.append(trace)

layout = go.Layout(
            title = 'Casualties per year by province',
            xaxis = dict(title='Year'),
            yaxis = dict(title='Casualties')
)

iplot(go.Figure(data=list_of_traces, layout=layout))


# More work needs to be done to sort the data before it gets to plotly, 
# but this is cumbersome, so I'll avoid it for now

# Stacked Bar Chart

In [6]:
# One bar series per province: casualties per year, stacked into a single
# bar per year.
# `years` is kept as a notebook-level name, but it is NOT used for the x axis
# (see the comment inside the loop).
years     = list(death_by_province_by_year['year'].unique())
provinces = list(death_by_province_by_year['province'].unique())

list_of_traces = []

for province in provinces:
    data = death_by_province_by_year[death_by_province_by_year['province'] == province]

    # x must come from this province's own rows. `years` is the union over
    # all provinces, so pairing it with a province that has no rows for some
    # year would stack its casualty values onto the wrong years.
    trace = go.Bar(
                name = province,
                x    = data['year'],
                y    = data['casualties']
    )

    list_of_traces.append(trace)

layout = go.Layout(
            barmode = 'stack',
            title   = 'Casualties per year by province (stacked)',
            xaxis   = dict(title='Year'),
            yaxis   = dict(title='Casualties')
)

fig = go.Figure(data=list_of_traces, layout=layout)
iplot(fig)


# Histogram

In [7]:
# Histogram of every record's date of death; binning is left to plotly.
death_dates = scratch['Date of death']
data = [
    go.Histogram(x=death_dates),
]
iplot(data)

In [8]:
# Distinct per-province, per-day casualty counts that occur in the data.
daily_casualties = death_by_province_by_day['casualties']
daily_casualties.unique()

array([  2,   1,   4,   3,   9,   6,   8,   7,   5,  11,  13,  10,  15,
        12,  14,  16,  23,  26,  18,  41,  32,  37,  24,  40,  21,  33,
        19,  20,  22,  27,  62,  34,  30,  50,  58,  25,  38,  35,  29,
        45,  17,  54,  63,  28,  49,  39,  43,  52,  44,  42,  57,  48,
        47,  31,  36,  61,  46,  82,  77, 201,  60,  87, 116,  92,  76,
        53,  83,  55,  84,  73,  51,  93,  91, 106,  79,  69,  85, 209,
       115,  78,  59,  72,  97, 235,  66,  67,  56,  64,  65, 656, 137,
       108,  94,  71,  68,  70, 125,  81,  75, 227, 133])