In [89]:
import plotly.graph_objects as go
import plotly.express as px
import datetime as dt

import numpy as np
import pandas as pd
import os
import json

In [2]:
# --- Configuration ---------------------------------------------------------
# Folder that is listed for input files (relative path, keeps its trailing slash
# because later cells build paths by plain string concatenation).
data_directory = "../data/ontario/"

# Substrings used to tell the two kinds of input file apart by name.
individual_prefix = "table_person_ontario_"  # per-case files
total_prefix = "total_ontario_"              # aggregate-count files

In [27]:
# os.listdir() returns entries in arbitrary order. Later cells walk these lists
# and keep the *first* file that mentions a case as that case's "date", so the
# listing is sorted once here to make every downstream loop reproducible.
files = sorted(os.listdir(data_directory))

# Split the listing by file kind; a name qualifies when it contains the marker.
people_files = [f for f in files if individual_prefix in f]
total_files = [f for f in files if total_prefix in f]

In [114]:
# Keep only the latest per-case file for each date key.
# Caveat: this might not be the best approach, since some data might only be
# present in an earlier version of the file for a given date.
# NOTE(review): the key is the first 7 characters of the timestamp; if file names
# carry ISO-style "YYYY-MM-DD..." stamps this is year-month, not a day -- confirm.
dates_people = {}
for name in people_files:
    # Timestamp = text after the last "_" and before the first "." that follows.
    stamp = name.rsplit("_", 1)[-1].partition(".")[0]
    date_key = stamp[:7]
    # "Latest" is decided by plain string comparison of the file names.
    if date_key not in dates_people or name > dates_people[date_key]:
        dates_people[date_key] = name

In [121]:
# Same selection as for the per-case files, applied to the aggregate files:
# for every date key remember the file name that compares greatest.
dates_total = {}
for name in total_files:
    # Timestamp = text after the last "_" and before the first "." that follows.
    stamp = name.rsplit("_", 1)[-1].partition(".")[0]
    date_key = stamp[:7]
    # max() against the stored name (or the name itself on first sight) keeps
    # the lexicographically largest file name per key.
    dates_total[date_key] = max(name, dates_total.get(date_key, name))

In [None]:
def get_date(file):
    """Return the date key embedded in a data file name.

    The key is the first 7 characters of the text that follows the last
    underscore in ``file`` and precedes the first dot after it.

    NOTE(review): for ISO-style "YYYY-MM-DD..." stamps 7 characters is
    year-month granularity -- confirm this is intended.
    """
    stamp = file.rsplit("_", 1)[-1].partition(".")[0]
    return stamp[:7]

In [124]:
def _parse_patient(person):
    """Split a 'patient' string into ``(age, gender)``.

    With two or more space-separated tokens, the first token minus its last
    character is converted to ``int`` for the age and the second token is the
    gender (presumably inputs look like "50s Male" -- verify against the data).
    A single-token value (e.g. 'pending') is returned unchanged for both fields.
    """
    tokens = person.split(" ")
    if len(tokens) > 1:
        return int(tokens[0][:-1]), tokens[1]
    return tokens[0], tokens[0]


def _case_record(entry, date):
    """Build the flat record stored per case from one parsed JSON line."""
    age, gender = _parse_patient(entry['patient'])
    return {
        "date": date,
        "age": age,
        'gender': gender,
        'public health unit': entry['public health unit'],
        'transmission': entry['transmission'],
        'status': entry['status'],
    }


# Accumulate one record per case number across every per-case file.
peoples = {}

# Sorted so that the first file mentioning a case -- which fixes that case's
# "date" -- does not depend on the directory listing order.
for file in sorted(people_files):
    date = get_date(file)
    path = data_directory + file
    with open(path, 'r') as infile:
        # Each non-empty line is parsed as one JSON object describing a case.
        for line in infile:
            if not line.strip():
                # A blank (e.g. trailing) line would make json.loads raise.
                continue
            entry = json.loads(line)
            case_number = entry['case number']
            record = _case_record(entry, date)

            current = peoples.get(case_number)
            if current is None:
                # First sighting of this case: store the record as-is.
                peoples[case_number] = record
                continue

            # Case already known: only fill in fields that were still 'pending'.
            # Values that are already known are never overwritten.
            for key, value in record.items():
                if current[key] == 'pending' and value != 'pending':
                    current[key] = value

In [143]:
# Stamp every record with its own case number (the stored dicts are updated in
# place), then flatten the mapping into a list of records.
for case_number, record in peoples.items():
    record["case number"] = case_number
peoples_list = list(peoples.values())

# One row per case.
df = pd.DataFrame.from_dict(peoples_list)


def _age_by_zone(gender):
    """Frame of age / zone for cases of ``gender`` whose age is not 'pending'."""
    rows = [
        {"age": p["age"], "zone": p["public health unit"]}
        for p in peoples_list
        if p["gender"] == gender and p["age"] != 'pending'
    ]
    return pd.DataFrame.from_dict(rows)


male = _age_by_zone("Male")
female = _age_by_zone("Female")


In [None]:
# Load every aggregate-count file into a list of dicts, tagging each with the
# date key taken from its file name.
total = []
# total_files is a list, so it is iterated directly (calling it raises
# "TypeError: 'list' object is not callable"). Sorted for a reproducible order.
for file in sorted(total_files):
    date = get_date(file)
    path = data_directory + file
    with open(path, 'r') as infile:
        # Each aggregate file is parsed as a single JSON document.
        temp = json.load(infile)
    temp["Date"] = date
    total.append(temp)
total_df = pd.DataFrame.from_dict(total)

In [152]:
# Order the snapshots by their "Date" string, then pull one list per column.
total = sorted(total, key=lambda snapshot: snapshot['Date'])

# NOTE(review): the trailing digits in keys such as 'Negative1' look like
# footnote markers carried over from the source table headers -- confirm.
negative, under, positive, resolved, dead, dates = (
    [snapshot[column] for snapshot in total]
    for column in (
        'Negative1',
        'Currently under investigation2',
        'Confirmed positive3',
        'Resolved4',
        'Deceased',
        'Date',
    )
)

In [154]:
# Line chart of the confirmed-positive series against the snapshot dates.
positive_trace = go.Scatter(
    x=dates,
    y=positive,
    mode='lines',
    name='Total Number of Cases',
)
fig = go.Figure(data=[positive_trace])
fig.show()

In [155]:

# Stacked bars: one bar trace per outcome category, all sharing the date axis.
stacked_series = [
    ('Negative', negative),
    ('Under Investigation', under),
    ('Confirmed Positive', positive),
    ('Resolved', resolved),
    ('Dead', dead),
]
fig = go.Figure(
    data=[go.Bar(name=label, x=dates, y=values) for label, values in stacked_series]
)
fig.update_layout(
    title_text='Infection over time',  # title of plot
    xaxis_title_text='Date',           # xaxis label
    yaxis_title_text='Infected',       # yaxis label
    bargap=0.7,                        # gap between bars of adjacent location coordinates
    bargroupgap=0.1,                   # gap between bars of the same location coordinates
    barmode='stack',                   # stack the categories instead of grouping them
)
fig.show()

In [145]:
# Case counts per date key, coloured by public health unit.
unit_layout = dict(
    title_text='Infection over time',  # title of plot
    xaxis_title_text='Date',           # xaxis label
    yaxis_title_text='Infected',       # yaxis label
    bargap=0.7,                        # gap between bars of adjacent location coordinates
    bargroupgap=0.1,                   # gap between bars of the same location coordinates
)
fig = px.histogram(
    df,
    x="date",
    color="public health unit",
    color_discrete_sequence=px.colors.qualitative.Light24,
)
fig.update_layout(**unit_layout)

fig.show()

In [77]:
# Age distribution of male cases, coloured by zone (public health unit).
zone_palette = px.colors.qualitative.Light24
fig = px.histogram(
    male,
    x="age",
    color="zone",
    color_discrete_sequence=zone_palette,
)
fig.update_layout(
    title_text='Male Infected by Age',  # title of plot
    xaxis_title_text='Age',             # xaxis label
    yaxis_title_text='Count',           # yaxis label
    bargap=0.7,                         # gap between bars of adjacent location coordinates
    bargroupgap=0.1,                    # gap between bars of the same location coordinates
)

fig.show()

In [80]:
# Age distribution of female cases, coloured by zone (public health unit).
# Unlike the male chart, no bargroupgap is set here.
female_layout = {
    'title_text': 'Female Infected by Age',  # title of plot
    'xaxis_title_text': 'Age',               # xaxis label
    'yaxis_title_text': 'Count',             # yaxis label
    'bargap': 0.7,                           # gap between bars of adjacent location coordinates
}
fig = px.histogram(
    female,
    x="age",
    color="zone",
    color_discrete_sequence=px.colors.qualitative.Light24,
)
fig.update_layout(**female_layout)

fig.show()

In [158]:
# TODO: add logic to get cumulative transmission over time


In [157]:
# TODO: this still needs to become CUMULATIVE transmission over time; for now it
# is a plain per-date-key count, coloured by transmission type.
transmission_layout = dict(
    title_text='Transmission over time',  # title of plot
    xaxis_title_text='Date',              # xaxis label
    yaxis_title_text='Infected',          # yaxis label
    bargap=0.7,                           # gap between bars of adjacent location coordinates
    bargroupgap=0.1,                      # gap between bars of the same location coordinates
)
fig = px.histogram(df, x="date", color="transmission")
fig.update_layout(**transmission_layout)
fig.show()