In [71]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
from IPython.display import display, HTML
import numpy as np
import json
from scipy import stats
import math
import plotly.graph_objects as go

from datetime import datetime
#plt.rcParams["figure.figsize"] = (17, 5) # (w, h)

In [72]:
# Discover every snapshot file below `root` and remember which one is newest.
#
# Results (used by later cells):
#   file_names - '/'-separated path of every file found under `root`
#   dates      - timestamp parsed from each file name, same order as file_names
#   youngest   - path of the file carrying the latest timestamp
root = '../data_storage'
file_names = []
dates = []

for subdir, dirs, files in os.walk(root):
    for file in files:
        # Normalise Windows separators so the paths look the same on every OS.
        file_names.append(os.path.join(subdir, file).replace('\\', '/'))

        # file[5:-9] drops the first 5 and the last 9 characters of the file
        # name; what remains is the timestamp text parsed by strptime.
        datetime_object = datetime.strptime(file[5:-9], '%d %b %Y %H:%M:%S')
        dates.append(datetime_object)

if not dates:
    # max() on an empty list would raise an unhelpful ValueError; keep the
    # exception type but say what is actually wrong.
    raise ValueError('no snapshot files found under {!r}'.format(root))

# `file_names` and `dates` are filled in lockstep, so the position of the
# latest date is also the position of its path. Taking the path from
# `file_names` (instead of re-scanning the last directory's `files` and
# gluing a hard-coded root in front) works for files in any sub-directory and
# guarantees `youngest` is always a path, never a leftover datetime.
youngest = file_names[dates.index(max(dates))]

In [73]:
# Load every snapshot into one combined frame, plus the newest snapshot on
# its own.
#
# Frames are collected in a list and concatenated once: DataFrame.append
# copied the whole accumulated frame on every iteration and no longer exists
# in pandas 2.x. Row labels are kept as read from each file (no re-indexing),
# matching what repeated append() produced.
snapshot_frames = [pd.read_json(f) for f in file_names]

# pd.concat refuses an empty list, so fall back to an empty frame as before.
main_frame = pd.concat(snapshot_frames) if snapshot_frames else pd.DataFrame()

youngest_frame = pd.read_json(youngest)

In [74]:
# Preview the first 10 rows of the combined frame (rendered as the cell output).
main_frame.head(10)

Unnamed: 0,hospital,contact,fed,icu_low_care,icu_high_care,updated,ecmo
0,Westmecklenburg Klinikum Helene von Bülow Kran...,1A Website,MV,yellow,red,"27.03.2020, 13:55",
1,"Helios Klinikum Meiningen, Klinik für Anästhes...",IMZ Website,TH,green,green,"27.03.2020, 12:41",
2,"Kliniken im Naturpark Altmühltal, Klinik Kösch...",Interdiszpl. Intensivstation Website,BY,green,yellow,"27.03.2020, 11:13",
3,Westmecklenburg Klinikum Helene von Bülow Kran...,ITS Website,MV,green,green,"27.03.2020, 10:53",
4,"Heidekreis-Klinikum gGmbH - Standort Walsrode,...",A3 Website,NI,green,green,"27.03.2020, 10:25",
5,"Evk Castrop-Rauxel, Anästhesie und Innere, Gru...",Intensivstation Website,NRW,red,green,"27.03.2020, 09:56",
6,Kliniken am Goldenen Steig - Krankenhaus Freyu...,interdisziplinäre Intensivstation Website,BY,green,green,"27.03.2020, 09:59",
7,Alexianer Hedwigkliniken (Standorte St. Hedwig...,Website,BE,green,green,"26.03.2020, 17:12",
8,"VAMED Klinik Hagen Ambrock, Pneumologie, Ambro...",PN3 Website,NRW,red,green,"27.03.2020, 10:56",
9,"Klinikum Altmühlfranken Weißenburg, Anästhesio...",Interdisziplinäre Intensivstation Website,BY,green,red,"26.03.2020, 16:46",


In [75]:
# Convert the 'updated' strings (e.g. "27.03.2020, 13:55") to datetimes.
# The source writes day before month, so dayfirst=True is required: without
# it a value such as "01.04.2020" is read as 4 January instead of 1 April.
main_frame['updated'] = pd.to_datetime(main_frame['updated'], dayfirst=True)
# NOTE(review): optional filter to keep only rows updated in 2020 - left
# disabled as in the original.
#main_frame = main_frame[main_frame['updated'].dt.year==2020]

In [76]:
# Summary statistics of the combined frame, using pandas' default column
# selection for describe() (rendered as the cell output).
main_frame.describe()

Unnamed: 0,hospital,contact,fed,icu_low_care,icu_high_care,updated,ecmo
count,20643,20643,20643,20643,20643,20643,5914
unique,729,315,17,3,3,1267,3
top,"St. Marienhospital Vechta gGmbH, Abteilung für...",Intensivstation Website,NRW,green,green,2013-03-20 09:26:00,green
freq,60,3944,4802,15178,16066,202,3841
first,,,,,,2013-03-20 09:26:00,
last,,,,,,2020-03-27 15:13:00,


In [82]:
# Count, per 'updated' timestamp, how many rows report each status colour
# for high care, low care and ECMO.

# One indicator column per distinct status value, e.g. high_care_green.
dfDummiesHigh = pd.get_dummies(main_frame['icu_high_care'], prefix = 'high_care')
dfDummiesLow = pd.get_dummies(main_frame['icu_low_care'], prefix = 'low_care')
dfDummiesEcmo = pd.get_dummies(main_frame['ecmo'], prefix = 'ecmo')

# Only the grouping key and the indicator columns take part in the sum.
# Summing the full frame relied on pandas silently discarding the text
# columns (hospital, contact, ...), which newer pandas versions no longer do.
# NOTE(review): main_frame stacks every snapshot file, so a row repeated
# across snapshots is counted once per file - confirm that is intended.
df = pd.concat([main_frame[['updated']], dfDummiesHigh, dfDummiesLow, dfDummiesEcmo], axis=1)
df = df.groupby(['updated']).sum()
# After the groupby 'updated' is the index, so sort chronologically on it.
df = df.sort_index()
# head(-1000) displays everything except the last 1000 rows.
df.head(-1000)

Unnamed: 0_level_0,high_care_green,high_care_red,high_care_yellow,low_care_green,low_care_red,low_care_yellow,ecmo_green,ecmo_red,ecmo_yellow
updated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-03-20 09:26:00,202,0,0,202,0,0,202,0,0
2020-03-16 12:36:00,26,0,0,26,0,0,26,0,0
2020-03-17 12:07:00,30,0,0,30,0,0,30,0,0
2020-03-17 18:41:00,30,0,0,30,0,0,0,30,0
2020-03-18 06:07:00,0,0,30,0,30,0,0,0,30
...,...,...,...,...,...,...,...,...,...
2020-03-24 12:52:00,24,0,0,24,0,0,0,0,0
2020-03-24 13:02:00,30,0,0,0,0,30,30,0,0
2020-03-24 13:03:00,28,0,0,28,0,0,0,0,0
2020-03-24 13:04:00,1,0,0,1,0,0,1,0,0


In [78]:
# Plot the three high-care status counts over time with a range slider, then
# save the figure as a standalone HTML file.
fig = go.Figure()

# (column in df, legend label, line colour) - one trace per entry, added in
# this order.
high_care_traces = (
    ('high_care_green', "High Care: Green", 'green'),
    ('high_care_yellow', "High Care: Yellow", 'yellow'),
    ('high_care_red', "High Care: Red", 'red'),
)

for column, label, colour in high_care_traces:
    trace = go.Scatter(
        x=df.index,
        y=df[column],
        name=label,
        line_color=colour,
        connectgaps=False,
    )
    fig.add_trace(trace)

fig.update_layout(
    title_text='Time Series with Rangeslider',
    xaxis_rangeslider_visible=True,
)
fig.show()
fig.write_html("plot.html")