In [35]:
import pandas as pd
import datetime
import numpy as np

import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import plotly.express as px
import jupyter_dash
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from IPython.display import display

# Remote CSS sheet URL(s), kept in a list.
# NOTE(review): presumably meant for a Dash app's `external_stylesheets`
# argument; nothing in the visible cells reads this name - confirm it is needed.
external_stylesheets = [
    'https://codepen.io/chriddyp/pen/bWLwgP.css',
]

In [36]:
# pd.set_option('display.max_rows', 50)
# Remove the cap on displayed columns so wide frames are shown in full.
pd.options.display.max_columns = None

In [53]:
# Read the raw vitals export and expose its 'Timestamp' column as 'Date'.
df = (
    pd.read_csv('data/vitals_original.csv', encoding='utf-8')
    .rename(columns={'Timestamp': 'Date'})
)
# Keep only the calendar date of each reading, then index the frame by it.
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index('Date')
df

Unnamed: 0_level_0,Temperature,Respiration Rate,Heart Rate,Blood Pressure,Pain,Pain Location
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-10-30,96.4,12,59,137/80,0,
2022-10-31,96.1,8,54,127/75,0,
2022-11-01,96.2,12,79,137/72,0,
2022-11-13,96.9,10,58,132/78,0,
2022-11-14,96.9,12,76,126/73,0,
...,...,...,...,...,...,...
2023-03-31,97.2,11,60,148/75,0,
2023-04-01,97.7,12,65,142/70,0,
2023-04-02,97.6,11,97,138/82,0,
2023-04-03,97.2,9,70,141/85,0,


In [54]:
# Break each "systolic/diastolic" reading apart at the slash.
# partition() yields three columns: 0 = text before the separator,
# 1 = the separator itself, 2 = text after the separator.
bp = df["Blood Pressure"].str.partition("/", expand=True)

# Store both halves as integer columns (systolic first, then diastolic).
for column, part in (("Blood Pressure Systolic", 0), ("Blood Pressure Diastolic", 2)):
    df[column] = bp[part].astype(int)

# MAP = 1/3 * SBP + 2/3 * DBP
sbp = df["Blood Pressure Systolic"]
dbp = df["Blood Pressure Diastolic"]
df["Mean Arterial Pressure"] = (sbp * 1/3) + (dbp * 2/3)
df

Unnamed: 0_level_0,Temperature,Respiration Rate,Heart Rate,Blood Pressure,Pain,Pain Location,Blood Pressure Systolic,Blood Pressure Diastolic,Mean Arterial Pressure
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-10-30,96.4,12,59,137/80,0,,137,80,99.000000
2022-10-31,96.1,8,54,127/75,0,,127,75,92.333333
2022-11-01,96.2,12,79,137/72,0,,137,72,93.666667
2022-11-13,96.9,10,58,132/78,0,,132,78,96.000000
2022-11-14,96.9,12,76,126/73,0,,126,73,90.666667
...,...,...,...,...,...,...,...,...,...
2023-03-31,97.2,11,60,148/75,0,,148,75,99.333333
2023-04-01,97.7,12,65,142/70,0,,142,70,94.000000
2023-04-02,97.6,11,97,138/82,0,,138,82,100.666667
2023-04-03,97.2,9,70,141/85,0,,141,85,103.666667


In [55]:
# Line chart of both blood-pressure components against the frame's index;
# the figure is sent to the "browser" renderer rather than shown inline.
bp_columns = ['Blood Pressure Systolic', 'Blood Pressure Diastolic']
fig = px.line(df, x=df.index, y=bp_columns)
fig.show(renderer="browser")

In [56]:
# fig = px.line(df, x=df.index, y='Temperature')
# fig.show(renderer="browser")

In [57]:
# fig = px.line(df, x=df.index, y='Heart Rate')
# fig.show(renderer="browser")

In [58]:
# fig = px.line(df, x=df.index, y='Respiration Rate')
# fig.show(renderer="browser")

In [59]:
# Turn the Date index back into an ordinary column (drop=False keeps it);
# the rows get a default integer index instead.
df = df.reset_index(drop=False)
df

Unnamed: 0,Date,Temperature,Respiration Rate,Heart Rate,Blood Pressure,Pain,Pain Location,Blood Pressure Systolic,Blood Pressure Diastolic,Mean Arterial Pressure
0,2022-10-30,96.4,12,59,137/80,0,,137,80,99.000000
1,2022-10-31,96.1,8,54,127/75,0,,127,75,92.333333
2,2022-11-01,96.2,12,79,137/72,0,,137,72,93.666667
3,2022-11-13,96.9,10,58,132/78,0,,132,78,96.000000
4,2022-11-14,96.9,12,76,126/73,0,,126,73,90.666667
...,...,...,...,...,...,...,...,...,...,...
125,2023-03-31,97.2,11,60,148/75,0,,148,75,99.333333
126,2023-04-01,97.7,12,65,142/70,0,,142,70,94.000000
127,2023-04-02,97.6,11,97,138/82,0,,138,82,100.666667
128,2023-04-03,97.2,9,70,141/85,0,,141,85,103.666667


In [60]:
# Remove the raw blood-pressure string and the two pain fields.
dropped_columns = ['Blood Pressure', 'Pain', 'Pain Location']
df = df.drop(dropped_columns, axis='columns')

In [61]:
# Give the heart-rate column a "Vitals" prefix.
df = df.rename({'Heart Rate': 'Vitals Heart Rate'}, axis='columns')
df

Unnamed: 0,Date,Temperature,Respiration Rate,Vitals Heart Rate,Blood Pressure Systolic,Blood Pressure Diastolic,Mean Arterial Pressure
0,2022-10-30,96.4,12,59,137,80,99.000000
1,2022-10-31,96.1,8,54,127,75,92.333333
2,2022-11-01,96.2,12,79,137,72,93.666667
3,2022-11-13,96.9,10,58,132,78,96.000000
4,2022-11-14,96.9,12,76,126,73,90.666667
...,...,...,...,...,...,...,...
125,2023-03-31,97.2,11,60,148,75,99.333333
126,2023-04-01,97.7,12,65,142,70,94.000000
127,2023-04-02,97.6,11,97,138,82,100.666667
128,2023-04-03,97.2,9,70,141,85,103.666667


In [62]:
# Any dates missing? List the calendar days in [start, end] with no reading.
# df['Date'] may still hold plain datetime.date objects at this point (the
# load step applies .dt.date), so convert it to datetime64 first: the set
# difference then compares timestamps with timestamps instead of depending
# on implicit date -> Timestamp coercion, and the cell also gives the same
# answer if it is re-run after 'Date' has been converted.
start = '2022-11-13'
end = '2023-04-01'
pd.date_range(start=start, end=end).difference(pd.to_datetime(df['Date']))

DatetimeIndex(['2023-01-17', '2023-02-15', '2023-02-21', '2023-02-22',
               '2023-02-27', '2023-03-02', '2023-03-06', '2023-03-07',
               '2023-03-09', '2023-03-10', '2023-03-13', '2023-03-15',
               '2023-03-20', '2023-03-22', '2023-03-23', '2023-03-29'],
              dtype='datetime64[ns]', freq=None)

In [63]:
# References:
# https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-fill
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq
#
# Put the frame on a continuous daily grid: resample() needs a
# DatetimeIndex, so 'Date' is converted to datetime64 and used as the index;
# asfreq() then inserts a NaN row for every calendar day without a reading.
# Frequency aliases: 'D' = calendar day ('T' would be minutes, not seconds).
# The `convention` argument is documented as PeriodIndex-only and is
# deprecated in recent pandas releases, so it is not passed here.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date').resample('D').asfreq().reset_index()
df

Unnamed: 0,Date,Temperature,Respiration Rate,Vitals Heart Rate,Blood Pressure Systolic,Blood Pressure Diastolic,Mean Arterial Pressure
0,2022-10-30,96.4,12.0,59.0,137.0,80.0,99.000000
1,2022-10-31,96.1,8.0,54.0,127.0,75.0,92.333333
2,2022-11-01,96.2,12.0,79.0,137.0,72.0,93.666667
3,2022-11-02,,,,,,
4,2022-11-03,,,,,,
...,...,...,...,...,...,...,...
152,2023-03-31,97.2,11.0,60.0,148.0,75.0,99.333333
153,2023-04-01,97.7,12.0,65.0,142.0,70.0,94.000000
154,2023-04-02,97.6,11.0,97.0,138.0,82.0,100.666667
155,2023-04-03,97.2,9.0,70.0,141.0,85.0,103.666667


In [64]:
# Replace the NaN rows with each column's overall mean.
# The means are taken over the listed vitals columns only, so the 'Date'
# column never enters the calculation (a bare df.mean() would include it).
# The result is assigned back instead of using inplace=True, so the frame
# shown by earlier cells is not mutated behind the reader's back.
vital_columns = [
    'Temperature',
    'Respiration Rate',
    'Vitals Heart Rate',
    'Blood Pressure Systolic',
    'Blood Pressure Diastolic',
    'Mean Arterial Pressure',
]
df = df.fillna(df[vital_columns].mean())
df

Unnamed: 0,Date,Temperature,Respiration Rate,Vitals Heart Rate,Blood Pressure Systolic,Blood Pressure Diastolic,Mean Arterial Pressure
0,2022-10-30,96.400000,12.000000,59.000000,137.000000,80.000000,99.000000
1,2022-10-31,96.100000,8.000000,54.000000,127.000000,75.000000,92.333333
2,2022-11-01,96.200000,12.000000,79.000000,137.000000,72.000000,93.666667
3,2022-11-02,96.660769,10.784615,61.253846,131.615385,72.707692,92.343590
4,2022-11-03,96.660769,10.784615,61.253846,131.615385,72.707692,92.343590
...,...,...,...,...,...,...,...
152,2023-03-31,97.200000,11.000000,60.000000,148.000000,75.000000,99.333333
153,2023-04-01,97.700000,12.000000,65.000000,142.000000,70.000000,94.000000
154,2023-04-02,97.600000,11.000000,97.000000,138.000000,82.000000,100.666667
155,2023-04-03,97.200000,9.000000,70.000000,141.000000,85.000000,103.666667


In [65]:
# Repeat the gap check: list the days in [start, end] that have no row
# in df['Date'].
start = '2022-11-13'
end = '2023-04-01'
expected_days = pd.date_range(start, end)
expected_days.difference(df['Date'])

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

In [67]:
# Write the processed vitals to CSV. `index` is documented as a bool, so
# pass False explicitly (None only worked because it is falsy); the integer
# row index is left out of the file.
df.to_csv('data/vitals.csv', index=False, encoding='utf-8')