---
# Old Cells/Refs, Run @ Own Risk

In [None]:
# todo: old time series - validate/reorganize
#
# Builds a daily mean heart-rate series from the raw `df_time` frame, fits an
# ARIMA model to it and plots a 30-day forecast.
# `df_time` itself is left untouched (the derived series get their own names),
# so this cell can be re-run and does not break other cells that group the raw
# frame by 'charttime'.

# Parse timestamps up front: rows whose charttime cannot be parsed become NaT
# and are dropped together with missing heart-rate readings. (Coercing the
# index after the groupby would leave NaT labels that Series.dropna() ignores.)
hr_readings = df_time.assign(
    charttime=pd.to_datetime(df_time['charttime'], errors='coerce')
).dropna(subset=['charttime', 'heart_rate'])

# One value per distinct timestamp, in chronological order.
hr_by_time = hr_readings.groupby('charttime')['heart_rate'].mean().sort_index()

# resample('D').mean() averages every reading that falls within a calendar day.
# asfreq('D') would only keep readings whose timestamp lands exactly on the
# daily grid and silently discard the rest. Days without any reading are then
# filled by time-weighted interpolation.
hr_daily = hr_by_time.resample('D').mean().interpolate(method='time')

# Continue with time series preparation
time_series_data = hr_daily

# Fit ARIMA model
order = (5, 1, 0)  # Example ARIMA parameters, adjust as needed
model = ARIMA(time_series_data, order=order)
model_fit = model.fit()

# Forecast the next 30 days
forecast = model_fit.forecast(steps=30)

# Plot the actual data and forecast
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_series_data, label='Actual Data', marker='o')
ax.plot(forecast.index, forecast, label='Forecast', linestyle='--', color='orange')
ax.set_title('ARIMA Model - Heart Rate')
ax.set_xlabel('Charttime')
ax.set_ylabel('Heart Rate')
ax.legend()
ax.grid(True)  # explicit True: a bare grid() call toggles the current grid state
plt.show()

In [None]:
# todo: old time series - validate/reorganize
#
# Builds a daily mean heart-rate series from the raw `df_time` frame, overlays a
# 7-day rolling mean and rolling min/max band, fits an ARIMA model and plots a
# 30-day forecast.
# `df_time` itself is left untouched (the derived series get their own names),
# so this cell can be re-run and does not break other cells that group the raw
# frame by 'charttime'.

# Parse timestamps up front: rows whose charttime cannot be parsed become NaT
# and are dropped together with missing heart-rate readings. (Coercing the
# index after the groupby would leave NaT labels that Series.dropna() ignores.)
hr_readings = df_time.assign(
    charttime=pd.to_datetime(df_time['charttime'], errors='coerce')
).dropna(subset=['charttime', 'heart_rate'])

# One value per distinct timestamp, in chronological order.
hr_by_time = hr_readings.groupby('charttime')['heart_rate'].mean().sort_index()

# resample('D').mean() averages every reading that falls within a calendar day.
# asfreq('D') would only keep readings whose timestamp lands exactly on the
# daily grid and silently discard the rest. Days without any reading are then
# filled by time-weighted interpolation.
hr_daily = hr_by_time.resample('D').mean().interpolate(method='time')

# Continue with time series preparation
time_series_data = hr_daily

# Calculate a moving average and min/max for better visualization.
# The first window_size - 1 points of each rolling series are NaN by construction.
window_size = 7  # Weekly moving average
rolling_window = time_series_data.rolling(window=window_size)
moving_avg = rolling_window.mean()
moving_min = rolling_window.min()
moving_max = rolling_window.max()

# Fit ARIMA model
order = (5, 1, 0)  # Example ARIMA parameters, adjust as needed
model = ARIMA(time_series_data, order=order)
model_fit = model.fit()

# Forecast the next 30 days
forecast = model_fit.forecast(steps=30)

# Plot the actual data, moving average, min/max, and forecast
fig, ax = plt.subplots(figsize=(14, 8))

# Actual data
ax.plot(time_series_data, label='Actual Data', color='blue', alpha=0.4, marker='o', markersize=2)

# Moving average
ax.plot(moving_avg, label=f'{window_size}-Day Moving Average', color='green', linewidth=2)

# Min/Max bands
ax.fill_between(time_series_data.index, moving_min, moving_max, color='gray', alpha=0.3, label='Min/Max Range')

# Forecast
ax.plot(forecast.index, forecast, label='Forecast (30 days)', linestyle='--', color='orange', linewidth=2)

# Improve plot aesthetics
ax.set_title('ARIMA Model - Heart Rate with Moving Average and Min/Max Range', fontsize=16)
ax.set_xlabel('Charttime', fontsize=12)
ax.set_ylabel('Heart Rate', fontsize=12)
ax.legend(fontsize=10)
ax.grid(alpha=0.3)
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# todo: old time series - validate/reorganize
#
# Builds a daily mean heart-rate series from the raw `df_time` frame, fits an
# ARIMA model and plots a 90-day forecast with its confidence interval.
# `df_time` itself is left untouched (no in-place column assignment, no
# rebinding), so this cell can be re-run and does not break other cells that
# group the raw frame by 'charttime'.

# Coerce both columns on a copy: non-numeric heart rates become NaN and
# unparseable timestamps become NaT; both kinds of rows are then dropped.
# (Coercing the index after the groupby would leave NaT labels that
# Series.dropna() ignores.)
hr_readings = df_time.assign(
    heart_rate=pd.to_numeric(df_time['heart_rate'], errors='coerce'),
    charttime=pd.to_datetime(df_time['charttime'], errors='coerce'),
).dropna(subset=['heart_rate', 'charttime'])

# One value per distinct timestamp, in chronological order.
hr_by_time = hr_readings.groupby('charttime')['heart_rate'].mean().sort_index()

# resample('D').mean() averages every reading that falls within a calendar day.
# asfreq('D') would only keep readings whose timestamp lands exactly on the
# daily grid and silently discard the rest. Days without any reading are then
# filled by time-weighted interpolation.
hr_daily = hr_by_time.resample('D').mean().interpolate(method='time')

# Continue with time series preparation
time_series_data = hr_daily

# Calculate a moving average and min/max for better visualization.
# NOTE(review): these rolling series are computed but not plotted in this cell.
window_size = 7  # Weekly moving average
rolling_window = time_series_data.rolling(window=window_size)
moving_avg = rolling_window.mean()
moving_min = rolling_window.min()
moving_max = rolling_window.max()

# Fit ARIMA model
order = (5, 1, 0)  # Example ARIMA parameters, adjust as needed
model = ARIMA(time_series_data, order=order)
model_fit = model.fit()

# Forecast the next 90 days with confidence intervals
forecast_object = model_fit.get_forecast(steps=90)
forecast = forecast_object.predicted_mean
confidence_intervals = forecast_object.conf_int()

# Extract lower and upper bounds of confidence intervals
# (first column = lower bound, second column = upper bound)
lower_bounds = confidence_intervals.iloc[:, 0]
upper_bounds = confidence_intervals.iloc[:, 1]

# Plot the historical data and forecast with confidence intervals
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_series_data, label='Historical', color='blue', linewidth=2)
ax.plot(forecast.index, forecast, label='Forecast (90 days)', color='red', linestyle='--', linewidth=2)
ax.fill_between(forecast.index, lower_bounds, upper_bounds, color='red', alpha=0.2, label='Confidence Interval')

# Add titles and labels
# NOTE(review): the patient id in the title is hard-coded; confirm it matches
# the subject that `df_time` was actually filtered to.
ax.set_title('Heart Rate Forecast (90 days) for Patient 16454913', fontsize=14)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Heart Rate', fontsize=12)
ax.legend(fontsize=10)
ax.grid(alpha=0.3)
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# Quick-look plots of the vitals frame: the heart-rate histogram first, then
# heart rate scattered against respiratory rate and against chart time.
# (The third histogram argument is presumably the bin count - confirm against
# the plot_histogram helper.)
plot_histogram(vdf, 'heart_rate', 50)
for other_col in ('resp_rate', 'charttime'):
    plot_scatter(vdf, 'heart_rate', other_col, alpha=0.2)

In [None]:
# Numeric-only view of the vitals frame with the identifier columns removed.
# Built as a single chain from `vdf`, so re-running the cell always starts from
# the same input.
vdf_numeric = (
    vdf
    .drop(columns=vdf.select_dtypes(exclude=['number']).columns)
    .drop(columns=['subject_id', 'stay_id'])
)
vdf_numeric.head()

In [None]:
# Correlation matrix over all numeric vitals columns.
# DataFrame.corr excludes missing values pair by pair, so each coefficient may
# be based on a different subset of rows.
vdf_corr = vdf_numeric.corr(method='pearson')
plot_correlation_matrix(vdf_corr)

In [None]:
# Same correlation matrix, but restricted to rows in which every numeric column
# is present, so all coefficients are computed on the same set of rows.
vdf_corr_dropna = vdf_numeric.dropna(axis=0, how='any').corr(method='pearson')
plot_correlation_matrix(vdf_corr_dropna)

In [None]:
# Re-query the vitals table, this time joining in each subject's age.
# Only rows with a recorded heart rate are returned.
# NOTE(review): the join is on subject_id only. If mimiciv_derived.age holds
# more than one row per subject (e.g. one per admission), every vitals row is
# repeated once per age row - confirm against the table definition.
vital_age_query = """
select t1.*, t2.age
from mimiciv_derived.vitalsign t1
join mimiciv_derived.age t2
on t1.subject_id = t2.subject_id
where heart_rate is not NULL;
"""

# With chunksize set, read_sql returns a one-shot iterator of DataFrames rather
# than a single frame; it is meant to be concatenated by a later cell.
chunks = pd.read_sql(sql=vital_age_query, con=engine, chunksize=100_000)


In [None]:
# Assemble the vitals + age dataframe from the chunked query result.
# `chunks` is a one-shot iterator: re-run the vitals + age query cell before
# re-running this one. The name `chunks` is also reassigned by the
# vitals + chemistry query cell, so execution order matters here.
va_df = pd.concat(objs=chunks, axis=0, ignore_index=True)
va_df.head(n=5)

In [None]:
# Correlation matrix for the vitals + age frame.
# Non-numeric columns are removed first; the identifier columns are removed as
# well, matching how `vdf_numeric` is prepared, because correlations against
# subject_id / stay_id carry no meaning and only clutter the matrix.
va_corr_df = (
    va_df
    .drop(columns=va_df.select_dtypes(exclude=['number']).columns)
    .drop(columns=['subject_id', 'stay_id'])
)
# DataFrame.corr excludes missing values pair by pair.
va_corr = va_corr_df.corr()
plot_correlation_matrix(va_corr)

## Vitals + Chemistry

In [None]:
# Query vitals + chemistry + weight, keeping only rows in which every selected
# column is non-null.
# NOTE(review): chemistry is joined on subject_id and weight_durations on
# stay_id, with no time alignment between the tables. If those tables hold
# several rows per subject / stay, each vitals row is paired with all of them -
# confirm this is intended before using the result.
# NOTE(review): glucose is taken from the vitals table (t1) although it is
# listed among the chemistry columns - confirm which source is wanted.
vital_chem_query = """
select t1.heart_rate, t1.sbp, t1.dbp, t1.mbp, t1.sbp_ni, t1.dbp_ni, t1.mbp_ni, t1.resp_rate, t1.temperature, t1.spo2,
       t2.aniongap, t2.bicarbonate, t2.bun, t2.calcium, t2.chloride, t2.creatinine, t1.glucose, t2.sodium, t2.potassium,
        t3.weight
from mimiciv_derived.vitalsign t1
         join mimiciv_derived.chemistry t2
              on t1.subject_id = t2.subject_id
        join mimiciv_derived.weight_durations t3
            on t1.stay_id = t3.stay_id
where t1.heart_rate is not null
  and t1.sbp is not null
  and t1.dbp is not null
  and t1.mbp is not null
  and t1.sbp_ni is not null
  and t1.dbp_ni is not null
  and t1.mbp_ni is not null
  and t1.resp_rate is not null
  and t1.temperature is not null
  and t1.spo2 is not null
  and t2.aniongap is not null
  and t2.bicarbonate is not null
  and t2.bun is not null
  and t2.calcium is not null
  and t2.chloride is not null
  and t2.creatinine is not null
  and t1.glucose is not null
  and t2.sodium is not null
  and t2.potassium is not null
  and t3.weight is not null;
"""

# With chunksize set, read_sql returns a one-shot iterator of DataFrames.
# This rebinds `chunks`, replacing any iterator left over from an earlier query
# cell; the visible cells below never concatenate it into a frame.
chunks = pd.read_sql(sql=vital_chem_query, con=engine, chunksize=100_000)

In [None]:
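# Disabled: `vc_df` is not built in the visible cells (the `chunks` returned by
# the vitals + chemistry query are never concatenated). Build it first, e.g.
# vc_df = pd.concat(chunks, ignore_index=True), before re-enabling this preview.
#vc_df.head(5)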

In [None]:
#vc_df_corr = vc_df.drop(columns=vc_df.select_dtypes(exclude=['number']).columns).corr()

In [None]:
#plot_correlation_matrix(vc_df_corr)