In [1]:
from azureml.core import Workspace, Dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import timedelta
import datetime
import sklearn.metrics as skm
import warnings
warnings.filterwarnings('ignore')

## Siirtymätodennäköisyyksien analysointia kolmivuorotasolla

In [2]:
train_test_day = pd.Timestamp('2018-10-01')
start = pd.Timestamp('2017-01-01T00')
end = pd.Timestamp('2019-12-31T00')

const1 = 26288
const2 = 26280

const3 = 11160
const4 = 11152

In [3]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def mean_squared_error(y_true, y_pred):
    return np.mean(np.power((y_true - y_pred), 2))


## Lasten päivystys

In [None]:
subscription_id = '4371739e-d07f-42d5-a3a6-efa120c1e246'
resource_group = 'husfd-tu-dip-potilasvirrat'
workspace_name = 'husfd-tu-dip-potilasvirrat-ml'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='uranus27_1')
features = ['kaynti_numero', 'potilasnumero', 'henkilotunnus', 'alkuhetki', 'loppuhetki',
       'vo_toimipiste_nimi', 'kayntityyppi_selite', 'varaustyyppi_selite',
       'mista_tuli_selite', 'res_koodi', 'res_selite', 'jh_selite', 
       'jatkoh_laitos_nimi', 'jatkoh_toimipiste_nimi']
df = dataset.to_pandas_dataframe()[features]
df.replace("", float("NaN"), inplace=True)
df.dropna(subset=['alkuhetki', 'loppuhetki'], inplace=True)
df.drop_duplicates(subset='kaynti_numero', inplace=True)
df = df[df['varaustyyppi_selite'] != 'PÄIV PKL soitto']
df = df[(df['kayntityyppi_selite'] != 'Hoitokäynti') & (df['kayntityyppi_selite'] != 'Ohjattu muualle') & 
       (df['kayntityyppi_selite'] !='HYKSin Oy:n potilas') & (df['kayntityyppi_selite'] != 'Sarjahoitokäynti') & 
       (df['kayntityyppi_selite'] != 'Ensikäynti')]
df = df[(df.alkuhetki >= start) & (df.alkuhetki < end)]
df_train = df
df_test = df[df.alkuhetki >= train_test_day]
pd.set_option('display.max_columns', None)
df_train = df_train.sort_values(by='alkuhetki')

In [5]:
df_train['alkuhetki'] = pd.to_datetime(df_train['alkuhetki'], format="%Y-%m-%d %H:%M:%S")
df_train['loppuhetki'] = pd.to_datetime(df_train['loppuhetki'], format="%Y-%m-%d %H:%M:%S")
df_train = df_train[df_train['alkuhetki'] <= df_train['loppuhetki']]
df_train['aikaväli'] = pd.arrays.IntervalArray.from_arrays(left = df_train['alkuhetki'], right = df_train['loppuhetki'], closed='neither')
df_train['palveluaika'] = ((df_train['loppuhetki'] - df_train['alkuhetki']).astype('timedelta64[s]') / 3600)

In [12]:
df_test['alkuhetki'] = pd.to_datetime(df_test['alkuhetki'], format="%Y-%m-%d %H:%M:%S")
df_test['loppuhetki'] = pd.to_datetime(df_test['loppuhetki'], format="%Y-%m-%d %H:%M:%S")
df_test = df_test[df_test['alkuhetki'] <= df_test['loppuhetki']]
df_test['aikaväli'] = pd.arrays.IntervalArray.from_arrays(left = df_test['alkuhetki'], right = df_test['loppuhetki'], closed='neither')
df_test['palveluaika'] = ((df_test['loppuhetki'] - df_test['alkuhetki']).astype('timedelta64[s]') / 3600)

In [13]:
aika = train_test_day
span_day = pd.DataFrame(data={'time':[aika + timedelta(hours=x) for x in range(0, const3, 8)]})
timestamp_day = [aika + timedelta(hours=x) for x in range(0, const4, 8)]
df_test_day = pd.DataFrame(data={'time':timestamp_day})
df_test_day['weekday'] = df_test_day['time'].dt.weekday
df_test_day['month'] = df_test_day['time'].dt.month
df_test_day['timespan'] = pd.arrays.IntervalArray.from_arrays(left = span_day['time'][0:-1], right = span_day['time'][1:], closed='left')
df_test_day['patient_count'] = pd.DataFrame([pd.arrays.IntervalArray(df_test_day['timespan']).overlaps(b) for b in df_test['aikaväli']]).sum()

for t in range(len(df_test_day['timespan'])):
    sum = 0
    for a, b in zip(df_test['aikaväli'], df_test['loppuhetki']):
        if ((df_test_day.loc[t, 'timespan'].overlaps(a)) == True) & ((b in df_test_day.loc[t, 'timespan']) == False):
            sum += 1
    df_test_day.loc[t, 'päiv'] = sum
df_test_day['JOL1'] = pd.DataFrame([pd.arrays.IntervalArray(df_test_day['timespan']).contains(b) for b in df_test[df_test.jatkoh_laitos_nimi == 'JOL1'].loppuhetki]).sum()
df_test_day['koti'] = pd.DataFrame([pd.arrays.IntervalArray(df_test_day['timespan']).contains(b) for b in df_test[df_test.jh_selite == 'Koti'].loppuhetki]).sum()
df_test_day['muu'] = pd.DataFrame([pd.arrays.IntervalArray(df_test_day['timespan']).contains(b) for b in df_test[(df_test.jh_selite != 'Koti') & (df_test.jatkoh_laitos_nimi != 'JOL1')].loppuhetki]).sum()
df_test_day.set_index('time', inplace=True)

In [14]:
df_test_day['päiv_prob_00'] = df_test_day.iloc[::3, :].päiv / df_test_day.iloc[::3, :].patient_count
df_test_day['päiv_prob_08'] = df_test_day.iloc[1::3, :].päiv / df_test_day.iloc[1::3, :].patient_count
df_test_day['päiv_prob_16'] = df_test_day.iloc[2::3, :].päiv / df_test_day.iloc[2::3, :].patient_count
df_test_day['JOL1_prob_00'] = df_test_day.iloc[::3, :].JOL1 / df_test_day.iloc[::3, :].patient_count
df_test_day['JOL1_prob_08'] = df_test_day.iloc[1::3, :].JOL1 / df_test_day.iloc[1::3, :].patient_count
df_test_day['JOL1_prob_16'] = df_test_day.iloc[2::3, :].JOL1 / df_test_day.iloc[2::3, :].patient_count
df_test_day['koti_prob_00'] = df_test_day.iloc[::3, :].koti / df_test_day.iloc[::3, :].patient_count
df_test_day['koti_prob_08'] = df_test_day.iloc[1::3, :].koti / df_test_day.iloc[1::3, :].patient_count
df_test_day['koti_prob_16'] = df_test_day.iloc[2::3, :].koti / df_test_day.iloc[2::3, :].patient_count
df_test_day['muu_prob_00'] = df_test_day.iloc[::3, :].muu / df_test_day.iloc[::3, :].patient_count
df_test_day['muu_prob_08'] = df_test_day.iloc[1::3, :].muu / df_test_day.iloc[1::3, :].patient_count
df_test_day['muu_prob_16'] = df_test_day.iloc[2::3, :].muu / df_test_day.iloc[2::3, :].patient_count

df_test_day_grouped = df_test_day.groupby(by='weekday')
train_mean = df_test_day_grouped.mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00', 
    'päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08', 'päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]

In [18]:
def metrics(test_df, columns):
    result_df = pd.DataFrame()
    for i, j in zip(3*list(range(0, 12, 4)), range(12, 48, 4)):
        mape = mean_absolute_percentage_error(test_df.iloc[:, i:i+4], test_df.iloc[:, j:j+4])
        mae = mean_absolute_error(test_df.iloc[:, i:i+4], test_df.iloc[:, j:j+4])
        mse = mean_squared_error(test_df.iloc[:, i:i+4], test_df.iloc[:, j:j+4])
        rmse = np.sqrt(mse)
        result_df = pd.concat([result_df, mape, mae, rmse], axis=1)

    mean_df = result_df.mean(axis=0)
    mean_df.index = ['MAPE', 'MAE', 'RMSE'] * 9
    mean_mape_df = result_df.loc[columns]
    mean_df.iloc[list(range(0, 27, 3))] = mean_mape_df.iloc[:, list(range(0, 27, 3))].mean(axis=0)
    mean_by_day_df = pd.DataFrame()
    for i in range(0, 27, 9):
        mean_by_day_df = pd.concat([mean_by_day_df, pd.DataFrame(np.mean([mean_df[0+i:3+i], mean_df[3+i:6+i], mean_df[6+i:9+i]], axis=0))])
    mean_by_day_df.index = ['MAPE', 'MAE', 'RMSE'] * 3
    return mean_df, mean_by_day_df

def smoothen_transition_probs(df_mean):
    for row in range(len(df_mean)):
        probs_sum = np.sum([df_mean.at[row, n] for n in df_mean.columns])
        for names in df_mean.columns:
            df_mean.loc[df_mean.index == row, names] = df_mean.loc[df_mean.index == row, names] / probs_sum + 0.000000001
    return df_mean

### Ensimmäinen ennusteajankohta 22.1.-11.2.2019: toteutuneiden ja 3kk, 2kk ja 1kk historiadatan avulla laskettujen siirtymätodennäköisyyksien erot

In [19]:
test_true_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-01-22')) & (df_test_day.index < pd.Timestamp('2019-02-12'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_true_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-01-22')) & (df_test_day.index < pd.Timestamp('2019-02-12'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_true_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-01-22')) & (df_test_day.index < pd.Timestamp('2019-02-12'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])

test_first_midnight = smoothen_transition_probs(df_test_day[df_test_day.index < pd.Timestamp('2019-01-01')].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_first_day = smoothen_transition_probs(df_test_day[df_test_day.index < pd.Timestamp('2019-01-01')].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_first_night = smoothen_transition_probs(df_test_day[df_test_day.index < pd.Timestamp('2019-01-01')].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])   

test_second_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-11-01')) & (df_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_second_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-11-01')) & (df_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_second_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-11-01')) & (df_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

test_third_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-12-01')) & (df_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_third_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-12-01')) & (df_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_third_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-12-01')) & (df_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

test_df = pd.concat([test_true_midnight, test_true_day, test_true_night, test_first_midnight, test_first_day, test_first_night, test_second_midnight, test_second_day, test_second_night, test_third_midnight, test_third_day, test_third_night], axis=1)
results_mean, results_mean_day = metrics(test_df, ['päiv_prob_00', 'JOL1_prob_00', 'koti_prob_00', 'päiv_prob_08', 'JOL1_prob_08', 'koti_prob_08', 'päiv_prob_16', 'JOL1_prob_16', 'koti_prob_16'])

### 

In [20]:
results_mean_day

Unnamed: 0,0
MAPE,24.865232
MAE,0.033737
RMSE,0.040741
MAPE,24.324924
MAE,0.032109
RMSE,0.038303
MAPE,28.99292
MAE,0.034908
RMSE,0.041698


### Toinen ennusteajankohta 22.3.-11.4.2019: toteutuneiden ja 3kk, 2kk ja 1kk historiadatan avulla laskettujen siirtymätodennäköisyyksien erot

In [21]:
test_true_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-03-22')) & (df_test_day.index < pd.Timestamp('2019-04-12'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_true_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-03-22')) & (df_test_day.index < pd.Timestamp('2019-04-12'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_true_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-03-22')) & (df_test_day.index < pd.Timestamp('2019-04-12'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])

test_first_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-12-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_first_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-12-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_first_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2018-12-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])   

test_second_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-01-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_second_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-01-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_second_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-01-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

test_third_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-02-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_third_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-02-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_third_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-02-01')) & (df_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

test_df = pd.concat([test_true_midnight, test_true_day, test_true_night, test_first_midnight, test_first_day, test_first_night, test_second_midnight, test_second_day, test_second_night, test_third_midnight, test_third_day, test_third_night], axis=1)
results_mean, results_mean_day = metrics(test_df, ['päiv_prob_00', 'JOL1_prob_00', 'koti_prob_00', 'päiv_prob_08', 'JOL1_prob_08', 'koti_prob_08', 'päiv_prob_16', 'JOL1_prob_16', 'koti_prob_16'])


In [22]:
results_mean_day

Unnamed: 0,0
MAPE,31.032463
MAE,0.044841
RMSE,0.054388
MAPE,35.745771
MAE,0.047296
RMSE,0.057273
MAPE,40.406838
MAE,0.051877
RMSE,0.06332


### Kolmas ennusteajankohta 22.11.-12.12.2019: toteutuneiden ja 3kk, 2kk ja 1kk historiadatan avulla laskettujen siirtymätodennäköisyyksien erot

In [23]:
test_true_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-11-22')) & (df_test_day.index < pd.Timestamp('2019-12-13'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_true_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-11-22')) & (df_test_day.index < pd.Timestamp('2019-12-13'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_true_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-11-22')) & (df_test_day.index < pd.Timestamp('2019-12-13'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])

test_first_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-08-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_first_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-08-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_first_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-08-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])   

test_second_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-09-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_second_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-09-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_second_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-09-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

test_third_midnight = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-10-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_00', 'JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
test_third_day = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-10-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_08', 'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
test_third_night = smoothen_transition_probs(df_test_day[(df_test_day.index >= pd.Timestamp('2019-10-01')) & (df_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['päiv_prob_16', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

test_df = pd.concat([test_true_midnight, test_true_day, test_true_night, test_first_midnight, test_first_day, test_first_night, test_second_midnight, test_second_day, test_second_night, test_third_midnight, test_third_day, test_third_night], axis=1)
results_mean, results_mean_day = metrics(test_df, ['päiv_prob_00', 'JOL1_prob_00', 'koti_prob_00', 'päiv_prob_08', 'JOL1_prob_08', 'koti_prob_08', 'päiv_prob_16', 'JOL1_prob_16', 'koti_prob_16'])

In [24]:
results_mean_day

Unnamed: 0,0
MAPE,117227900.0
MAE,0.04344077
RMSE,0.0542957
MAPE,157336800.0
MAE,0.04364286
RMSE,0.05438999
MAPE,237854800.0
MAE,0.04616305
RMSE,0.05842255


# L1

In [28]:
subscription_id = '4371739e-d07f-42d5-a3a6-efa120c1e246'
resource_group = 'husfd-tu-dip-potilasvirrat'
workspace_name = 'husfd-tu-dip-potilasvirrat-ml'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset_ward = Dataset.get_by_name(workspace, name='uranus27_2')
features_ward = ['henkilotunnus', 'alkuhetki', 'loppuhetki', 'vo_toimipiste_nimi', 'pot_eala_selite', 'paadg_oire_selite', 'mista_lah_tuli_koodi',
       'mista_lah_tuli_nimi', 'mista_tuli_koodi', 'mista_tuli_selite', 'jatkoh_laitos_nimi',
       'jatkoh_toimipiste_nimi', 'jh_koodi', 'jh_selite', 'osastohoito_numero', 'shjakso_numero']
ward = dataset_ward.to_pandas_dataframe()[features_ward]
ward.replace("", float("NaN"), inplace=True)
ward.dropna(subset=['alkuhetki', 'loppuhetki'], inplace=True)
ward.drop_duplicates(subset='osastohoito_numero', inplace=True)
ward = ward[(ward.alkuhetki >= start) & (ward.alkuhetki < end)]
ward_train = ward
ward_test = ward[ward.alkuhetki >= train_test_day]

Failed to extract subscription information, Exception=AttributeError; 'Logger' object has no attribute 'activity_info'
Failed to extract subscription information, Exception=AttributeError; 'Logger' object has no attribute 'activity_info'
Failed to extract subscription information, Exception=AttributeError; 'Logger' object has no attribute 'activity_info'
Failed to extract subscription information, Exception=AttributeError; 'Logger' object has no attribute 'activity_info'
Failed to extract subscription information, Exception=AttributeError; 'Logger' object has no attribute 'activity_info'
Failed to extract subscription information, Exception=AttributeError; 'Logger' object has no attribute 'activity_info'


In [29]:
ward_train['alkuhetki'] = pd.to_datetime(ward_train['alkuhetki'], format="%Y-%m-%d %H:%M:%S")
ward_train['loppuhetki'] = pd.to_datetime(ward_train['loppuhetki'], format="%Y-%m-%d %H:%M:%S")
ward_train = ward_train[ward_train['alkuhetki'] <= ward_train['loppuhetki']]
ward_train['aikaväli'] = pd.arrays.IntervalArray.from_arrays(left = ward_train['alkuhetki'], right = ward_train['loppuhetki'], closed='neither')
ward_train['palveluaika'] = np.round(((ward_train['loppuhetki'] - ward_train['alkuhetki']).astype('timedelta64[s]') / 3600 / 24))
ward_train['alku'] = ward_train.alkuhetki
ward_train['loppu'] = ward_train.loppuhetki
ward_train.reset_index(drop=True, inplace=True)

In [39]:
ward_test['alkuhetki'] = pd.to_datetime(ward_test['alkuhetki'], format="%Y-%m-%d %H:%M:%S")
ward_test['loppuhetki'] = pd.to_datetime(ward_test['loppuhetki'], format="%Y-%m-%d %H:%M:%S")
ward_test = ward_test[ward_test['alkuhetki'] <= ward_test['loppuhetki']]
ward_test['aikaväli'] = pd.arrays.IntervalArray.from_arrays(left = ward_test['alkuhetki'], right = ward_test['loppuhetki'], closed='neither')
ward_test['palveluaika'] = np.round(((ward_test['loppuhetki'] - ward_test['alkuhetki']).astype('timedelta64[s]') / 3600 / 24))
ward_test['alku'] = ward_test.alkuhetki
ward_test['loppu'] = ward_test.loppuhetki
ward_test.reset_index(drop=True, inplace=True)

In [40]:
aika = train_test_day
span_day = pd.DataFrame(data={'time':[aika + timedelta(hours=x) for x in range(0, const3, 8)]})
timestamp_day = [aika + timedelta(hours=x) for x in range(0, const4, 8)]
ward_test_day = pd.DataFrame(data={'time':timestamp_day})
ward_test_day['weekday'] = ward_test_day['time'].dt.weekday
ward_test_day['month'] = ward_test_day['time'].dt.month
ward_test_day['timespan'] = pd.arrays.IntervalArray.from_arrays(left = span_day['time'][0:-1], right = span_day['time'][1:], closed='left')
ward_test_day['patient_count'] = pd.DataFrame([pd.arrays.IntervalArray(ward_test_day['timespan']).overlaps(b) for b in ward_test['aikaväli']]).sum()

for t in range(len(ward_test_day['timespan'])):
    sum = 0
    for a, b in zip(ward_test['aikaväli'], ward_test['loppuhetki']):
        if ((ward_test_day.loc[t, 'timespan'].overlaps(a)) == True) & ((b in ward_test_day.loc[t, 'timespan']) == False):
            sum += 1
    ward_test_day.loc[t, 'JOL1'] = sum
ward_test_day['koti'] = pd.DataFrame([pd.arrays.IntervalArray(ward_test_day['timespan']).contains(b) for b in ward_test[(ward_test.jh_selite == 'Koti')].loppuhetki]).sum()
ward_test_day['muu'] = pd.DataFrame([pd.arrays.IntervalArray(ward_test_day['timespan']).contains(b) for b in ward_test[(ward_test.jh_selite != 'Koti')].loppuhetki]).sum()
ward_test_day.set_index('time', inplace=True)

In [41]:
ward_test_day['JOL1_prob_00'] = ward_test_day.iloc[::3, :].JOL1 / ward_test_day.iloc[::3, :].patient_count
ward_test_day['JOL1_prob_08'] = ward_test_day.iloc[1::3, :].JOL1 / ward_test_day.iloc[1::3, :].patient_count
ward_test_day['JOL1_prob_16'] = ward_test_day.iloc[2::3, :].JOL1 / ward_test_day.iloc[2::3, :].patient_count
ward_test_day['koti_prob_00'] = ward_test_day.iloc[::3, :].koti / ward_test_day.iloc[::3, :].patient_count
ward_test_day['koti_prob_08'] = ward_test_day.iloc[1::3, :].koti / ward_test_day.iloc[1::3, :].patient_count
ward_test_day['koti_prob_16'] = ward_test_day.iloc[2::3, :].koti / ward_test_day.iloc[2::3, :].patient_count
ward_test_day['muu_prob_00'] = ward_test_day.iloc[::3, :].muu / ward_test_day.iloc[::3, :].patient_count
ward_test_day['muu_prob_08'] = ward_test_day.iloc[1::3, :].muu / ward_test_day.iloc[1::3, :].patient_count
ward_test_day['muu_prob_16'] = ward_test_day.iloc[2::3, :].muu / ward_test_day.iloc[2::3, :].patient_count
ward_test_day_grouped = ward_test_day.groupby(by='weekday')
ward_test_mean = ward_test_day_grouped.mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00', 
    'JOL1_prob_08', 'muu_prob_08', 'koti_prob_08', 'JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]

In [42]:
def metrics(test_df, columns):
    result_df = pd.DataFrame()
    for i, j in zip(3*list(range(0, 9, 3)), range(9, 36, 3)):
        mape = mean_absolute_percentage_error(test_df.iloc[:, i:i+3], test_df.iloc[:, j:j+3])
        mae = mean_absolute_error(test_df.iloc[:, i:i+3], test_df.iloc[:, j:j+3])
        mse = mean_squared_error(test_df.iloc[:, i:i+3], test_df.iloc[:, j:j+3])
        rmse = np.sqrt(mse)
        result_df = pd.concat([result_df, mape, mae, rmse], axis=1)
    mean_df = result_df.mean(axis=0)
    mean_df.index = ['MAPE', 'MAE', 'RMSE'] * 9

    mean_mape_df = result_df.loc[columns]
    mean_df.iloc[list(range(0, 18, 2))] = mean_mape_df.iloc[:, list(range(0, 18, 2))].mean(axis=0)
    mean_by_day_df = pd.DataFrame()
    for i in range(0, 27, 9):
        mean_by_day_df = pd.concat([mean_by_day_df, pd.DataFrame(np.mean([mean_df[0+i:3+i], mean_df[3+i:6+i], mean_df[6+i:9+i]], axis=0))])
    mean_by_day_df.index = ['MAPE', 'MAE', 'RMSE'] * 3
    return mean_df, mean_by_day_df

def smoothen_transition_probs(df_mean):
    for row in range(len(df_mean)):
        probs_sum = np.sum([df_mean.at[row, n] for n in df_mean.columns])
        for names in df_mean.columns:
            df_mean.loc[df_mean.index == row, names] = df_mean.loc[df_mean.index == row, names] / probs_sum + 0.000000001
    return df_mean

In [60]:
ward_true_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-01-22')) & (ward_test_day.index < pd.Timestamp('2019-02-12'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_true_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-01-22')) & (ward_test_day.index < pd.Timestamp('2019-02-12'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_true_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-01-22')) & (ward_test_day.index < pd.Timestamp('2019-02-12'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])

ward_first_midnight = smoothen_transition_probs(ward_test_day[ward_test_day.index < pd.Timestamp('2019-01-01')].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_first_day = smoothen_transition_probs(ward_test_day[ward_test_day.index < pd.Timestamp('2019-01-01')].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_first_night = smoothen_transition_probs(ward_test_day[ward_test_day.index < pd.Timestamp('2019-01-01')].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])   

ward_second_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-11-01')) & (ward_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_second_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-11-01')) & (ward_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_second_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-11-01')) & (ward_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

ward_third_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-12-01')) & (ward_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_third_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-12-01')) & (ward_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_third_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-12-01')) & (ward_test_day.index < pd.Timestamp('2019-01-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

ward_df = pd.concat([ward_true_midnight, ward_true_day, ward_true_night, ward_first_midnight, ward_first_day, ward_first_night, ward_second_midnight, ward_second_day, ward_second_night, ward_third_midnight, ward_third_day, ward_third_night], axis=1)
results_mean, results_mean_day = metrics(ward_df, ['JOL1_prob_00', 'koti_prob_00', 'JOL1_prob_08', 'koti_prob_08', 'JOL1_prob_16', 'koti_prob_16'])

In [61]:
results_mean_day

Unnamed: 0,0
MAPE,71501090.0
MAE,0.03178688
RMSE,0.03658696
MAPE,44791600.0
MAE,0.03010499
RMSE,0.03762957
MAPE,72379150.0
MAE,0.02548242
RMSE,0.03217127


In [45]:
ward_true_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-03-22')) & (ward_test_day.index < pd.Timestamp('2019-04-12'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_true_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-03-22')) & (ward_test_day.index < pd.Timestamp('2019-04-12'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_true_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-03-22')) & (ward_test_day.index < pd.Timestamp('2019-04-12'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])

ward_first_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-12-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_first_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-12-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_first_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2018-12-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])   

ward_second_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-01-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_second_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-01-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_second_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-01-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

ward_third_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-02-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_third_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-02-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_third_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-02-01')) & (ward_test_day.index < pd.Timestamp('2019-03-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

ward_df = pd.concat([ward_true_midnight, ward_true_day, ward_true_night, ward_first_midnight, ward_first_day, ward_first_night, ward_second_midnight, ward_second_day, ward_second_night, ward_third_midnight, ward_third_day, ward_third_night], axis=1)
results_mean, results_mean_day = metrics(ward_df, ['JOL1_prob_00', 'koti_prob_00', 'JOL1_prob_08', 'koti_prob_08', 'JOL1_prob_16', 'koti_prob_16'])

In [46]:
results_mean_day

Unnamed: 0,0
MAPE,205303300.0
MAE,0.03706054
RMSE,0.03886835
MAPE,204009300.0
MAE,0.03389616
RMSE,0.0445958
MAPE,323809400.0
MAE,0.03046996
RMSE,0.03571162


In [47]:
ward_true_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-11-22')) & (ward_test_day.index < pd.Timestamp('2019-12-13'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_true_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-11-22')) & (ward_test_day.index < pd.Timestamp('2019-12-13'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_true_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-11-22')) & (ward_test_day.index < pd.Timestamp('2019-12-13'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])

ward_first_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-08-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_first_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-08-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_first_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-08-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']])   

ward_second_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-09-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_second_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-09-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_second_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-09-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

ward_third_midnight = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-10-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_00', 'muu_prob_00', 'koti_prob_00']])
ward_third_day = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-10-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_08', 'muu_prob_08', 'koti_prob_08']])
ward_third_night = smoothen_transition_probs(ward_test_day[(ward_test_day.index >= pd.Timestamp('2019-10-01')) & (ward_test_day.index < pd.Timestamp('2019-11-01'))].groupby(by='weekday').mean()[['JOL1_prob_16', 'muu_prob_16', 'koti_prob_16']]) 

ward_df = pd.concat([ward_true_midnight, ward_true_day, ward_true_night, ward_first_midnight, ward_first_day, ward_first_night, ward_second_midnight, ward_second_day, ward_second_night, ward_third_midnight, ward_third_day, ward_third_night], axis=1)
results_mean, results_mean_day = metrics(ward_df, ['JOL1_prob_00', 'koti_prob_00', 'JOL1_prob_08', 'koti_prob_08', 'JOL1_prob_16', 'koti_prob_16'])

In [48]:
results_mean_day

Unnamed: 0,0
MAPE,200447700.0
MAE,0.03949489
RMSE,0.04785082
MAPE,283896700.0
MAE,0.04109354
RMSE,0.04944005
MAPE,304596800.0
MAE,0.03640902
RMSE,0.04488343
