In [42]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read data

In [43]:
# Directory scanned by the loading cell: every entry found in it is read with
# pd.read_csv and keyed by its file name with ".csv" removed.
data_path = './preprocessed data/'

In [44]:
# Column order applied to every frame: '7'..'23' followed by '0'..'6' (24 columns).
# Downstream, the first 15 of these are used as inputs and the remaining 9 as targets.
# (presumably hour-of-day labels — TODO confirm)
column_order = [str(c) for c in list(range(7, 24)) + list(range(0, 7))]

df_train = {}
df_raw = None

# Keep only CSV files (the folder may also hold checkpoints or other artefacts) and
# sort them: os.listdir returns entries in arbitrary order, which would otherwise make
# the key order of df_train differ between machines and runs.
impute = sorted(name for name in os.listdir(data_path) if name.endswith(".csv"))
for filename in impute:
    goto = os.path.join(data_path, filename)
    method = os.path.splitext(filename)[0]

    if method == 'drop_all_nan':
        # Kept apart from the imputed sets as df_raw.
        # reset_index() restores the index as column 0, iloc[:, 1:] drops it again.
        df_raw = pd.read_csv(goto, index_col=0).reset_index().round(3).iloc[:, 1:][column_order]

    elif method == 'ctgan':
        # Used as stored: no rounding and no leading columns dropped.
        df_train[method] = pd.read_csv(goto, index_col=0)[column_order]

    else:
        # iloc[:, 3:] drops the restored index column and the two columns after it.
        df_train[method] = pd.read_csv(goto, index_col=0).reset_index().round(3).iloc[:, 3:][column_order]

# Fail here with a clear message instead of a NameError in a later cell.
if df_raw is None:
    raise FileNotFoundError(f"'drop_all_nan.csv' was not found in {data_path!r}; it is required to build df_raw")


In [45]:
# Names of the loaded imputation methods ('drop_all_nan' is stored in df_raw, not in df_train).
print(df_train.keys())

dict_keys(['ctgan', 'four_avg_drop_nan', 'knn', 'mice', 'three_avg_drop_nan', 'three_next_avg_drop_nan', 'three_pre_avg_drop_nan', 'two_avg_drop_nan', 'two_weighted_drop_nan'])


# Split x, y

In [46]:
X, y = {}, {}

# "raw" is inserted first so it stays the leading key; the imputed frames follow
# in the order they appear in df_train.
frames = {"raw": df_raw, **df_train}
for method, frame in frames.items():
    # The first 15 columns are the inputs, every column after them is a target.
    X[method] = frame.iloc[:, :15].values
    y[method] = frame.iloc[:, 15:].values

# RNN (with self-attention)

In [47]:
from keras.models import Sequential
from keras.layers import Input, Dense, MultiHeadAttention, LayerNormalization, Embedding




In [48]:
def create_rnn_model_self(num_heads, vocab_size, embedding_dim, max_sequence_length, output_dim, verbose):
    """Build and compile an embedding + multi-head self-attention model.

    Layer order: Embedding -> MultiHeadAttention -> LayerNormalization -> Dense(softmax).
    The layers are wired with the functional API because MultiHeadAttention must be
    called with separate query and value tensors, which a Sequential stack cannot
    supply. Both are the embedded sequence here, i.e. self-attention.

    Args:
        num_heads: number of attention heads.
        vocab_size: ``input_dim`` of the Embedding layer (largest integer id + 1).
        embedding_dim: embedding width; also used as ``key_dim`` of each head.
        max_sequence_length: length of every input sequence.
        output_dim: number of units in the final softmax layer.
        verbose: when truthy, print the model summary.

    Returns:
        The compiled Keras model. Its input is (batch, max_sequence_length); the final
        Dense layer acts on the last axis, so the output is
        (batch, max_sequence_length, output_dim).
    """
    # Model is not among the names this notebook imports, so bring it in locally.
    from keras.models import Model

    # The Input layer fixes the sequence length, so Embedding no longer needs the
    # `input_length` argument.
    token_ids = Input(shape=(max_sequence_length,))
    embedded = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(token_ids)
    # Multi-head self-attention: query and value are the same tensor.
    attended = MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)(embedded, embedded)
    # Layer normalisation of the attention output.
    normalized = LayerNormalization()(attended)
    # Final output layer.
    outputs = Dense(output_dim, activation='softmax')(normalized)
    model = Model(inputs=token_ids, outputs=outputs)

    # NOTE(review): softmax + categorical_crossentropy is a classification set-up that
    # expects one-hot targets; the other model in this notebook is trained with
    # mean_squared_error on the same y — confirm this loss is intended.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    if verbose:
        # summary() prints by itself and returns None; wrapping it in print() added a stray "None".
        model.summary()
    return model


# RNN (without self-attention)

In [49]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Dropout, Dense
from sklearn.model_selection import KFold

In [50]:
def create_rnn_model(timesteps, features, verbose, output_dim=9):
    """Build and compile a stacked recurrent regression model.

    Layer order: SimpleRNN(50, relu) -> LSTM(50) -> GRU(50, relu) -> Dense(output_dim).
    The first two recurrent layers return full sequences so the next recurrent layer
    receives one vector per timestep; the GRU returns only its last state.

    Args:
        timesteps: number of timesteps in each input sample.
        features: number of features per timestep.
        verbose: when truthy, print the model summary.
        output_dim: number of units in the final Dense layer (default 9, the value
            that used to be hard-coded).

    Returns:
        The Keras Sequential model, compiled with the adam optimizer and
        mean_squared_error loss.
    """
    model = Sequential([
        SimpleRNN(50, activation='relu', input_shape=(timesteps, features), return_sequences=True),
        LSTM(50, return_sequences=True),
        GRU(50, activation='relu', return_sequences=False),
        Dense(output_dim)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    if verbose:
        # summary() prints by itself and returns None; wrapping it in print() added a stray "None".
        model.summary()
    return model

In [51]:
def model_train(X, y, loss, train_size, self_attention, n_raw_train=71, epochs=50, batch_size=16,
                attention_params=None):
    """Train one model per imputation method and record its loss on held-out raw rows.

    For every key of ``X`` other than "raw", the training set is that method's rows
    followed by the first ``n_raw_train`` rows of the "raw" data; the remaining "raw"
    rows are the test set. Results are written into ``loss`` and ``train_size``;
    nothing is returned.

    Args:
        X: dict mapping a method name to a 2-D input array; must contain "raw".
        y: dict with the same keys as ``X`` holding the matching target arrays.
        loss: dict filled in place with ``model.evaluate``'s result per method.
        train_size: dict filled in place with the number of training rows per method.
        self_attention: truthy -> build the model with create_rnn_model_self,
            otherwise with create_rnn_model.
        n_raw_train: number of leading "raw" rows added to every training set
            (default 71, the value that used to be hard-coded).
        epochs: forwarded to ``model.fit``.
        batch_size: forwarded to ``model.fit``.
        attention_params: dict with the keys "num_heads", "vocab_size" and
            "embedding_dim"; required when ``self_attention`` is truthy.

    Raises:
        ValueError: if ``self_attention`` is truthy and ``attention_params`` lacks a
            required key. (The earlier call passed only two positional arguments to
            create_rnn_model_self, which raised a TypeError.)
    """
    if self_attention:
        required_keys = ("num_heads", "vocab_size", "embedding_dim")
        missing = [key for key in required_keys if key not in (attention_params or {})]
        if missing:
            raise ValueError(
                "self_attention=True requires attention_params with keys "
                f"{list(required_keys)}; missing {missing}"
            )

    # The raw split is the same for every method, so compute it once.
    # (Disabled alternative from the original: train only on imputed rows and test on all raw rows.)
    X_raw_train, y_raw_train = X["raw"][:n_raw_train], y["raw"][:n_raw_train]
    X_test, y_test = X["raw"][n_raw_train:], y["raw"][n_raw_train:]

    # Iterate over the data that was passed in rather than the global df_train.
    for method in X:
        if method == "raw":
            continue

        # Add part of the complete (raw) data to the imputed training rows.
        X_train = np.concatenate((X[method], X_raw_train), axis=0)
        y_train = np.concatenate((y[method], y_raw_train), axis=0)

        if not self_attention:
            # Recurrent layers take (num_samples, timesteps, features_per_timestep).
            X_train_in = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
            X_test_in = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
            model = create_rnn_model(X_train_in.shape[1], X_train_in.shape[2], verbose=False)
        else:
            # create_rnn_model_self starts with an Embedding over sequences of length
            # max_sequence_length, so the arrays keep their (num_samples, timesteps) shape.
            # NOTE(review): Embedding looks up integer ids below vocab_size, and fit only
            # succeeds if the model output shape matches y_train — confirm both hold
            # for this data before relying on this branch.
            X_train_in, X_test_in = X_train, X_test
            model = create_rnn_model_self(
                num_heads=attention_params["num_heads"],
                vocab_size=attention_params["vocab_size"],
                embedding_dim=attention_params["embedding_dim"],
                max_sequence_length=X_train.shape[1],
                output_dim=y_train.shape[1],
                verbose=False,
            )
        model.fit(X_train_in, y_train, epochs=epochs, batch_size=batch_size)

        # Evaluate the model on the held-out raw rows.
        test_loss = model.evaluate(X_test_in, y_test)
        loss[method] = test_loss
        train_size[method] = len(X_train)


In [52]:
# model_train fills both dicts in place, keyed by imputation method:
# loss[method] holds the value returned by model.evaluate on the test rows,
# train_size[method] the number of rows the model was trained on.
loss, train_size = {}, {}
model_train(X, y, loss, train_size, self_attention=True)



Epoch 1/50

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50

KeyboardInterrupt: 

In [None]:
# Methods for which model_train recorded a test loss.
loss.keys()

dict_keys(['ctgan', 'four_avg_drop_nan', 'knn', 'mice', 'three_avg_drop_nan', 'three_next_avg_drop_nan', 'three_pre_avg_drop_nan', 'two_avg_drop_nan', 'two_weighted_drop_nan'])

In [None]:
# Test loss per imputation method, as returned by model.evaluate in model_train.
loss

{'ctgan': 391.9564514160156,
 'four_avg_drop_nan': 143.21177673339844,
 'knn': 196.69554138183594,
 'mice': 150.58103942871094,
 'three_avg_drop_nan': 190.38792419433594,
 'three_next_avg_drop_nan': 152.2080841064453,
 'three_pre_avg_drop_nan': 178.1807861328125,
 'two_avg_drop_nan': 168.1515350341797,
 'two_weighted_drop_nan': 219.41586303710938}

In [None]:
# Training rows per method: the method's own rows plus the raw rows added in model_train.
train_size

{'ctgan': 213,
 'four_avg_drop_nan': 1357,
 'knn': 1530,
 'mice': 1530,
 'three_avg_drop_nan': 1333,
 'three_next_avg_drop_nan': 1333,
 'three_pre_avg_drop_nan': 1356,
 'two_avg_drop_nan': 1330,
 'two_weighted_drop_nan': 597}

In [None]:
# num_rows = 6
# num_cols = 5
# fig, axes = plt.subplots(num_rows, num_cols, figsize=(12, 8))
# axes = axes.flatten() # for easier indexing

# predictions = best_model.predict(best_X_test)

# # Loop through the predictions and actual values
# for i in range(len(X_test)):
#     ax = axes[i]
#     ax.plot(predictions[i].tolist(), label='Predictions')

#     # Plot actual values
#     ax.plot(best_y_test[i].tolist(), label='Actual Values')

#     # Customize the subplot
#     ax.set_title(f'Row {best_test_index[i]}')
#     ax.set_xlabel('Timestep')
#     ax.set_ylabel('Value')
#     # ax.legend()

# # Use tight layout to ensure proper spacing
# plt.tight_layout()

# # Show or save the plot
# plt.show()

# Count the number of values with bp > 125

In [None]:
def nighttime_hpt_count(y, lst, threshold=125):
    """Append, for each row of ``y``, the number of its values above ``threshold``.

    Args:
        y: indexable sequence of rows (``y[i]`` for ``i`` in ``range(len(y))``),
            each row an iterable of numbers.
        lst: list that receives one count per row; it is modified in place.
        threshold: values strictly greater than this are counted (default 125,
            the value that used to be hard-coded).

    Returns:
        None. The counts are delivered through ``lst``.
    """
    for i in range(len(y)):
        # Strict comparison: a value equal to the threshold is not counted.
        lst.append(sum(1 for k in y[i] if k > threshold))

In [None]:
# Per-row counts from nighttime_hpt_count, once for the predictions and once for the true targets.
# NOTE(review): `predictions` and `best_y_test` are not assigned in any live cell shown in
# this notebook (they appear only in the commented-out plotting cell), so this cell raises
# NameError on a fresh kernel — define them before running.
pred_count = []
true_count = []
nighttime_hpt_count(predictions, pred_count)
nighttime_hpt_count(best_y_test, true_count)

NameError: name 'predictions' is not defined

In [None]:
from sklearn.metrics import mean_squared_error

# sklearn's signature is mean_squared_error(y_true, y_pred), so the true counts go first.
# MSE is symmetric in its two arguments, so the printed value is the same as before.
# pred_count and true_count are the lists filled by the nighttime_hpt_count calls.
mse = mean_squared_error(true_count, pred_count)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 5.607142857142857
