In [13]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import time 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

2024-08-11 22:43:28.289646: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-08-11 22:43:28.477667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-08-11 22:43:28.477711: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-08-11 22:43:29.159955: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directo

In [14]:


# Custom Bayesian LSTM cell implementation
class BayesianLSTMCell(tf.keras.layers.LSTMCell):
    """LSTM cell that re-samples its input and recurrent kernels on every call.

    Posterior and prior distributions are created in `build` for the input
    kernel and the recurrent kernel. In `call`, one sample is drawn from each
    posterior and assigned to `self.kernel` / `self.recurrent_kernel` before
    delegating to `LSTMCell.call`.

    NOTE(review): the prior distributions are built but never referenced again
    inside this class, and the class never calls `add_loss`, so no KL term is
    registered here — confirm whether that is intended.
    """
    def __init__(self, units, **kwargs):
        """Store the distribution factories used later by `build`.

        Args:
            units: number of LSTM units; forwarded to `LSTMCell.__init__`.
            **kwargs: forwarded unchanged to `LSTMCell.__init__`.
        """
        super(BayesianLSTMCell, self).__init__(units, **kwargs)
        self.units = units
        # Factory returning the posterior distribution for the input kernel.
        self.kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn()
        # Thin wrapper that forwards its arguments to default_multivariate_normal_fn.
        self.kernel_prior_fn = lambda dtype, shape, name, trainable, add_variable_fn: tfp.layers.default_multivariate_normal_fn(
            dtype=dtype, shape=shape, name=name, trainable=trainable, add_variable_fn=add_variable_fn)
        # Same pair of factories for the recurrent (hidden-to-hidden) kernel.
        self.recurrent_kernel_posterior_fn = tfp.layers.default_mean_field_normal_fn()
        self.recurrent_kernel_prior_fn = lambda dtype, shape, name, trainable, add_variable_fn: tfp.layers.default_multivariate_normal_fn(
            dtype=dtype, shape=shape, name=name, trainable=trainable, add_variable_fn=add_variable_fn)

    def build(self, input_shape):
        """Create the posterior/prior distributions, then run the parent build.

        Args:
            input_shape: shape of the per-step input; only `input_shape[-1]`
                (the feature dimension) is read here.
        """
        # Input kernel has shape [input_dim, 4 * units]
        # (presumably one block of `units` columns per LSTM gate — TODO confirm).
        # dtype is hard-coded to float32 for all four distributions.
        self.kernel_posterior = self.kernel_posterior_fn(
            dtype=tf.float32,
            shape=[input_shape[-1], self.units * 4],
            name='kernel_posterior',
            trainable=True,
            add_variable_fn=self.add_weight
        )
        self.kernel_prior = self.kernel_prior_fn(
            dtype=tf.float32,
            shape=[input_shape[-1], self.units * 4],
            name='kernel_prior',
            trainable=True,
            add_variable_fn=self.add_weight
        )
        # Recurrent kernel has shape [units, 4 * units].
        self.recurrent_kernel_posterior = self.recurrent_kernel_posterior_fn(
            dtype=tf.float32,
            shape=[self.units, self.units * 4],
            name='recurrent_kernel_posterior',
            trainable=True,
            add_variable_fn=self.add_weight
        )
        self.recurrent_kernel_prior = self.recurrent_kernel_prior_fn(
            dtype=tf.float32,
            shape=[self.units, self.units * 4],
            name='recurrent_kernel_prior',
            trainable=True,
            add_variable_fn=self.add_weight
        )
        # NOTE(review): the parent build presumably creates its own
        # kernel / recurrent_kernel weights as well; `call` overwrites those
        # attributes with samples — confirm the parent's weights are not left
        # behind as unused trainable variables.
        super(BayesianLSTMCell, self).build(input_shape)

    def call(self, inputs, states, training=None):
        """Run one LSTM step using freshly sampled kernels.

        Args:
            inputs: input for the current step, forwarded to the parent call.
            states: recurrent state, forwarded to the parent call.
            training: forwarded to the parent call; sampling below happens
                regardless of its value.

        Returns:
            Whatever `LSTMCell.call` returns for these arguments.
        """
        # A new weight sample is drawn on every invocation, so repeated
        # forward passes over the same input differ.
        self.kernel = self.kernel_posterior.sample()
        self.recurrent_kernel = self.recurrent_kernel_posterior.sample()
        return super(BayesianLSTMCell, self).call(inputs, states, training=training)

def build_bayesian_lstm_model(input_shape):
    """Build and compile the Bayesian LSTM regression model.

    Architecture: Input -> RNN(BayesianLSTMCell(10)) -> Dense(1), trained with
    Adam (learning rate 0.01) on mean squared error.

    Args:
        input_shape: shape of a single sample as passed to `Input`
            (the notebook passes `(n_features, 1)`).

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        Input(shape=input_shape),
        tf.keras.layers.RNN(BayesianLSTMCell(10)),
        Dense(1)
    ])
    # Do not fold `sum(model.losses)` into the loss by hand: evaluated here it
    # is a one-off snapshot taken at build time (the integer 0 when no layer
    # has registered a loss), so it can never track the posterior. Keras
    # already adds every entry of `model.losses` to the compiled loss during
    # `fit`, so a manual term would also count registered losses twice.
    model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')
    return model

In [15]:

def load_and_prepare_data(data_file):
    """Load the water-level CSV and build the forecast target and lag features.

    Processing steps:
      1. Read the CSV and parse the 'Time' column as datetimes.
      2. Create 'Target_MHC_Water_Level' as 'MHC_Water_Level' shifted 3 rows
         into the future. Missing target values (including the final 3 rows,
         which have no future value) are filled with the last observed
         'MHC_Water_Level'.
      3. Add lag-3, lag-4 and lag-5 columns for each of the five water-level
         series (MHC, MH, PG, HH, GG).
      4. Drop every row that still contains a NaN (at least the first 5 rows,
         which lack a full lag history).

    Args:
        data_file: path (or file-like object) accepted by `pandas.read_csv`.

    Returns:
        The prepared DataFrame; the original row index is kept (not reset).
    """
    data = pd.read_csv(data_file)
    data['Time'] = pd.to_datetime(data['Time'])

    # Assign the filled Series back explicitly. The previous
    # `data[col].fillna(..., inplace=True)` is a chained in-place update that
    # does not reach the frame under pandas Copy-on-Write, which would leave
    # NaN targets and silently drop the last rows in the dropna below.
    mhc_level = data['MHC_Water_Level']
    data['Target_MHC_Water_Level'] = mhc_level.shift(-3).fillna(mhc_level.iloc[-1])

    # Lag features for offsets 3, 4 and 5. Column creation order is kept
    # (per lag: MHC, MH, PG, HH, GG) because downstream code turns the frame
    # into a positional feature matrix.
    level_prefixes = ('MHC', 'MH', 'PG', 'HH', 'GG')
    for i in range(3, 6):
        for prefix in level_prefixes:
            data[f'{prefix}_Water_Level_lag_{i}'] = data[f'{prefix}_Water_Level'].shift(i)

    return data.dropna()

def split_data(data):
    """Split a frame into leading 80% (train) and trailing 20% (test) by row order.

    Args:
        data: the full DataFrame.

    Returns:
        (train_data, test_data): the first `int(len(data) * 0.8)` rows and the
        remaining rows, both taken positionally with `iloc`.
    """
    cut = int(len(data) * 0.8)
    return data.iloc[:cut], data.iloc[cut:]

In [16]:


def bayes_prepare_train_test_sets(train_data, test_data):
    """Turn the train/test frames into feature matrices and target vectors.

    Features are every column except 'Time' and 'Target_MHC_Water_Level';
    the target is 'Target_MHC_Water_Level'.

    Returns:
        X_train, y_train, X_test, y_test as NumPy arrays (via `.values`).
    """
    target_column = 'Target_MHC_Water_Level'
    excluded_columns = ['Time', target_column]

    def _features_and_target(frame):
        # Feature matrix first, then the target vector, for one split.
        features = frame.drop(columns=excluded_columns).values
        return features, frame[target_column].values

    X_train, y_train = _features_and_target(train_data)
    X_test, y_test = _features_and_target(test_data)
    return X_train, y_train, X_test, y_test

In [17]:

                # Load the CSV, build target/lag columns, and split 80/20 in row order.
                data = load_and_prepare_data('water_data.csv')
                train_data, test_data = split_data(data)
                X_train, y_train, X_test, y_test = bayes_prepare_train_test_sets(train_data, test_data)

                # Both scalers are fitted on the training split only; the test
                # features are transformed with the already-fitted scaler_X.
                # A trailing axis is appended so the arrays are
                # (samples, n_features, 1), as the RNN input below expects.
                scaler_X = MinMaxScaler(feature_range=(0, 1))
                scaler_y = MinMaxScaler(feature_range=(0, 1))
                X_train_scaled = scaler_X.fit_transform(X_train)[..., np.newaxis]
                X_test_scaled = scaler_X.transform(X_test)[..., np.newaxis]
                y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()

                # Per-sample input shape is (n_features, 1).
                model = build_bayesian_lstm_model(input_shape=X_train_scaled.shape[1:])
                model.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32, verbose=1)

2024-08-11 22:43:37.542912: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-08-11 22:43:37.542958: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist
2024-08-11 22:43:37.543271: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/100
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Ep

<keras.callbacks.History at 0x7f2d087f7c90>

In [8]:
# Load the prediction/uncertainty table from disk; its 'Time' column is used
# further down as the time axis for the sampled predictions.
# NOTE(review): presumably written by an earlier run — the export code at the
# end of this notebook is commented out, so confirm where this file comes from.
bayes_pred_uncer =pd.read_csv('bayes_pred_uncer.csv')


In [69]:
def save_bayesian_visual_predictions_to_csv(model,
                                            x,
                                            time_points,
                                            scaler_y,
                                            n_iter=100):
    """Run repeated stochastic forward passes and return them as a DataFrame.

    Despite its name, this function does not write a file: it returns the
    table and leaves saving to the caller.

    Args:
        model: trained model; called as `model(x, training=True)` and expected
            to return an object with a `.numpy()` method yielding one value
            per sample.
        x: test features (already scaled) passed to the model unchanged.
        time_points: one time label per sample (list, array, Index or Series).
            Only the values are used, in order; a Series index is ignored.
        scaler_y: fitted target scaler providing `inverse_transform`.
        n_iter: number of forward passes (columns) to generate.

    Returns:
        DataFrame with a leading 'Time' column followed by
        'Prediction_1' ... 'Prediction_{n_iter}' in original target units.

    Raises:
        ValueError: if the model output length differs from `len(time_points)`.
    """
    # Take positional values only. Inserting a Series directly would align it
    # on its index against the new frame's 0..n-1 index and yield NaN/NaT
    # whenever the Series index is not zero-based (e.g. a slice of test data).
    time_values = np.asarray(time_points)
    n_samples = len(time_values)

    predictions_scaled = np.zeros((n_samples, n_iter))
    for j in range(n_iter):
        draw = np.asarray(model(x, training=True).numpy()).reshape(-1)
        if draw.shape[0] != n_samples:
            raise ValueError(
                f"model returned {draw.shape[0]} predictions but "
                f"{n_samples} time points were given"
            )
        predictions_scaled[:, j] = draw

    # Undo the target scaling. The scaler was fitted on a single column, so
    # feed it a single column and restore the (n_samples, n_iter) layout
    # afterwards instead of relying on broadcasting across n_iter columns.
    predictions = scaler_y.inverse_transform(
        predictions_scaled.reshape(-1, 1)
    ).reshape(predictions_scaled.shape)

    df_predictions = pd.DataFrame(
        predictions,
        columns=[f'Prediction_{i+1}' for i in range(n_iter)])
    df_predictions.insert(0, 'Time', time_values)

    return df_predictions


In [71]:
import time


# Time axis for the prediction table, taken from the loaded CSV.
time_points = bayes_pred_uncer['Time']

# Record the start time (wall clock, in seconds)
start_time = time.time()

# Code being timed: 300 stochastic forward passes over the scaled test set
bayes_ppd_data = save_bayesian_visual_predictions_to_csv(model, 
                                                         X_test_scaled,
                                                         time_points, 
                                                         scaler_y,
                                                         n_iter=300)

# Record the end time
end_time = time.time()

# Compute and report the elapsed time
elapsed_time = end_time - start_time
print(f"Execution time: {elapsed_time} seconds")


Execution time: 61.48241329193115 seconds


In [72]:
# Overwrite the 'Time' column with the raw values of `time_points`
# (positional assignment via `.values`, so the Series index is ignored).
bayes_ppd_data = bayes_ppd_data.assign(Time=time_points.values)


In [83]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import pandas as pd

def predict_and_save_bayesian_visual_predictions(model,
                                                 x,
                                                 time_points,
                                                 scaler_y,
                                                 n_iter=5000):
    """Sample the model repeatedly and summarise the predictive distribution.

    The model is called `n_iter` times on the same input. From the resulting
    draws this function computes the mean prediction, the standard deviation
    ("uncertainty") and a 95% interval, and returns them together with every
    individual draw.

    All reported columns are in original target units. In particular,
    'Uncertainty' is the standard deviation of the inverse-transformed draws,
    so it is on the same scale as 'Mean_Prediction' and the bounds (previously
    it was left in scaled units while the other columns were not).

    The interval bounds use nearest-rank percentiles (2.5 and 97.5), i.e. the
    same rule as the default `interpolation='nearest'` of
    `tfp.stats.percentile`, computed on the scaled draws and then
    inverse-transformed.

    Args:
        model: trained Bayesian LSTM model; called as `model(x, training=True)`
            and expected to return one value per sample (anything convertible
            with `np.asarray`, e.g. an eager tensor).
        x: test features (already scaled) passed to the model unchanged.
        time_points: one time label per sample (list, array, Index or Series).
            Only the values are used, in order; a Series index is ignored.
        scaler_y: fitted target scaler providing `inverse_transform`.
        n_iter: number of forward passes.

    Returns:
        DataFrame with columns 'Time', 'Mean_Prediction', 'Uncertainty',
        'Lower_Bound', 'Upper_Bound', then 'Prediction_1' ...
        'Prediction_{n_iter}'.

    Raises:
        ValueError: if `n_iter` is smaller than 1, or if the number of model
            outputs differs from `len(time_points)`.
    """
    if n_iter < 1:
        raise ValueError("n_iter must be a positive integer")

    # Positional values only: inserting a Series would align on its index and
    # produce NaN/NaT when that index is not 0..n-1 (e.g. a test-set slice).
    time_values = np.asarray(time_points)

    # Repeated forward passes, stacked as (n_iter, n_samples).
    draws_scaled = np.stack(
        [np.asarray(model(x, training=True)).reshape(-1) for _ in range(n_iter)]
    ).astype(np.float64)

    n_samples = draws_scaled.shape[1]
    if len(time_values) != n_samples:
        raise ValueError(
            f"model returned {n_samples} predictions but "
            f"{len(time_values)} time points were given"
        )

    def _to_original_units(values):
        # The scaler was fitted on one column: feed one column, restore shape.
        values = np.asarray(values)
        return scaler_y.inverse_transform(values.reshape(-1, 1)).reshape(values.shape)

    sorted_draws = np.sort(draws_scaled, axis=0)

    def _nearest_rank_percentile(q):
        # Nearest-rank rule: pick the sorted draw at round((n - 1) * q / 100).
        rank = int(np.round((n_iter - 1) * (q / 100.0)))
        return sorted_draws[min(max(rank, 0), n_iter - 1)]

    mean_prediction = _to_original_units(draws_scaled.mean(axis=0))
    lower_bound = _to_original_units(_nearest_rank_percentile(2.5))
    upper_bound = _to_original_units(_nearest_rank_percentile(97.5))

    # One vectorised inverse transform for all draws instead of n_iter calls.
    draws = _to_original_units(draws_scaled)
    # Population standard deviation (ddof=0) across draws, in original units.
    uncertainty = draws.std(axis=0)

    summary = pd.DataFrame({
        'Time': time_values,
        'Mean_Prediction': mean_prediction,
        'Uncertainty': uncertainty,
        'Lower_Bound': lower_bound,
        'Upper_Bound': upper_bound,
    })
    per_draw = pd.DataFrame(
        draws.T,
        columns=[f'Prediction_{i+1}' for i in range(n_iter)])

    # Single concat keeps the documented column order and avoids inserting
    # columns one by one into a very wide frame.
    return pd.concat([summary, per_draw], axis=1)


In [None]:
# Run the predictions, compute the uncertainty, and build the Bayesian PPD table.
# The timestamps are moved 3 hours forward; the target column is the water
# level shifted by 3 rows, so this presumably labels each row with the time
# being forecast (assumes hourly sampling — TODO confirm).
time_points = test_data["Time"] + pd.Timedelta(hours=3)
bayes_ppd_data = predict_and_save_bayesian_visual_predictions(model, 
                                                              X_test_scaled, 
                                                              time_points, 
                                                              scaler_y, 
                                                              n_iter=300)

# # Assemble and save the prediction data
# bayes_pred_uncer = pd.DataFrame({
#     'Time': time_points,
#     "True_Value": test_data['MHC_Water_Level'].shift(-3),
#     "Prediction": bayes_ppd_data['Mean_Prediction'],
#     "Uncertainty": bayes_ppd_data['Uncertainty']
# })

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f2cf83f9350>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/home/idhkdni/.local/lib/python3.7/site-packages/keras/backend.py", line 5134, in <genexpr>
    for ta, out in zip(output_ta_t, flat_output)  File "/home/idhkdni/.local/lib/python3.7/site-packages/tensorflow/python/util/tf_should_use.py", line 245, in wrapped
    error_in_function=error_in_function)


In [None]:
# Overwrite the 'Time' column with the raw values of `time_points`
# (positional assignment via `.values`, so the Series index is ignored).
bayes_ppd_data = bayes_ppd_data.assign(Time=time_points.values)

In [None]:
# Display the prediction table as this cell's output.
bayes_ppd_data

In [59]:
# NOTE(review): `a` is not assigned in any visible cell of this notebook, so on
# a fresh kernel this cell would presumably raise NameError. The recorded
# output looks like a per-draw prediction table — confirm which variable was
# meant and display that instead.
a

Unnamed: 0,Time,Prediction_1,Prediction_2,Prediction_3,Prediction_4,Prediction_5,Prediction_6,Prediction_7,Prediction_8,Prediction_9,...,Prediction_291,Prediction_292,Prediction_293,Prediction_294,Prediction_295,Prediction_296,Prediction_297,Prediction_298,Prediction_299,Prediction_300
0,2024-08-11 22:00:00,1.526106,1.52271,1.523642,1.523435,1.523199,1.523363,1.520638,1.518787,1.527128,...,1.521467,1.521581,1.531349,1.5215,1.519252,1.521794,1.522144,1.521108,1.523878,1.523028
1,2024-08-11 23:00:00,1.539325,1.535879,1.536786,1.536638,1.536485,1.53648,1.533793,1.532016,1.540433,...,1.53484,1.534775,1.544807,1.534592,1.532485,1.535127,1.535393,1.534505,1.536927,1.536253
2,2024-08-12 00:00:00,1.552055,1.548527,1.549371,1.549372,1.549243,1.549096,1.546424,1.544828,1.553193,...,1.547707,1.547452,1.557702,1.547342,1.545184,1.547853,1.548098,1.547339,1.549518,1.549069


In [None]:
# # Save to CSV files
# bayes_pred_uncer.to_csv('../streamlit/data/bayes_pred_uncer.csv', index=False)
# bayes_ppd_data.to_csv('../streamlit/data/bayesian_ppd_visual.csv', index=False)

# print('Bayesian LSTM Model Learning & Prediction Finish\n')