In [None]:
# Install the Nanum font family; the notebook later selects 'NanumGothic' for matplotlib text.
!sudo apt-get install -y fonts-nanum
# Force-rebuild the system font cache (-f) with verbose output (-v) so the new fonts are listed.
!sudo fc-cache -fv
# Remove matplotlib's per-user cache directory so its font list is rebuilt.
# NOTE(review): presumably a kernel/runtime restart is needed before matplotlib picks this up — confirm.
!rm ~/.cache/matplotlib -rf
# EMD-signal is installed for the `PyEMD` import used by the notebook (CEEMDAN).
!pip install EMD-signal
# Compiler toolchain; NOTE(review): presumably only needed if a package builds native code — confirm it is required.
!apt-get install -y build-essential

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
fonts-nanum is already the newest version (20200506-1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
/usr/share/fonts: caching, new cache contents: 0 fonts, 1 dirs
/usr/share/fonts/truetype: caching, new cache contents: 0 fonts, 3 dirs
/usr/share/fonts/truetype/humor-sans: caching, new cache contents: 1 fonts, 0 dirs
/usr/share/fonts/truetype/liberation: caching, new cache contents: 16 fonts, 0 dirs
/usr/share/fonts/truetype/nanum: caching, new cache contents: 12 fonts, 0 dirs
/usr/local/share/fonts: caching, new cache contents: 0 fonts, 0 dirs
/root/.local/share/fonts: skipping, no such directory
/root/.fonts: skipping, no such directory
/usr/share/fonts/truetype: skipping, looped directory detected
/usr/share/fonts/truetype/humor-sans: skipping, looped directory detected
/usr/share/fonts/truetype/liberation: skipping, looped directory detected
/usr/share/fonts/truetype/

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense,  Dropout, BatchNormalization, Masking,Conv1D, MaxPooling1D,Bidirectional, LSTM, Flatten
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import scipy.signal as signal
from scipy.signal import savgol_filter
from PyEMD import CEEMDAN

In [None]:
# Draw plot text with the NanumGothic font family.
plt.rcParams['font.family'] = 'NanumGothic'

# Fix the global NumPy and TensorFlow seeds to the same value.
np.random.seed(2024)
tf.random.set_seed(2024)

In [None]:
# Load the dataset, replace the trade volume (총거래물량) with its absolute value,
# and index the rows by the 날짜 column.
data = (
    pd.read_csv('도매지역별2_서울,광주처리.csv', encoding='utf-8')
    .assign(총거래물량=lambda frame: frame['총거래물량'].abs())
    .set_index('날짜')
)

# Region names; presumably the values found in the 도매지역 column — TODO confirm.
loc = ['서울', '광주', '대전', '부산', '대구']

In [None]:
# Turn row-aligned features/targets into sliding-window samples.
def create_sequences(features, target, time_steps):
    """Pair every window of `time_steps` consecutive feature rows with the next target.

    For each start index ``s`` in ``range(len(features) - time_steps)`` the sample
    is ``features[s:s + time_steps]`` and its label is ``target[s + time_steps]``.

    Args:
        features: sliceable, row-indexed sequence of feature rows.
        target: sequence indexable by row position, aligned with `features`.
        time_steps: number of consecutive rows in each window.

    Returns:
        ``(X, y)``: two NumPy arrays built from the collected windows and labels.
    """
    window_starts = range(len(features) - time_steps)
    windows = [features[start:start + time_steps] for start in window_starts]
    labels = [target[start + time_steps] for start in window_starts]
    return np.array(windows), np.array(labels)

In [None]:
# Smooth a series by summing only the trailing components of its CEEMDAN decomposition.
def process_train_with_emd(y_train, num_imfs_to_use):
    """Rebuild `y_train` from the last `num_imfs_to_use` rows of its CEEMDAN output.

    The series is flattened to 1-D and decomposed with ``CEEMDAN`` (noise seed
    fixed to 42). The last row of the decomposition is treated as the residual,
    so `num_imfs_to_use` counts the residual itself: 1 returns the residual
    alone, 2 returns the residual plus the row before it, and so on.

    Args:
        y_train: array-like target series; any shape, flattened before use.
        num_imfs_to_use: how many trailing rows of the decomposition to sum.
            Values below 1 are treated as 1 (residual only).

    Returns:
        The summed components as a column vector of shape ``(len(series), 1)``.

    Raises:
        IndexError: if `num_imfs_to_use` exceeds the number of rows produced
            by the decomposition.
    """
    emd = CEEMDAN()
    emd.noise_seed(42)

    # Accept lists / Series as well as ndarrays, then make sure the input is 1-D.
    series = np.asarray(y_train).flatten()

    imfs = emd(series)
    print(imfs.shape)

    n_components = imfs.shape[0]
    if num_imfs_to_use > n_components:
        raise IndexError(
            f"num_imfs_to_use={num_imfs_to_use} exceeds the {n_components} "
            "components produced by CEEMDAN"
        )
    n_rows = max(num_imfs_to_use, 1)

    # Start from a copy of the residual row: `imfs[-1]` is a view, so accumulating
    # into it directly would overwrite the decomposition array in place.
    combined = imfs[-1].copy()
    # Add the remaining selected rows in their original order (oldest selected first).
    for component in imfs[-n_rows:-1]:
        combined += component

    return combined.reshape(-1, 1)

In [None]:
def lstm_set(data, 도매지역='광주', c=1, time_steps=80, train_ratio=0.7,
             epochs=100, batch_size=16, patience=10):
    """Train a Conv1D + LSTM regressor for one wholesale region and score it.

    Pipeline: filter `data` to the region, min-max scale the weather features
    and the 총거래물량 target, cut both into sliding windows of `time_steps` rows,
    split the windows chronologically into train/test, replace the training
    target with its EMD-smoothed version, fit the network and report metrics
    in the target's original units.

    Args:
        data: DataFrame with a 도매지역 column, the weather feature columns listed
            in the body, and the 총거래물량 target column.
        도매지역: region value to keep.
        c: forwarded to ``process_train_with_emd`` as ``num_imfs_to_use``.
        time_steps: window length fed to the network.
        train_ratio: fraction of the windows (from the start) used for training;
            the remaining tail is the test set.
        epochs: maximum number of training epochs.
        batch_size: mini-batch size passed to ``model.fit``.
        patience: early-stopping patience on ``val_loss``.

    Returns:
        Tuple ``(y_train, train_pred, y_test, test_pred, mae, mse, rmse, r2)``.
        The four arrays are inverse-transformed to the target's original scale;
        ``y_train`` is the EMD-smoothed training target, not the raw one. The
        four metrics compare ``y_test`` with ``test_pred``.

    Raises:
        ValueError: if the region has no rows, or the series is too short to
            give at least one training window and one test window.
    """
    feature_columns = [
        '평균기온(°C)', '최저기온(°C)', '최고기온(°C)', '강수 계속시간(hr)',
        '일강수량(mm)', '최대 풍속(m/s)', '평균 풍속(m/s)', '평균 상대습도(%)',
        '합계 일조시간(hr)', '합계 일사량(MJ/m2)', '일 최심신적설(cm)',
    ]

    # Keep only the requested region.
    region_rows = data[data['도매지역'] == 도매지역]
    if region_rows.empty:
        raise ValueError(f"no rows found for 도매지역={도매지역!r}")

    features = region_rows[feature_columns].values
    target = region_rows["총거래물량"].values.reshape(-1, 1)

    # Chronological split sizes (training windows first, test windows last).
    n_sequences = len(features) - time_steps
    train_size = int(n_sequences * train_ratio)
    if train_size < 1 or train_size >= n_sequences:
        raise ValueError(
            f"{len(features)} rows are not enough for time_steps={time_steps} "
            f"with train_ratio={train_ratio}"
        )

    # Training windows and their labels only touch the first
    # `train_size + time_steps` rows. Fit the scalers on those rows alone so the
    # min/max of the test period do not leak into training. Features and target
    # get separate scalers so the target one stays available for inverse_transform.
    train_rows = train_size + time_steps
    feature_scaler = MinMaxScaler().fit(features[:train_rows])
    target_scaler = MinMaxScaler().fit(target[:train_rows])
    features_scaled = feature_scaler.transform(features)
    target_scaled = target_scaler.transform(target)

    # Sliding windows, then the train/test split.
    X, y = create_sequences(features_scaled, target_scaled, time_steps)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Only the training target is smoothed; the test target stays raw.
    y_train = process_train_with_emd(y_train, num_imfs_to_use=c)

    # Conv1D -> MaxPooling1D -> LSTM -> Dense(1). The input shape is declared with
    # an explicit Input object rather than `input_shape=` on the first layer.
    model = Sequential([
        tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation="relu"),
        Dense(1),
    ])
    model.compile(optimizer="adam", loss="mse")

    # NOTE(review): the test split doubles as the early-stopping validation set
    # (with restore_best_weights=True), so the test metrics below are selected on
    # the same data they are reported on and will be optimistic.
    early_stopping = EarlyStopping(monitor="val_loss", patience=patience, restore_best_weights=True)
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping],
    )

    # Predict, then map everything back to the target's original scale.
    train_pred = target_scaler.inverse_transform(model.predict(X_train))
    test_pred = target_scaler.inverse_transform(model.predict(X_test))
    y_train = target_scaler.inverse_transform(y_train)
    y_test = target_scaler.inverse_transform(y_test)

    # Test-set metrics in original units.
    mae = mean_absolute_error(y_test, test_pred)
    mse = mean_squared_error(y_test, test_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, test_pred)

    return y_train, train_pred, y_test, test_pred, mae, mse, rmse, r2

In [None]:
# Train one model per EMD component count (1 through 8) for the 부산 region and
# keep each run's full result tuple. Results are appended one at a time so the
# runs finished so far remain in `res` if a later run fails.
res = []
for n_components in range(1, 9):
    run_result = lstm_set(data, 도매지역='부산', c=n_components)
    res.append(run_result)

(9, 1191)
<class 'numpy.ndarray'>
(1191, 1)
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 0.0022 - val_loss: 0.0033
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 7.1693e-04 - val_loss: 0.0026
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 6.6475e-04 - val_loss: 0.0025
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 6.0542e-04 - val_loss: 0.0025
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 5.4764e-04 - val_loss: 0.0026
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 4.5994e-04 - val_loss: 0.0025
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - loss: 4.2803e-04 - val_loss: 0.0024
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 3.6200e-04 - val_loss: 0.0025
Epoch 9/100
[1m75/75[0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - loss: 0.0047 - val_loss: 0.0031
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 7.8115e-04 - val_loss: 0.0028
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 7.3092e-04 - val_loss: 0.0029
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 6.7842e-04 - val_loss: 0.0031
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 6.1670e-04 - val_loss: 0.0029
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 5.1348e-04 - val_loss: 0.0026
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - loss: 3.2091e-04 - val_loss: 0.0024
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 2.5471e-04 - val_loss: 0.0022
Epoch 9/100
[1m75/75[0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 0.0027 - val_loss: 0.0020
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - loss: 0.0018 - val_loss: 0.0020
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0015 - val_loss: 0.0019
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 8.5543e-04 - val_loss: 0.0020
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 9.3191e-04 - val_loss: 0.0021
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 4.5817e-04 - val_loss: 0.0019
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 2.7018e-04 - val_loss: 0.0019
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 2.8085e-04 - val_loss: 0.0019
Epoch 9/100
[1m75/75[0m [32m━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 0.0062 - val_loss: 0.0017
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0026 - val_loss: 0.0016
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.0023 - val_loss: 0.0018
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - loss: 0.0016 - val_loss: 0.0021
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.0012 - val_loss: 0.0022
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 8.8123e-04 - val_loss: 0.0020
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 4.8435e-04 - val_loss: 0.0019
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 2.9534e-04 - val_loss: 0.0018
Epoch 9/100
[1m75/75[0m [32m━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 0.0068 - val_loss: 0.0021
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 0.0039 - val_loss: 0.0043
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0035 - val_loss: 0.0029
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.0026 - val_loss: 0.0031
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.0024 - val_loss: 0.0030
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0016 - val_loss: 0.0021
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 9.8898e-04 - val_loss: 0.0020
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 5.0022e-04 - val_loss: 0.0023
Epoch 9/100
[1m75/75[0m [32m━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 40ms/step - loss: 0.0062 - val_loss: 0.0039
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - loss: 0.0046 - val_loss: 0.0018
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 0.0039 - val_loss: 0.0018
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 0.0037 - val_loss: 0.0020
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 0.0035 - val_loss: 0.0020
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.0033 - val_loss: 0.0025
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.0031 - val_loss: 0.0025
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0027 - val_loss: 0.0023
Epoch 9/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 0.0122 - val_loss: 0.0020
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.0048 - val_loss: 0.0021
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - loss: 0.0043 - val_loss: 0.0018
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0040 - val_loss: 0.0018
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.0037 - val_loss: 0.0023
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.0035 - val_loss: 0.0025
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.0034 - val_loss: 0.0021
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - loss: 0.0031 - val_loss: 0.0027
Epoch 9/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 0.0068 - val_loss: 0.0025
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0053 - val_loss: 0.0016
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0048 - val_loss: 0.0017
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.0049 - val_loss: 0.0020
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.0045 - val_loss: 0.0019
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - loss: 0.0048 - val_loss: 0.0026
Epoch 7/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0045 - val_loss: 0.0044
Epoch 8/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 0.0045 - val_loss: 0.0038
Epoch 9/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
# From each run's result tuple keep only positions 4 onward (mae, mse, rmse, r2).
r = [run[4:] for run in res]

In [None]:
# Performance by number of summed IMF components.
# Keeps the current sweep results under a 서울-specific name and tabulates the metrics.
# NOTE(review): `res` and `r` hold whatever region the sweep cell last trained. The sweep
# cell as saved hard-codes 도매지역='부산', so on a fresh Restart & Run All this table would
# contain 부산 results, not 서울 — rerun the sweep with 도매지역='서울' before this cell.
서울값=res
서울=pd.DataFrame(r,columns=['mae','mse','rmse','r2'])
서울

Unnamed: 0,mae,mse,rmse,r2
0,2151581.0,8728550000000.0,2954412.0,-0.029456
1,1909170.0,6580855000000.0,2565318.0,0.223846
2,1608468.0,4863925000000.0,2205431.0,0.426343
3,1573496.0,4455275000000.0,2110752.0,0.474539
4,1550873.0,4377196000000.0,2092175.0,0.483748
5,1644765.0,5162110000000.0,2272028.0,0.391174
6,1713632.0,5046711000000.0,2246488.0,0.404785
7,1998021.0,7782124000000.0,2789646.0,0.082167


In [None]:
# Keeps the current sweep results under a 광주-specific name and tabulates the metrics.
# NOTE(review): `res` and `r` hold whatever region the sweep cell last trained. The sweep
# cell as saved hard-codes 도매지역='부산', so on a fresh Restart & Run All this table would
# contain 부산 results, not 광주 — rerun the sweep with 도매지역='광주' before this cell.
광주값=res
광주=pd.DataFrame(r,columns=['mae','mse','rmse','r2'])
광주

Unnamed: 0,mae,mse,rmse,r2
0,448661.85905,397418200000.0,630411.123259,-0.01643
1,486477.876744,425911000000.0,652618.596756,-0.089303
2,456210.018848,369113000000.0,607546.74126,0.055962
3,380352.135766,302075900000.0,549614.360196,0.227415
4,364597.0151,279882800000.0,529039.531442,0.284176
5,359795.520971,267029600000.0,516749.053606,0.317049
6,368795.834327,293743100000.0,541980.742433,0.248727
7,339131.43977,244672100000.0,494643.442442,0.37423


In [None]:
# Keeps the current sweep results under a 대전-specific name and tabulates the metrics.
# NOTE(review): `res` and `r` hold whatever region the sweep cell last trained. The sweep
# cell as saved hard-codes 도매지역='부산', so on a fresh Restart & Run All this table would
# contain 부산 results, not 대전 — rerun the sweep with 도매지역='대전' before this cell.
대전값=res
대전=pd.DataFrame(r,columns=['mae','mse','rmse','r2'])
대전

Unnamed: 0,mae,mse,rmse,r2
0,332258.63509,203748700000.0,451385.32123,-0.003945
1,292715.775077,152979200000.0,391125.588018,0.246215
2,262016.885555,123866900000.0,351947.319416,0.389662
3,282262.921545,142501600000.0,377493.820911,0.297842
4,225225.298279,99922580000.0,316105.329396,0.507645
5,227248.777313,109868700000.0,331464.538555,0.458636
6,230297.305658,112768800000.0,335810.716186,0.444347
7,270649.780358,142145200000.0,377021.502923,0.299598


In [None]:
# Keeps the current sweep results under a 대구-specific name and tabulates the metrics.
# NOTE(review): `res` and `r` hold whatever region the sweep cell last trained. The sweep
# cell as saved hard-codes 도매지역='부산', so on a fresh Restart & Run All this table would
# contain 부산 results, not 대구 — rerun the sweep with 도매지역='대구' before this cell.
대구값=res
대구=pd.DataFrame(r,columns=['mae','mse','rmse','r2'])
대구

Unnamed: 0,mae,mse,rmse,r2
0,242747.418719,106248200000.0,325957.37396,-0.087966
1,229029.412971,101513800000.0,318612.246001,-0.039485
2,183844.452519,79574410000.0,282089.364434,0.18517
3,156484.75719,59863450000.0,244670.079351,0.387007
4,156414.018393,59788140000.0,244516.136807,0.387779
5,152481.363262,50700630000.0,225168.005204,0.480833
6,156217.925217,58647100000.0,242171.627603,0.399463
7,167170.348894,52253090000.0,228589.35853,0.464936


In [None]:
# Keeps the current sweep results under a 부산-specific name and tabulates the metrics.
# NOTE(review): `res` and `r` come from whichever sweep ran last; 부산 is the region the
# sweep cell hard-codes as saved, so this is the one table a plain Run All reproduces.
부산값=res
부산=pd.DataFrame(r,columns=['mae','mse','rmse','r2'])
부산

Unnamed: 0,mae,mse,rmse,r2
0,147191.9419,33458120000.0,182915.602784,0.001152
1,141373.761311,31530620000.0,177568.644947,0.058695
2,129156.247462,28216770000.0,167978.495449,0.157625
3,116614.63871,24854300000.0,157652.474903,0.258008
4,133219.635669,30202200000.0,173787.804156,0.098353
5,129047.561163,26624990000.0,163171.657161,0.205146
6,123757.218644,27815960000.0,166781.176055,0.169591
7,123479.337448,24464610000.0,156411.675811,0.269641


In [None]:
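# NOTE(review): free-form result notes, kept as comments because `8-0.5299 best` is not
# valid Python and would make this code cell raise a SyntaxError.
# Presumably "<setting>-<score>" pairs — TODO confirm what the leading numbers refer to.
# 4-0.52
# 7-0.49
# 8-0.5299 best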