In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, MultiHeadAttention, LayerNormalization, Add
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
# 1. 데이터 로드 및 전처리
# 파일 경로
pollutants_path = '../../NewData/Weekly_Air_Pollutants.csv'
temperature_path = '../../ClimateDataTeam/climate_data/merged_weekly_avg_temp.csv'

# 데이터 로드
pollutants_df = pd.read_csv(pollutants_path)
temperature_df = pd.read_csv(temperature_path)

# 속성값 정의
pollutants_column = 
climate_column = 

# datetime 컬럼 변환 및 병합
pollutants_df['datetime'] = pd.to_datetime(pollutants_df['datetime'])
temperature_df['datetime'] = pd.to_datetime(temperature_df['datetime'])
merged_df = pd.merge(pollutants_df, temperature_df, on='datetime', how='inner')

In [3]:
# X와 y 분리
X = merged_df.drop(columns=['datetime', 'temp', 'humidity', 'pressure'])  # datetime 및 출력 변수 제외
y = merged_df[['temp', 'humidity', 'pressure']]  # 출력 변수

KeyError: "['pressure'] not found in axis"

In [None]:
# 2. 시계열 윈도우 생성 함수
def create_time_series_features(X, y, lag):
    X_features, y_labels = [], []
    for i in range(len(X) - lag):
        X_features.append(X.iloc[i:i+lag].values)
        y_labels.append(y.iloc[i+lag].values)
    return np.array(X_features), np.array(y_labels)

# 시계열 윈도우 생성
lag = 10  # 과거 10주 데이터를 사용
X_ts, y_ts = create_time_series_features(X, y, lag)

# 데이터 확인
print(f"X_ts shape: {X_ts.shape}, y_ts shape: {y_ts.shape}")

In [None]:
# 3. Transformer 블록 정의
def transformer_block(x, num_heads, key_dim, ff_dim, dropout_rate):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    attn_output = Dropout(dropout_rate)(attn_output)
    out1 = Add()([x, attn_output])
    out1 = LayerNormalization(epsilon=1e-6)(out1)

    ffn_output = Dense(ff_dim, activation="relu")(out1)
    ffn_output = Dropout(dropout_rate)(ffn_output)
    out2 = Add()([out1, ffn_output])
    out2 = LayerNormalization(epsilon=1e-6)(out2)

    return out2

In [None]:
# 4. 모델 생성
def create_model(input_shape, output_dim, num_heads=4, key_dim=32, ff_dim=128, dropout_rate=0.1):
    inputs = Input(shape=input_shape)

    # LSTM Layer
    x = LSTM(64, return_sequences=True, activation="relu")(inputs)
    x = Dropout(0.2)(x)

    # Transformer Block
    x = transformer_block(x, num_heads=num_heads, key_dim=key_dim, ff_dim=ff_dim, dropout_rate=dropout_rate)

    # Flatten Transformer Output
    x = LSTM(32, activation="relu", return_sequences=False)(x)
    x = Dropout(0.2)(x)

    # Output Layer
    outputs = Dense(output_dim)(x)

    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
    return model

In [None]:
# 5. 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_ts, y_ts, test_size=0.2, random_state=42)

In [None]:
# 6. 모델 정의 및 학습
input_shape = (X_train.shape[1], X_train.shape[2])  # (lag, 입력 특성 수)
output_dim = y_ts.shape[1]  # 출력 변수 수
model = create_model(input_shape, output_dim)

In [None]:
# 모델 학습
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# 7. 예측 및 평가
y_pred = model.predict(X_test)

# 평가
mae = mean_absolute_error(y_test, y_pred, multioutput='raw_values')
mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')

print(f"MAE per output variable: {mae}")
print(f"MSE per output variable: {mse}")