### type A GRU 모델 학습 + SHAP

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the type-A dataset and order it by account id, week and object number.
df = pd.read_csv("/content/최최종데이터_a_지난주추가.csv")
df.sort_values(by=["id", "week", "obj_num"], inplace=True)

# Every column from the fifth onward is scaled; the first four are left as-is.
columns = df.columns[4:]

# Chronological split: weeks 5-25 are used for training, weeks 26-30 for testing.
train_weeks = list(range(5, 26))
test_weeks = list(range(26, 31))

# Fit the scaler on the training weeks only and then transform the whole table.
# Fitting on all rows would let the medians/IQRs of the held-out test weeks leak
# into the training features (and into the scaled target).
# NOTE(review): assumes 'week' is not among the scaled columns -- the week
# filters below already rely on raw week numbers.
scaler = RobustScaler()
scaler.fit(df.loc[df["week"].isin(train_weeks), columns])
df[columns] = scaler.transform(df[columns])

# Full-table feature/target views. They are not used by the split below; kept
# in case other cells rely on them.
X = df.drop(columns=['flw_get'])
y = df['flw_get']

# Group by week and concatenate the selected weeks. groupby iterates the weeks
# in sorted order, so rows end up week-major while keeping the (id, obj_num)
# order inside each week.
week_groups = df.groupby('week')
train_data = pd.concat([group for week, group in week_groups if week in train_weeks])
test_data = pd.concat([group for week, group in week_groups if week in test_weeks])

# Columns that must not be fed to the model: the target, identifiers, the date
# column and 'last_flw_get'.
non_feature_columns = ['flw_get', 'id', 'week', 'obj_num', 'week_start_date', 'last_flw_get']

# Split into independent variables (features) and the dependent variable (target).
X_train = train_data.drop(columns=non_feature_columns)
y_train = train_data['flw_get']
X_test = test_data.drop(columns=non_feature_columns)
y_test = test_data['flw_get']

def validation(X_test, y_test, model, title):
    """Evaluate `model` on a hold-out set, plot actual vs. predicted, and save the plot.

    Args:
        X_test: Features passed to ``model.predict``.
        y_test: True target values aligned with ``X_test``.
        model: Fitted estimator exposing ``predict``.
        title: Base name of the saved image (``"<title>.png"``). The title drawn
            on the figure itself is fixed to "GRU A Model".

    Returns:
        Tuple ``(mse, rmse)`` of the prediction error on the hold-out set.
    """
    predictions = model.predict(X_test)

    # RMSE is just the square root of the MSE, so the metric is computed once.
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)

    # Report the validation metrics.
    print("검증 MSE:", mse)
    print("검증 RMSE:", rmse)

    plt.figure(figsize=(15, 10))

    # Actual values in blue, predictions in red, both against the sample position.
    plt.plot(range(len(y_test)), y_test, color='blue')
    plt.plot(range(len(predictions)), predictions, color='red')

    # Dotted vertical guides at x = 20, 40, ..., 200.
    for boundary in range(20, 201, 20):
        plt.axvline(x=boundary, linestyle='dotted')

    plt.title("GRU A Model")
    plt.savefig(f"{title}.png")

    plt.show()

    return mse, rmse

# Fix the TensorFlow random seed for reproducibility.
tf.random.set_seed(42)

# Hyperparameters.
num_units = 64
num_layers = 3  # total number of stacked GRU layers
learning_rate = 0.0001
epochs = 200
batch_size = 16

# Build a stack of exactly `num_layers` GRU layers followed by a linear output.
# The previous construction (first layer + `num_layers - 1` loop + final layer)
# produced num_layers + 1 GRU layers, i.e. one more than the hyperparameter says.
# NOTE(review): X_train is passed to fit() as a 2-D table while the first GRU
# declares (n_features, 1) inputs -- presumably Keras adds the trailing axis
# automatically; confirm on the TensorFlow version in use.
model = Sequential()
for layer_index in range(num_layers):
    layer_kwargs = {}
    if layer_index == 0:
        # Each feature is treated as one timestep with a single channel.
        layer_kwargs["input_shape"] = (X_train.shape[1], 1)
    # Every layer except the last must return the full sequence so that the
    # next GRU receives 3-D input; the last one returns only its final state.
    is_last_layer = layer_index == num_layers - 1
    model.add(GRU(num_units, return_sequences=not is_last_layer, **layer_kwargs))
model.add(Dense(1, activation='linear'))

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss='mean_squared_error')

model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

# Evaluate on the test weeks; also saves the plot as "GRU Model.png".
mean_mse, mean_rmse = validation(X_test, y_test, model, "GRU Model")

In [None]:
!pip install shap

import shap

# Background data for SHAP: the training features as a plain array, wrapped in
# an Independent masker.
masker = shap.maskers.Independent(data=X_train.values)

# Build the explainer from the trained model and the masker.
explainer = shap.Explainer(model, masker)

# Compute SHAP values for every row of the test features.
test_feature_matrix = X_test.values
shap_values = explainer.shap_values(test_feature_matrix)

In [None]:
# SHAP summary plot over the test set. show=False keeps the figure open so a
# title can be added and the image saved before it is displayed.
shap.summary_plot(
    shap_values,
    features=X_test.values,
    feature_names=X_test.columns,
    show=False,
)
plt.gca().set_title("GRU A with SHAP - Summary Plot")
plt.tight_layout()  # adjust the layout
plt.savefig("gru_shap_summary_plot.png")
plt.show()

In [None]:
# SHAP bar plot (plot_type='bar') over the test set. show=False keeps the
# figure open so a title can be added and the image saved before display.
shap.summary_plot(
    shap_values,
    features=X_test.values,
    feature_names=X_test.columns,
    plot_type='bar',
    show=False,
)
plt.gca().set_title("GRU A with SHAP - Bar Plot")
plt.tight_layout()  # adjust the layout
plt.savefig("gru_shap_bar_plot.png")
plt.show()

In [None]:
# One SHAP dependence plot per model feature, with the x-axis converted back to
# the feature's original (unscaled) units.

# Colour values for the points. If the target was among the scaled columns,
# undo the scaling so the colour bar really shows the actual target value.
if 'flw_get' in columns:
    target_index = columns.get_loc('flw_get')
    color_values = y_test.values * scaler.scale_[target_index] + scaler.center_[target_index]
else:
    color_values = y_test.values

for selected_feature_index, selected_feature_name in enumerate(X_test.columns):
    print(selected_feature_name)

    # Two different positions are needed:
    #  * selected_feature_index -- column of this feature in X_test, and hence
    #    in the SHAP matrix computed from X_test.values;
    #  * scaler_index -- position of this feature among `columns`, the columns
    #    the scaler was fitted on.
    # Using one hard-coded offset for both picked another column's centre/scale.
    scaler_index = columns.get_loc(selected_feature_name)

    # Median (center_) and IQR (scale_) learned by the RobustScaler.
    center = scaler.center_[scaler_index]
    scale = scaler.scale_[scaler_index]

    # Invert the scaling: original = scaled * scale + center.
    feature_values_original = X_test[selected_feature_name].values * scale + center

    # Draw the dependence plot.
    plt.figure(figsize=(10, 6))

    plt.scatter(feature_values_original, shap_values[:, selected_feature_index], c=color_values)
    plt.xlabel(selected_feature_name)
    plt.ylabel('SHAP Value')
    plt.title(f'GRU A with SHAP - Dependence Plot for {selected_feature_name}')
    plt.colorbar(label='Actual Target Value')
    plt.savefig(f"{selected_feature_name}_dependence_plot_original.png")
    plt.show()

# Recompute the SHAP values by calling the explainer directly, for use with
# shap.plots.bar.
# NOTE: this rebinds `shap_values`, replacing the result of
# explainer.shap_values(...) computed earlier.
shap_values = explainer(X_test)

# Draw the feature-importance bar plot.
shap.plots.bar(shap_values)