# MAML-RNN - ATL two way (routes with ATL as origin or destination)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Dropout, Embedding, Concatenate, Flatten, RepeatVector
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
import random
import os
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import text

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides NumPy divide-by-zero / invalid-value warnings
# raised by the metric calculations further down.
warnings.filterwarnings("ignore")

# Seed NumPy, TensorFlow and the standard-library RNG for reproducibility.
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)
random.seed(seed_value)

# Environment variables intended to support deterministic runs.
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting
# it here only affects child processes. TF_DETERMINISTIC_OPS is assigned after
# TensorFlow has already been imported - confirm it still takes effect.
os.environ['PYTHONHASHSEED'] = str(seed_value)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Database connection settings.
# Every value can be overridden through an environment variable so that
# credentials do not have to live in the notebook; the literals below are kept
# only as backward-compatible fallbacks.
# NOTE(review): a real host / user / password is committed here - rotate the
# password and remove the fallbacks once the environment variables are in use.
from urllib.parse import quote_plus

db_host = os.environ.get('PANEL_DB_HOST', 'kaylee-db.cbgcswckszgl.us-east-1.rds.amazonaws.com')
db_port = int(os.environ.get('PANEL_DB_PORT', 3306))
db_user = os.environ.get('PANEL_DB_USER', 'lee')
db_password = os.environ.get('PANEL_DB_PASSWORD', '1111')
db_name = os.environ.get('PANEL_DB_NAME', 'panel_reduced_reshaped_db')

# Create the SQLAlchemy engine. User and password are percent-encoded because
# characters such as '@', ':' or '/' would otherwise corrupt the URL.
engine = create_engine(
    f'mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}'
    f'@{db_host}:{db_port}/{db_name}'
)

# Load the passenger rows for every route that touches the hub airport, in
# either direction ("two way").
# NOTE(review): the original comment and the notebook title name MCO, but the
# query below filters on ATL - confirm which hub is intended.
with engine.connect() as connection:
    query = text("""
    SELECT route, date, Passengers
    FROM panel_reduced_reshaped
    WHERE `Destination Airport` = 'ATL' 
       OR `Origin Airport` = 'ATL' 
    ORDER BY route, date
    """)
    df = pd.read_sql(query, connection, parse_dates=['date'])  # parse the 'date' column as datetime
    print(df.head())

# Report the date range covered by the loaded rows.
print(f"Date range in original data: {df['date'].min()} to {df['date'].max()}")

# Normalise the 'route' column.
df['route'] = df['route'].str.strip().str.upper()  # strip whitespace and upper-case

# Route selection: keep at most the first 2000 distinct routes in data order.
num_routes = min(2000, df['route'].nunique())  # original note: select all two-way routes
unique_routes = df['route'].unique()[:num_routes]
df = df[df['route'].isin(unique_routes)].reset_index(drop=True)

# Encode each route name as an integer id (used as the embedding index).
label_encoder = LabelEncoder()
df['Route_ID'] = label_encoder.fit_transform(df['route'])
# num_routes is rebound to the number of encoded classes; create_model() reads it.
num_routes = len(label_encoder.classes_)

# Length of each input window.
sequence_length = 12  # 12-month sequence

# Earliest and latest date in the retained data.
min_date = df['date'].min()
max_date = df['date'].max()
print(f"Data Date Range: {min_date.date()} to {max_date.date()}")

# Sequence generation
def generate_sequences(data, seq_length):
    """Build sliding-window samples for every route.

    Args:
        data: DataFrame with 'route', 'date', 'Passengers' and 'Route_ID'
            columns.
        seq_length: Number of consecutive observations in each input window.

    Returns:
        A list of dicts, one per window, with keys 'Sequence' (the
        ``seq_length`` passenger values), 'Target' (the value right after the
        window), 'Route_ID', 'Date' (date of the target row) and 'Route'.
        Routes with fewer than ``seq_length + 1`` rows contribute nothing.
        Rows whose route is missing (NaN) are skipped by the groupby.
    """
    sequences = []
    # One groupby pass replaces re-filtering the whole frame once per route.
    # sort=False keeps routes in first-appearance order, which is the order
    # data['route'].unique() would give.
    for route, route_data in data.groupby('route', sort=False):
        route_data = route_data.sort_values('date')
        passenger_counts = route_data['Passengers'].values
        n_windows = len(passenger_counts) - seq_length
        if n_windows < 1:
            continue

        dates_list = route_data['date'].values
        route_id = route_data['Route_ID'].iloc[0]

        for start in range(n_windows):
            end = start + seq_length
            sequences.append({
                'Sequence': passenger_counts[start:end],
                'Target': passenger_counts[end],
                'Route_ID': route_id,
                'Date': dates_list[end],
                'Route': route
            })
    return sequences

# Turn the windowed samples into a DataFrame (one row per sample).
sequences = generate_sequences(df, sequence_length)
data = pd.DataFrame(sequences)

# Sanity check: range of target dates covered by the samples.
print(f"Date range in sequences data: {data['Date'].min().date()} to {data['Date'].max().date()}")

# Order all samples chronologically by target date.
data = data.sort_values('Date').reset_index(drop=True)

# Task creation and support/query split
def create_fomaml_tasks(data, support_size, query_size):
    """Create one support/query task per route.

    For each distinct value in ``data['Route']`` the route's rows are ordered
    by 'Date'; the first ``support_size`` rows become the support set and the
    following ``query_size`` rows the query set. Routes with fewer than
    ``support_size + query_size`` rows are dropped.

    Returns:
        A list of dicts with keys 'route', 'support_set' and 'query_set'.
    """
    window = support_size + query_size
    built = []
    for route_name in data['Route'].unique():
        chronological = (
            data.loc[data['Route'] == route_name]
            .sort_values('Date')
            .reset_index(drop=True)
        )
        if len(chronological) < window:
            continue

        # Time-based split: earlier rows for adaptation, later rows for evaluation.
        head = chronological.iloc[:support_size]
        tail = chronological.iloc[support_size:window]
        if len(head) < 1 or len(tail) < 1:
            continue

        built.append({
            'route': route_name,
            'support_set': head,
            'query_set': tail
        })
    return built

# Number of samples per route used for adaptation (support) and evaluation (query).
support_size = 10
query_size = 6  # the 6 samples after the support set (original note: "last 6 months")
tasks = create_fomaml_tasks(data, support_size, query_size)
print(f"Number of tasks: {len(tasks)}")

# Use every task for both meta-training and meta-testing.
# NOTE(review): both names point at the same list, so the query sets evaluated
# later are the ones whose loss drives the meta-update - confirm this is intended.
meta_train_tasks = tasks
meta_test_tasks = tasks

# Per-task scaling
def scale_task(task):
    """Standardise one task's inputs and targets using support-set statistics.

    Two StandardScaler objects are fitted on the support set only (one on all
    flattened window values, one on the targets) and then applied to both the
    support and the query set. The scaled arrays and both scalers are stored
    on ``task`` (mutated in place) under 'X_support', 'y_support', 'X_query',
    'y_query', 'scaler_X' and 'scaler_y'; the same dict is returned.
    """
    support_frame = task['support_set']
    query_frame = task['query_set']

    support_windows = np.array(list(support_frame['Sequence']))
    support_targets = support_frame['Target'].values.reshape(-1, 1)
    query_windows = np.array(list(query_frame['Sequence']))
    query_targets = query_frame['Target'].values.reshape(-1, 1)

    # Fit on the support set only; fit() returns the scaler itself.
    feature_scaler = StandardScaler().fit(support_windows.reshape(-1, 1))
    target_scaler = StandardScaler().fit(support_targets)

    def _scale_windows(windows):
        # Scale value-by-value, then restore the (samples, steps, 1) layout.
        flat = feature_scaler.transform(windows.reshape(-1, 1))
        return flat.reshape(-1, sequence_length, 1)

    task.update({
        'X_support': _scale_windows(support_windows),
        'y_support': target_scaler.transform(support_targets),
        'X_query': _scale_windows(query_windows),
        'y_query': target_scaler.transform(query_targets),
        'scaler_X': feature_scaler,  # kept for inverse transforms
        'scaler_y': target_scaler,
    })
    return task

# Scale every task; scale_task() mutates and returns the same dict objects.
meta_train_tasks = [scale_task(task) for task in meta_train_tasks]
meta_test_tasks = meta_train_tasks  # meta-test reuses the same (scaled) tasks

# Model definition - RNN
def create_model():
    """Build the route-conditioned SimpleRNN regressor.

    Inputs are a passenger window of shape (sequence_length, 1) and a route id
    of shape (1,). The route id is embedded, repeated for every time step and
    concatenated with the passenger values before a 32-unit SimpleRNN, dropout
    and a single linear output. Reads the module-level ``num_routes`` and
    ``sequence_length``. The returned model is not compiled.
    """
    route_vec_size = int(np.sqrt(num_routes))

    series_in = Input(shape=(sequence_length, 1), name='Passenger_Input')
    route_in = Input(shape=(1,), name='Route_Input')

    # Route id -> embedding vector -> one copy per time step.
    embed_layer = Embedding(input_dim=num_routes, output_dim=route_vec_size, input_length=1)
    route_vec = embed_layer(route_in)
    flatten_layer = Flatten()
    route_vec = flatten_layer(route_vec)
    repeat_layer = RepeatVector(sequence_length)
    route_per_step = repeat_layer(route_vec)

    # Join passenger value and route embedding along the feature axis.
    concat_layer = Concatenate(axis=2)
    step_features = concat_layer([series_in, route_per_step])

    # Recurrent layer followed by dropout.
    recurrent_layer = SimpleRNN(32, activation='tanh', kernel_regularizer=l2(0.01))
    hidden = recurrent_layer(step_features)
    dropout_layer = Dropout(0.3)
    hidden = dropout_layer(hidden)

    head_layer = Dense(1, activation='linear')
    prediction = head_layer(hidden)

    return Model(inputs=[series_in, route_in], outputs=prediction)

# The meta-model whose weights are learned across all route tasks.
meta_model = create_model()

# Hyperparameters
meta_epochs = 20  # number of meta-training epochs (original note: raised to 20)
adaptation_steps = min(5, support_size)  # inner-loop gradient steps per task
# NOTE(review): the original trailing notes mention a temporary change to 2e-2 / 2e-3,
# but the values in the code are 1e-2 and 1e-3.
adaptation_learning_rate = 1e-2  # inner-loop (SGD) learning rate
meta_learning_rate = 1e-3  # outer-loop (Adam) learning rate

meta_optimizer = tf.keras.optimizers.Adam(learning_rate=meta_learning_rate)
loss_fn = tf.keras.losses.MeanSquaredError()

# Meta-training loop (first-order MAML).
# For each task: copy the meta-weights into a scratch model, take a few SGD
# steps on the support set, then apply the gradient of the query loss
# (taken w.r.t. the adapted weights) to the meta-model.
best_query_loss = float('inf')  # lowest epoch-average query loss seen so far
best_weights = None  # meta-model weights captured at the end of that epoch

num_tasks = len(meta_train_tasks)
if num_tasks == 0:
    raise ValueError(
        "No meta-training tasks available; check the loaded routes and the "
        "support/query sizes."
    )

# A single scratch model and optimizer are reused for every task. Building a
# fresh Keras model per task per epoch only to overwrite its weights is slow
# and keeps allocating graph/variable state. Plain SGD is created without
# momentum here, so reusing it does not leak information between tasks.
adapted_model = create_model()
adaptation_optimizer = tf.keras.optimizers.SGD(learning_rate=adaptation_learning_rate)

for epoch in range(meta_epochs):
    print(f"\nMeta-training Epoch {epoch + 1}/{meta_epochs}")
    epoch_support_loss = 0
    epoch_query_loss = 0

    for task in meta_train_tasks:
        # Fetch the scaled data for this route.
        X_support = task['X_support']
        y_support = task['y_support']
        X_query = task['X_query']
        y_query = task['y_query']
        route_id = task['support_set']['Route_ID'].iloc[0]
        support_route_ids = np.full((len(X_support), 1), route_id)
        query_route_ids = np.full((len(X_query), 1), route_id)

        # Start the inner loop from the current meta-weights.
        adapted_model.set_weights(meta_model.get_weights())

        # Adaptation steps on the support set.
        for _ in range(adaptation_steps):
            with tf.GradientTape() as inner_tape:
                preds_support = adapted_model([X_support, support_route_ids], training=True)
                support_loss = loss_fn(y_support, preds_support)
            grads = inner_tape.gradient(support_loss, adapted_model.trainable_variables)
            adaptation_optimizer.apply_gradients(zip(grads, adapted_model.trainable_variables))

        # Only the query forward pass needs to be recorded: the meta-gradient
        # is taken w.r.t. the adapted weights, not through the inner updates.
        with tf.GradientTape() as tape:
            preds_query = adapted_model([X_query, query_route_ids], training=False)
            query_loss = loss_fn(y_query, preds_query)

        # Accumulate losses (support_loss is the loss of the last adaptation step).
        epoch_support_loss += support_loss.numpy()
        epoch_query_loss += query_loss.numpy()

        # Gradient of the query loss w.r.t. the adapted weights ...
        grads = tape.gradient(query_loss, adapted_model.trainable_variables)
        # ... applied to the corresponding meta-model weights.
        meta_optimizer.apply_gradients(zip(grads, meta_model.trainable_variables))

    # Per-epoch average losses.
    avg_support_loss = epoch_support_loss / num_tasks
    avg_query_loss = epoch_query_loss / num_tasks
    print(f"Average Support Loss: {avg_support_loss:.4f}, Average Query Loss: {avg_query_loss:.4f}")
    print(f"Meta-training Epoch {epoch + 1} completed.")

    # Remember the weights of the epoch with the lowest average query loss.
    # The average is accumulated while the weights change during the epoch,
    # so it only approximates the loss of the end-of-epoch snapshot.
    if avg_query_loss < best_query_loss:
        best_query_loss = avg_query_loss
        best_weights = meta_model.get_weights()

# Restore the best snapshot. best_weights stays None when no epoch ran or no
# epoch produced a loss below infinity (e.g. NaN losses).
if best_weights is not None:
    meta_model.set_weights(best_weights)
    print("Meta-training completed. Using the weights with the lowest query loss.")
else:
    print("Meta-training completed. No epoch improved the query loss; keeping the current weights.")

# Meta-test: fine-tune the meta-model on each route and evaluate it.
# NOTE(review): meta_test_tasks is the same list used for meta-training, so the
# query sets scored here also drove the meta-updates - confirm this is intended.
performance_metrics = []
adapted_models = {}  # fine-tuned model per route

# Process the routes in name order.
meta_test_tasks_sorted = sorted(meta_test_tasks, key=lambda x: x['route'])

# Fine-tune and score one route at a time.
for task in meta_test_tasks_sorted:
    route = task['route']
    print(f"\nFine-tuning on route: {route}")
    # Fetch the scaled data for this route.
    X_support = task['X_support']
    y_support = task['y_support']
    X_query = task['X_query']
    y_query = task['y_query']
    route_id = task['support_set']['Route_ID'].iloc[0]
    route_id_tensor = tf.constant([[route_id]], dtype=tf.int32)

    scaler_y = task['scaler_y']  # route-specific target scaler

    # Clone the meta-model: same architecture, meta-learned weights.
    adapted_model = create_model()
    adapted_model.set_weights(meta_model.get_weights())
    optimizer = tf.keras.optimizers.SGD(learning_rate=adaptation_learning_rate)

    # Fine-tune on the support set.
    for _ in range(adaptation_steps):
        with tf.GradientTape() as tape:
            preds = adapted_model([X_support, np.repeat(route_id_tensor, len(X_support), axis=0)], training=True)
            loss = loss_fn(y_support, preds)
        grads = tape.gradient(loss, adapted_model.trainable_variables)
        optimizer.apply_gradients(zip(grads, adapted_model.trainable_variables))

    # Predict the query set and map predictions back to passenger counts.
    query_preds_scaled = adapted_model([X_query, np.repeat(route_id_tensor, len(X_query), axis=0)], training=False)
    y_pred = scaler_y.inverse_transform(query_preds_scaled.numpy()).flatten()
    y_actual = task['query_set']['Target'].values.flatten()

    # Error metrics in the original (unscaled) units.
    mse = mean_squared_error(y_actual, y_pred)
    mae = mean_absolute_error(y_actual, y_pred)
    rmse = np.sqrt(mse)

    # MAPE, computed only over non-zero actuals to avoid dividing by zero.
    non_zero_indices = y_actual != 0
    if np.any(non_zero_indices):
        mape = np.mean(np.abs((y_actual[non_zero_indices] - y_pred[non_zero_indices]) / y_actual[non_zero_indices])) * 100
    else:
        mape = np.nan  # undefined when every actual value is 0

    # SMAPE.
    # NOTE(review): unlike MAPE, the denominator is not guarded; a sample where
    # actual and prediction are both 0 yields NaN for the whole route.
    smape = np.mean(2 * np.abs(y_actual - y_pred) / (np.abs(y_actual) + np.abs(y_pred))) * 100

    print(f"Performance on route {route}: MSE={mse:.2f}, MAE={mae:.2f}, RMSE={rmse:.2f}, MAPE={mape:.2f}%, SMAPE={smape:.2f}%")

    # Collect the metrics for this route.
    performance_metrics.append({
        'Route': route,
        'MSE': mse,
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'SMAPE': smape
    })

    # Keep the fine-tuned model for this route.
    adapted_models[route] = adapted_model

     route       date  Passengers
0  ABE_ATL 2022-01-31     1069.87
1  ABE_ATL 2022-02-28      901.35
2  ABE_ATL 2022-03-31     2046.49
3  ABE_ATL 2022-04-30     1755.61
4  ABE_ATL 2022-05-31     1507.64
Date range in original data: 2022-01-31 00:00:00 to 2024-04-30 00:00:00
Data Date Range: 2022-01-31 to 2024-04-30
Date range in sequences data: 2023-01-31 to 2024-04-30
Number of tasks: 841

Meta-training Epoch 1/20
Average Support Loss: 0.6726, Average Query Loss: 1.4561
Meta-training Epoch 1 completed.

Meta-training Epoch 2/20
Average Support Loss: 0.6934, Average Query Loss: 1.3582
Meta-training Epoch 2 completed.

Meta-training Epoch 3/20
Average Support Loss: 0.7321, Average Query Loss: 1.3066
Meta-training Epoch 3 completed.

Meta-training Epoch 4/20
Average Support Loss: 0.8123, Average Query Loss: 1.2538
Meta-training Epoch 4 completed.

Meta-training Epoch 5/20
Average Support Loss: 0.9416, Average Query Loss: 1.2155
Meta-training Epoch 5 completed.

Meta-training Epoch 6/20


In [None]:
# Train a per-route "local" baseline model from scratch and score it.
local_performance_metrics = []
local_models = {}  # trained local model per route

# Process the routes in name order.
meta_test_tasks_sorted = sorted(meta_test_tasks, key=lambda x: x['route'])

for task in meta_test_tasks_sorted:
    route = task['route']
    print(f"\nTraining local model for route: {route}")

    # Fetch the scaled data for this route.
    X_support = task['X_support']
    y_support = task['y_support']
    X_query = task['X_query']
    y_query = task['y_query']
    route_id = task['support_set']['Route_ID'].iloc[0]
    route_id_tensor = tf.constant([[route_id]], dtype=tf.int32)

    scaler_y = task['scaler_y']  # route-specific target scaler

    # Fresh, randomly initialised model with its own Adam optimizer.
    local_model = create_model()
    optimizer = tf.keras.optimizers.Adam(learning_rate=meta_learning_rate)
    loss_fn = tf.keras.losses.MeanSquaredError()  # rebinds the module-level loss_fn to an equivalent loss

    # Train on the support set only.
    # NOTE(review): compile()/fit() presumably adds the SimpleRNN L2 penalty to
    # the training loss, while the custom meta loops optimise loss_fn alone -
    # confirm the two models are meant to be trained under different objectives.
    local_model.compile(optimizer=optimizer, loss=loss_fn)
    local_model.fit([X_support, np.repeat(route_id_tensor, len(X_support), axis=0)], y_support,
                    epochs=meta_epochs, verbose=0)

    # Predict the query set and map predictions back to passenger counts.
    query_preds_scaled = local_model([X_query, np.repeat(route_id_tensor, len(X_query), axis=0)], training=False)
    y_pred = scaler_y.inverse_transform(query_preds_scaled.numpy()).flatten()
    y_actual = task['query_set']['Target'].values.flatten()

    # Error metrics in the original (unscaled) units.
    mse = mean_squared_error(y_actual, y_pred)
    mae = mean_absolute_error(y_actual, y_pred)
    rmse = np.sqrt(mse)
    # MAPE, computed only over non-zero actuals.
    non_zero_indices = y_actual != 0
    if np.any(non_zero_indices):
        mape = np.mean(np.abs((y_actual[non_zero_indices] - y_pred[non_zero_indices]) / y_actual[non_zero_indices])) * 100
    else:
        mape = np.nan

    # SMAPE.
    # NOTE(review): the denominator is not guarded; a sample where actual and
    # prediction are both 0 yields NaN for the whole route.
    smape = np.mean(2 * np.abs(y_actual - y_pred) / (np.abs(y_actual) + np.abs(y_pred))) * 100

    print(f"Local model performance on route {route}: MSE={mse:.2f}, MAE={mae:.2f}, RMSE={rmse:.2f}, MAPE={mape:.2f}%, SMAPE={smape:.2f}%")

    # Collect the metrics for this route.
    local_performance_metrics.append({
        'Route': route,
        'MSE': mse,
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'SMAPE': smape
    })
    # Keep the trained local model for this route.
    local_models[route] = local_model

# Performance comparison: meta-learned + fine-tuned model vs. local model.
def _pct_improvement(local_value, meta_value):
    """Return by how many percent the meta error is below the local error.

    NaN is returned when either value is NaN or when the local error is 0
    (the ratio is undefined). A meta error of exactly 0 is a valid result and
    yields 100 rather than being treated as missing.
    """
    if np.isnan(local_value) or np.isnan(meta_value) or local_value == 0:
        return np.nan
    return (local_value - meta_value) / local_value * 100


comparison_results = []

# Sort both metric lists by route name so that they line up pairwise.
performance_metrics_sorted = sorted(performance_metrics, key=lambda x: x['Route'])
local_performance_metrics_sorted = sorted(local_performance_metrics, key=lambda x: x['Route'])

for meta_metric, local_metric in zip(performance_metrics_sorted, local_performance_metrics_sorted):
    route = meta_metric['Route']
    # Pairing is positional; fail loudly instead of comparing different routes.
    if local_metric['Route'] != route:
        raise ValueError(
            f"Metric lists are misaligned: meta route {route!r} vs "
            f"local route {local_metric['Route']!r}"
        )

    # Column order per metric: Meta, Local, Improvement (%).
    row = {'Route': route}
    for metric_name in ('MSE', 'MAE', 'RMSE', 'MAPE', 'SMAPE'):
        meta_value = meta_metric[metric_name]
        local_value = local_metric[metric_name]
        row[f'Meta {metric_name}'] = meta_value
        row[f'Local {metric_name}'] = local_value
        row[f'{metric_name} Improvement (%)'] = _pct_improvement(local_value, meta_value)
    comparison_results.append(row)

# Show the results as a DataFrame and persist them.
comparison_df = pd.DataFrame(comparison_results)
print(comparison_df)
comparison_df.to_csv('Comparison_Meta_Tuning_vs_Local.csv')


In [None]:
from scipy.stats import ttest_rel, wilcoxon
import numpy as np
import pandas as pd

# Extract the per-route improvement columns.
mse_improvements = comparison_df['MSE Improvement (%)'].values
mae_improvements = comparison_df['MAE Improvement (%)'].values
rmse_improvements = comparison_df['RMSE Improvement (%)'].values
mape_improvements = comparison_df['MAPE Improvement (%)'].values
smape_improvements = comparison_df['SMAPE Improvement (%)'].values

# 1. Mean improvement and share of routes that improved.
mean_mse_improvement = np.mean(mse_improvements)
mean_mae_improvement = np.mean(mae_improvements)
mean_rmse_improvement = np.mean(rmse_improvements)
mean_mape_improvement = np.nanmean(mape_improvements)  # MAPE may contain NaN
mean_smape_improvement = np.nanmean(smape_improvements)  # SMAPE may contain NaN

mse_improved_ratio = np.sum(mse_improvements > 0) / len(mse_improvements) * 100
mae_improved_ratio = np.sum(mae_improvements > 0) / len(mae_improvements) * 100
rmse_improved_ratio = np.sum(rmse_improvements > 0) / len(rmse_improvements) * 100
# For MAPE/SMAPE the share is taken over routes where the improvement is defined.
mape_improved_ratio = np.sum(~np.isnan(mape_improvements) & (mape_improvements > 0)) / np.sum(~np.isnan(mape_improvements)) * 100
smape_improved_ratio = np.sum(~np.isnan(smape_improvements) & (smape_improvements > 0)) / np.sum(~np.isnan(smape_improvements)) * 100

print("1. Average Improvement:")
print(f"Average MSE Improvement: {mean_mse_improvement:.2f}%")
print(f"Average MAE Improvement: {mean_mae_improvement:.2f}%")
print(f"Average RMSE Improvement: {mean_rmse_improvement:.2f}%")
print(f"Average MAPE Improvement: {mean_mape_improvement:.2f}%")
print(f"Average SMAPE Improvement: {mean_smape_improvement:.2f}%\n")

print("1. Improvement Ratios:")
print(f"MSE Improved in {mse_improved_ratio:.2f}% of routes")
print(f"MAE Improved in {mae_improved_ratio:.2f}% of routes")
print(f"RMSE Improved in {rmse_improved_ratio:.2f}% of routes")
print(f"MAPE Improved in {mape_improved_ratio:.2f}% of routes")
print(f"SMAPE Improved in {smape_improved_ratio:.2f}% of routes\n")

# 2. Significance tests on the paired per-route errors.
# Paired t-test (assumes normally distributed differences).
t_stat_mse, p_value_mse = ttest_rel(comparison_df['Meta MSE'], comparison_df['Local MSE'])
t_stat_mae, p_value_mae = ttest_rel(comparison_df['Meta MAE'], comparison_df['Local MAE'])
t_stat_rmse, p_value_rmse = ttest_rel(comparison_df['Meta RMSE'], comparison_df['Local RMSE'])

# Wilcoxon signed-rank test (non-parametric).
# Rows are dropped jointly: calling dropna() on each column separately could
# remove different routes from each side and silently break the pairing.
mape_pairs = comparison_df[['Meta MAPE', 'Local MAPE']].dropna()
smape_pairs = comparison_df[['Meta SMAPE', 'Local SMAPE']].dropna()
_, p_value_mape = wilcoxon(mape_pairs['Meta MAPE'], mape_pairs['Local MAPE'])
_, p_value_smape = wilcoxon(smape_pairs['Meta SMAPE'], smape_pairs['Local SMAPE'])

print("2. Statistical Significance Testing Results (p-values):")
print(f"MSE Improvement p-value: {p_value_mse:.4f}")
print(f"MAE Improvement p-value: {p_value_mae:.4f}")
print(f"RMSE Improvement p-value: {p_value_rmse:.4f}")
print(f"MAPE Improvement p-value: {p_value_mape:.4f}")
print(f"SMAPE Improvement p-value: {p_value_smape:.4f}\n")

# 3. Consistency across metrics.
# Count routes where every improvement is positive. A NaN improvement compares
# as False, so routes with an undefined MAPE/SMAPE improvement are not counted.
consistent_improvement_count = np.sum(
    (mse_improvements > 0) &
    (mae_improvements > 0) &
    (rmse_improvements > 0) &
    (mape_improvements > 0) &
    (smape_improvements > 0)
)
consistent_improvement_ratio = consistent_improvement_count / len(mse_improvements) * 100

print("3. Consistent Improvement Check:")
print(f"Routes with consistent improvement across all metrics: {consistent_improvement_ratio:.2f}%\n")

# Conclusion.
# NOTE(review): the p-values are two-sided and say nothing about direction;
# the direction is only implied by the >50% consistency requirement.
if consistent_improvement_ratio > 50 and p_value_mse < 0.05 and p_value_mae < 0.05:
    print("Meta+Tuning model shows significant and consistent improvements over the Local model.")
else:
    print("Improvements are not consistently significant across all metrics.")


# Visualization of predictions for each route in meta-test tasks

In [None]:
# Plot actual vs. predicted passengers for the first 10 routes.
# Route mapping from Route_ID to route name
route_mapping = dict(zip(df['Route_ID'], df['route']))

# Take the first 10 routes in data order.
unique_routes_ordered = df['route'].unique()
top_routes = unique_routes_ordered[:10].tolist()
print(f"First 10 routes in data order: {top_routes}")

# Keep only the meta-test tasks that belong to those routes.
meta_test_tasks_top10 = [task for task in meta_test_tasks if task['route'] in top_routes]

# Plot them in route-name order.
meta_test_tasks_sorted = sorted(meta_test_tasks_top10, key=lambda x: x['route'])

# For each route in the top 10 routes
for task in meta_test_tasks_sorted:
    route = task['route']
    route_id = task['support_set']['Route_ID'].iloc[0]
    route_name = route_mapping[route_id]
    print(f"\nEvaluating and plotting results for route: {route_name}")

    # Models trained earlier for this route.
    adapted_model = adapted_models[route]  # meta-model + fine-tuning
    local_model = local_models[route]      # local baseline

    # Get full data for the route from the original DataFrame
    route_data_full = df[df['route'] == route].sort_values('date')

    # Get dates and passenger counts for full data
    dates_full = pd.to_datetime(route_data_full['date']).reset_index(drop=True)
    passengers_full = route_data_full['Passengers'].values.flatten()

    # Get support and query sets
    support_set = task['support_set']
    query_set = task['query_set']

    # Get dates and passenger counts for support set
    dates_support = pd.to_datetime(support_set['Date']).reset_index(drop=True)
    passengers_support = support_set['Target'].values.flatten()

    # Get dates and passenger counts for query set (actual)
    dates_query = pd.to_datetime(query_set['Date']).reset_index(drop=True)
    passengers_query_actual = query_set['Target'].values.flatten()

    # Get predictions on query set from adapted model
    y_pred_scaled_adapted = adapted_model([task['X_query'], np.full((len(task['X_query']), 1), route_id)], training=False)
    passengers_query_pred_adapted = task['scaler_y'].inverse_transform(y_pred_scaled_adapted.numpy()).flatten()

    # Get predictions on query set from local model
    y_pred_scaled_local = local_model([task['X_query'], np.full((len(task['X_query']), 1), route_id)], training=False)
    passengers_query_pred_local = task['scaler_y'].inverse_transform(y_pred_scaled_local.numpy()).flatten()

    # Retrieve performance metrics for the current route
    metrics_adapted = next(item for item in performance_metrics if item['Route'] == route)
    mse_adapted = metrics_adapted['MSE']
    mae_adapted = metrics_adapted['MAE']

    rmse_adapted = metrics_adapted['RMSE']
    mape_adapted = metrics_adapted['MAPE']
    smape_adapted = metrics_adapted['SMAPE']

    metrics_local = next(item for item in local_performance_metrics if item['Route'] == route)
    mse_local = metrics_local['MSE']
    mae_local = metrics_local['MAE']

    rmse_local = metrics_local['RMSE']
    mape_local = metrics_local['MAPE']
    smape_local = metrics_local['SMAPE']

    # Plotting
    plt.figure(figsize=(14, 7))

    # Plot full actual data
    plt.plot(dates_full, passengers_full, label='Actual Data', color='blue', marker='.', linestyle='-')

    # Plot support set (training data)
    plt.plot(dates_support, passengers_support, label='Training Data', color='blue', marker='o', linestyle='-')

    # Plot actual query set data
    plt.plot(dates_query, passengers_query_actual, label='Test Actual', color='green', marker='s', linestyle='-')

    # Plot predicted query set data from adapted model
    plt.plot(dates_query, passengers_query_pred_adapted, label='Meta+Tuning Predicted', color='red', linestyle='--', marker='D')

    # Plot predicted query set data from local model
    plt.plot(dates_query, passengers_query_pred_local, label='Local Model Predicted', color='orange', linestyle='--', marker='X')

    # Add connection between Actual (blue) and Test Actual (green)
    plt.plot([dates_support.iloc[-1], dates_query.iloc[0]],
             [passengers_support[-1], passengers_query_actual[0]],
             color='green', linestyle='-')

    # Add connections from Actual (blue) to Meta+Tuning Predicted (red) and Local Model Predicted (orange)
    plt.plot([dates_support.iloc[-1], dates_query.iloc[0]],
             [passengers_support[-1], passengers_query_pred_adapted[0]],
             color='red', linestyle='--')
    plt.plot([dates_support.iloc[-1], dates_query.iloc[0]],
             [passengers_support[-1], passengers_query_pred_local[0]],
             color='orange', linestyle='--')

    # Formatting the x-axis
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.gcf().autofmt_xdate()

    # Set x-axis limits to cover the full date range
    plt.xlim([dates_full.min(), dates_full.max()])

    # Title and labels (including performance metrics)
    plt.xlabel('Date')
    plt.ylabel('Monthly Passengers')
    plt.title(f'Actual vs Predicted Passengers for Route {route_name}\n'
              f'MAML+Tuning MSE: {mse_adapted:.2f}, MAE: {mae_adapted:.2f}, RMSE: {rmse_adapted:.2f}, MAPE: {mape_adapted:.2f}%, SMAPE: {smape_adapted:.2f}%\n'
              f'Local Model MSE: {mse_local:.2f}, MAE: {mae_local:.2f}, RMSE: {rmse_local:.2f}, MAPE: {mape_local:.2f}%, SMAPE: {smape_local:.2f}%')
    plt.legend()
    plt.grid(True)

    # Dynamic Y-Axis Scaling
    all_passengers = np.concatenate([passengers_full, passengers_query_pred_adapted, passengers_query_pred_local])
    y_min = np.min(all_passengers)
    y_max = np.max(all_passengers)
    padding = (y_max - y_min) * 0.1  # 10% padding
    plt.ylim(y_min - padding, y_max + padding)

    plt.tight_layout()
    # Save before showing: show() may release the figure, in which case a
    # later savefig() would write an empty image.
    # Original note: the files only need to be saved once.
    plt.savefig(f"route_{route_name}_comparison.png")
    plt.show()
    plt.close()

In [None]:
# Display the per-route comparison table as the cell output.
comparison_df

In [None]:
# Histogram of the per-route SMAPE improvement (%), 100 bins with white bar edges.
comparison_df["SMAPE Improvement (%)"].plot.hist(bins=100,edgecolor='w')