In [None]:
# 다중 변수 모델 사용

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Synthetic example series: a seeded Gaussian random walk shifted up by 50
n = 100
time = np.arange(n)
np.random.seed(0)
price = 50 + np.random.randn(n).cumsum()

# 특징 행렬 생성 함수 (다변수 회귀를 위해)
def create_features(data, window_size):
    """Build a lagged (sliding-window) supervised dataset from a series.

    Each sample consists of ``window_size`` consecutive observations and its
    target is the observation that immediately follows that window.

    Args:
        data: Sequence or array of observations ordered along the first axis.
        window_size: Number of consecutive observations per sample; must be
            a positive integer.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. For 1-D input of length ``m``,
        ``X`` has shape ``(max(m - window_size, 0), window_size)`` and ``y``
        has shape ``(max(m - window_size, 0),)``. When the series is too
        short to form a single sample, ``X`` still keeps its second
        dimension so it remains a valid (empty) 2-D feature matrix.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(
            f"window_size must be a positive integer, got {window_size}"
        )

    series = np.asarray(data)
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1];
    # indexing with it gathers every window in one vectorised step.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)

    X = series[window_index]
    # Copy so the targets never alias the caller's array.
    y = series[window_size:].copy()
    return X, y

# Number of past observations that make up one feature vector
window_size = 5

# Chronological 80/20 split. The test slice reaches back window_size points
# into the training range so that its first target is price[split_point].
split_point = int(n * 0.8)
price_train = price[:split_point]
price_test = price[split_point - window_size:]

# Build the lagged feature matrix and target vector for each partition
(X_train, y_train), (X_test, y_test) = (
    create_features(segment, window_size)
    for segment in (price_train, price_test)
)

# Plot the raw series with the train and test segments in different colours
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(time[:split_point], price[:split_point], label='Train Data', color='blue')
ax.plot(time[split_point:], price[split_point:], label='Test Data', color='red')
# Vertical marker at the index where the test segment begins
ax.axvline(x=split_point, color='black', linestyle='--', label='Train/Test Split')
ax.set_xlabel('Time')
ax.set_ylabel('Price')
ax.set_title('Time Series Data with Train/Test Split')
ax.legend()
plt.show()

# Candidate regressors, keyed by the display name used in legends and reports
linear_model = LinearRegression()
forest_model = RandomForestRegressor(n_estimators=100, random_state=0)
# NOTE(review): the RBF-kernel SVR is fitted on unscaled inputs later in this
# script — confirm that is intended.
svr_model = SVR(kernel='rbf', C=100, gamma=0.1)

models = {
    'Linear Regression': linear_model,
    'Random Forest': forest_model,
    'Support Vector Machine': svr_model,
}

# Fit every model on the training windows, then record its predictions and
# mean squared error on both the training and the test windows.
predictions_train, predictions_test = {}, {}
mse_scores_train, mse_scores_test = {}, {}
for name, model in models.items():
    model.fit(X_train, y_train)

    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    predictions_train[name] = train_pred
    predictions_test[name] = test_pred
    mse_scores_train[name] = mean_squared_error(y_train, train_pred)
    mse_scores_test[name] = mean_squared_error(y_test, test_pred)

# Plot in-sample predictions against the actual training targets.
# The first training target is price[window_size], so the x-axis uses the
# matching slice of `time` instead of a 0-based sample index; this keeps the
# axis labelled 'Time' aligned with the other figures.
train_time = time[window_size:window_size + len(y_train)]
plt.figure(figsize=(14, 8))
plt.plot(train_time, y_train, label='Actual (Train)', color='black')
for name, pred in predictions_train.items():
    plt.plot(train_time, pred, label=f'{name} (Train)')
plt.title('Model Predictions on Training Data')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Plot out-of-sample predictions against the actual test targets.
# The first test target is price[split_point], so the x-axis uses the
# matching slice of `time` instead of a 0-based sample index; this keeps the
# axis labelled 'Time' aligned with the other figures.
test_time = time[split_point:split_point + len(y_test)]
plt.figure(figsize=(14, 8))
plt.plot(test_time, y_test, label='Actual (Test)', color='black')
for name, pred in predictions_test.items():
    plt.plot(test_time, pred, label=f'{name} (Test)')
plt.title('Model Predictions on Test Data')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Overlay every model's train (dotted) and test (dashed) predictions on the
# full actual series. Predictions are placed at the time index of the target
# they estimate: training targets start at window_size, test targets at
# split_point.
plt.figure(figsize=(14, 8))
plt.plot(time, price, label='Actual', color='black')
for name in models:
    plt.plot(time[window_size:split_point], predictions_train[name], linestyle='dotted', label=f'{name} (Train)')
    plt.plot(time[split_point:], predictions_test[name], linestyle='dashed', label=f'{name} (Test)')
plt.title('Model Predictions on Training and Test Data')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Report the mean squared error of every model, first on the training
# windows and then on the test windows.
for header, scores in (
    ("Mean Squared Error (MSE) for each model on Training Data:", mse_scores_train),
    ("\nMean Squared Error (MSE) for each model on Test Data:", mse_scores_test),
):
    print(header)
    for name, mse in scores.items():
        print(f'{name}: {mse:.4f}')