# 02. 모델링

전처리된 데이터로 머신러닝 및 딥러닝 모델을 학습·평가하고, 피처 중요도 시각화와 모델 저장까지 수행합니다.

## 사용 모델
- Random Forest
- Deep Learning (Keras)

In [None]:
# Data handling, plotting, and warning control used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Ignore all warnings from this point on. NOTE(review): this also hides
# deprecation and convergence warnings that may matter when debugging.
warnings.filterwarnings('ignore')

import sys
# Put the parent of the current working directory on the import path;
# presumably this is what lets the `src` imports below resolve — confirm
# the notebook is always launched from its own folder.
sys.path.append('..')

# Project-local helpers for loading, preprocessing, modeling and plotting.
from src.data_loader import load_csv
from src.preprocessing import DataPreprocessor, split_train_test
from src.features import FeatureEngineer
from src.models import MealkitLocationModel
from src.visualization import ResultVisualizer

# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

## 1. 데이터 로드 및 전처리

In [None]:
# Load the processed train and test CSVs from ../data/processed.
train_df = load_csv('../data/processed/train_dataset.csv')
test_df = load_csv('../data/processed/test_dataset.csv')

# Report both shapes as a quick sanity check on what was loaded.
print(
    f'Train shape: {train_df.shape}',
    f'Test shape: {test_df.shape}',
    sep='\n',
)

In [None]:
# Preprocessing: drop unneeded columns, then handle missing values.
preprocessor = DataPreprocessor()

# Keep only the candidate names that actually exist in train_df, so the
# drop below never references a missing column.
# NOTE(review): 'target' is discarded here while a later cell uses 'value'
# as the label column — confirm 'target' is really meant to be dropped.
drop_cols = [
    col for col in ('Unnamed: 0', '매장명', 'target')
    if col in train_df.columns
]

# strategy='zero' presumably fills missing entries with 0 — confirm against
# DataPreprocessor.handle_missing_values.
train_clean = preprocessor.handle_missing_values(
    train_df.drop(columns=drop_cols, errors='ignore'),
    strategy='zero',
)

print(f'정제 후 shape: {train_clean.shape}')
# Last expression of the cell: displays the first rows of the cleaned frame.
train_clean.head()

## 2. 학습/테스트 분리

In [None]:
# Split the cleaned frame into train/evaluation features and labels.
# 'value' is the label column handed to the splitter; test_size=0.2 and
# random_state=42 presumably mean a reproducible 20% hold-out — confirm
# against split_train_test.
target_col = 'value'

X_train, X_test, y_train, y_test = split_train_test(
    train_clean, target_col=target_col, test_size=0.2, random_state=42
)

# Show the resulting feature-matrix shapes.
print(f'X_train: {X_train.shape}, X_test: {X_test.shape}')

## 3. Random Forest 모델

In [None]:
# Fit a Random Forest through the project's model wrapper.
# NOTE(review): this cell uses task='classification', whereas the deep
# learning cell trains on the same y_train with task='regression' —
# confirm which task type matches the 'value' label.
rf_model = MealkitLocationModel(task='classification')
rf_model.train_random_forest(X_train, y_train, n_estimators=100, random_state=42)

# Score on the held-out split and print every metric to four decimals.
rf_results = rf_model.evaluate(X_test, y_test)
print('Random Forest 결과:')
for metric, value in rf_results.items():
    print(f'  {metric}: {value:.4f}')

In [None]:
# Fetch the feature-importance table from the fitted Random Forest wrapper.
importance_df = rf_model.get_feature_importance()

# Plot the top 15 entries; save_name presumably writes the figure under the
# output directory configured just before — confirm in ResultVisualizer.
viz = ResultVisualizer()
viz.set_output_dir('../outputs/figures')
viz.plot_feature_importance(
    importance_df, top_n=15,
    title='Random Forest Feature Importance',
    save_name='rf_feature_importance.png',
)
plt.show()

## 4. 딥러닝 모델

In [None]:
# Deep learning model (regression), trained on the same split as the
# Random Forest above.
dl_model = MealkitLocationModel(task='regression')

# Guard only the training call: an ImportError here is reported as the
# deep learning backend being unavailable. Evaluation lives in the `else`
# branch so an unrelated ImportError raised while scoring or printing is
# not misreported as "TensorFlow is not installed".
try:
    dl_model.train_deep_learning(
        X_train, y_train,
        epochs=100,
        batch_size=8,
        validation_split=0.2,
        verbose=1
    )
except ImportError:
    print('TensorFlow가 설치되지 않았습니다.')
else:
    # Runs only when training completed: score on the held-out split and
    # print every metric to four decimals.
    dl_results = dl_model.evaluate(X_test, y_test)
    print('\nDeep Learning 결과:')
    for metric, value in dl_results.items():
        print(f'  {metric}: {value:.4f}')

## 5. 모델 저장

In [None]:
# Persist the trained Random Forest wrapper to disk.
from pathlib import Path

model_path = '../outputs/models/random_forest.pkl'
# Create the destination directory up front so saving also works on a fresh
# checkout. Whether rf_model.save() creates missing directories itself is not
# visible here; exist_ok=True makes this a no-op when the folder exists.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
rf_model.save(model_path)
print('모델이 저장되었습니다: outputs/models/random_forest.pkl')