### Thanks to https://github.com/JayHong99
### Hyperparameter tuning -> check whether this also applies to PyTorch

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

import warnings

# Silence warnings for the rest of the notebook; both calls install an
# "ignore" filter.
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')

# I.Preprocessing

## 1. Load Data

In [2]:
# Read the competition's train and test tables; the first CSV column becomes
# the DataFrame index in both.
train_csv = '/kaggle/input/tabular-playground-series-mar-2022/train.csv'
test_csv = '/kaggle/input/tabular-playground-series-mar-2022/test.csv'
train_df, test_df = (
    pd.read_csv(csv_path, index_col=0) for csv_path in (train_csv, test_csv)
)

## 2. Feature Generation

- month
- Weekday to binary (Mon-Fri) : 0 / (Sat, Sun) : 1

In [3]:
%%time
# Derive calendar features from the timestamp, then drop the raw column.
train_df['time'] = pd.to_datetime(train_df['time'])
train_df['month'] = train_df['time'].dt.month

# Vectorized day-of-week lookup (0 = Monday ... 6 = Sunday) instead of a
# per-row Python lambda.
train_df['dayofweek'] = train_df['time'].dt.dayofweek
# Weekday/weekend flag: 0 for Monday-Friday, 1 for Saturday/Sunday.
train_df['weekday_weekend'] = (train_df['dayofweek'] >= 5).astype(int)

# The day-of-month column used to be created and dropped again right here
# without ever being read, so it is no longer created at all.
train_df.drop(columns=['time'], inplace=True)
train_df.head()

In [4]:
%%time
# Derive the same calendar features for the test set as for the train set.
test_df['time'] = pd.to_datetime(test_df['time'])
test_df['month'] = test_df['time'].dt.month

# Vectorized day-of-week lookup (0 = Monday ... 6 = Sunday) instead of a
# per-row Python lambda.
test_df['dayofweek'] = test_df['time'].dt.dayofweek
# Weekday/weekend flag: 0 for Monday-Friday, 1 for Saturday/Sunday.
test_df['weekday_weekend'] = (test_df['dayofweek'] >= 5).astype(int)

# The day-of-month column was never read, so it is no longer created.
# The index is reset to 0..n-1 so the frame can later be concatenated
# column-wise with the one-hot encoded data.
test_df = test_df.drop(columns=['time']).reset_index(drop=True)
test_df.head()

## 3. Encoding

### month, dayofweek, direction => One-Hot Encoding
- Month : 4~9 : 6 columns
- dayofweek : 0~6 : 7 columns

In [5]:
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder on the training data only and reuse it for the test data so
# both frames get the same one-hot columns.
encoder = OneHotEncoder()
encoding_columns = ['month', 'dayofweek', 'direction']
encoder.fit(train_df[encoding_columns])

onehot_columns = encoder.get_feature_names_out(encoding_columns)

# Build each encoded frame on the index of its source frame. pd.concat with
# axis=1 aligns rows by index label, so a default 0..n-1 index would silently
# misalign (and introduce NaN rows) whenever the source index differs from it.
train_onehot = pd.DataFrame(
    encoder.transform(train_df[encoding_columns]).toarray(),
    columns=onehot_columns,
    index=train_df.index,
)
test_onehot = pd.DataFrame(
    encoder.transform(test_df[encoding_columns]).toarray(),
    columns=onehot_columns,
    index=test_df.index,
)

# Append the encoded columns and remove the raw categorical ones.
train_df = pd.concat([train_df, train_onehot], axis=1).drop(columns=encoding_columns)
test_df = pd.concat([test_df, test_onehot], axis=1).drop(columns=encoding_columns)
train_df.head()

## 4. Setting Data

In [6]:
# Display the preprocessed training frame for a visual check.
train_df

In [7]:
# Separate the 'congestion' target from the features; the test frame is
# copied so X_test is an object independent of test_df.
y_train = train_df['congestion']
X_train = train_df.drop('congestion', axis=1)
X_test = test_df.copy()

# Print the shapes of the three pieces on one line.
print(*(part.shape for part in (X_train, y_train, X_test)))

# II. Modeling with LGBM


- LGBM's important parameters
    - num_leaves : maximum number of leaves per tree -> limit it to prevent overfitting
    - max_depth : maximum depth of each tree -> limit it to prevent overfitting
    - n_estimators : number of boosted trees -> too few underfits, too many can overfit
    - learning_rate : step size of each boosting round -> larger values train faster but can be less accurate

## Library

In [8]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, RandomizedSearchCV
from bayes_opt import BayesianOptimization

import optuna
from optuna import Trial
from optuna.samplers import TPESampler

## Setting
- The model is LGBMRegressor because of its speed and its performance on sparse data
- The CV method is KFold, for better generalization
- KFold is used without shuffling because the data is sequential

In [9]:
# Five contiguous (unshuffled) folds, reused by every search in this notebook.
kf = KFold(shuffle=False, n_splits=5)
# Base estimator with default hyperparameters, handed to the CV searches.
model = LGBMRegressor()

## 1. Grid Search CV

- GridSearchCV takes the parameter grid as a dictionary
- The grid search evaluates every combination in the grid
    - in this case, num_leaves(3 values) * max_depth(3 values) * n_estimators(3 values) * lr(2 values) = 54 combinations
- pros : gives a general picture of how the hyperparameters behave
- cons : it takes a long time, and the best parameters may lie outside the grid

### Set Param Grid

In [10]:
# Candidate values for the exhaustive search: 3 * 3 * 3 * 2 = 54 combinations.
grid_search_params = dict(
    num_leaves=[16, 32, 64],
    max_depth=[4, 5, 6],
    n_estimators=[100, 200, 300],
    learning_rate=[5e-2, 1e-3],
)

### Fit

In [None]:
%%time 
# Score every combination in the grid with the shared K-fold splitter, using
# negated mean absolute error as the score.
grid_lgbm = GridSearchCV(
    model,
    param_grid=grid_search_params,
    scoring='neg_mean_absolute_error',
    cv=kf,
)
grid_lgbm.fit(X_train, y_train)

### Result

In [None]:
grid_lgbm.best_params_, grid_lgbm.best_score_ # best parameter combination and its cross-validated score

In [None]:
# Keep the winning parameters, then rebind grid_lgbm from the search object
# to a new regressor configured with them.
best_grid_param = grid_lgbm.best_params_
grid_lgbm = LGBMRegressor(**best_grid_param)

## 2. RandomizedSearchCV
- RandomizedSearchCV takes the candidate values (or distributions) of each parameter as a dictionary
- The random search samples parameter combinations at random
    - in this case, n_iter = 30
- pros : takes much less time than GridSearchCV and can find the best hyperparameters if lucky
- cons : if unlucky, it ends up with bad hyperparameters

### Param Range

In [None]:
from scipy.stats import uniform

# Sampling space for the randomized search.
random_search_params = {
    'num_leaves': range(8, 64),
    'max_depth': range(2, 7),
    'n_estimators': range(50, 300),
    # A 2-tuple here would be read as a list of exactly two candidate values
    # (1e-3 and 5e-2), not as a range. Use a continuous uniform distribution
    # over [1e-3, 5e-2] so the search actually samples the interval.
    'learning_rate': uniform(loc=1e-3, scale=5e-2 - 1e-3),
}

### Fit

In [None]:
%%time
# Evaluate 30 randomly sampled parameter combinations with the shared K-fold
# splitter. random_state is fixed so reruns sample the same candidates,
# matching random_state=0 used for the Bayesian search.
random_lgbm = RandomizedSearchCV(
    model,
    param_distributions=random_search_params,
    n_iter=30,
    scoring='neg_mean_absolute_error',
    cv=kf,
    random_state=0,
)
random_lgbm.fit(X_train, y_train)

### Result

In [None]:
# Best sampled parameter combination and its cross-validated score.
random_lgbm.best_params_, random_lgbm.best_score_

In [None]:
# Keep the winning parameters, then rebind random_lgbm from the search object
# to a new regressor configured with them.
best_random_param = random_lgbm.best_params_
random_lgbm = LGBMRegressor(**best_random_param)

## 3. Bayesian Optimization
- Bayesian Optimization takes the bounds of each parameter as a dictionary
- It first evaluates init_points (10) random parameter sets, then runs n_iter (10) further steps that pick the next parameters using Bayesian statistics based on the results so far

### Setting Param Range

In [None]:
# (lower, upper) bounds of each hyperparameter for the Bayesian search.
bayes_search_params = dict(
    num_leaves=(8, 64),
    max_depth=(2, 20),
    n_estimators=(50, 300),
    learning_rate=(0.01, 0.05),
)

### Setting Optimization Function

In [None]:
def lgbm_opt(num_leaves, max_depth, n_estimators, learning_rate):
    """Objective for the Bayesian search: mean cross-validated negative MAE.

    ``num_leaves``, ``max_depth`` and ``n_estimators`` are truncated with
    ``int`` before the model is built; ``learning_rate`` is passed through
    unchanged. The model is scored on the module-level ``X_train`` /
    ``y_train`` with the module-level ``kf`` splitter.

    Returns:
        The mean of the per-fold ``neg_mean_absolute_error`` scores.
    """
    candidate = LGBMRegressor(
        num_leaves=int(num_leaves),
        max_depth=int(max_depth),
        n_estimators=int(n_estimators),
        learning_rate=learning_rate,
    )
    fold_scores = cross_val_score(
        candidate, X_train, y_train, scoring='neg_mean_absolute_error', cv=kf
    )
    return np.mean(fold_scores)

### Fit

In [None]:
%%time
# Maximize the CV objective within the given bounds: 10 initial points,
# then 10 optimization iterations.
lgbmBO = BayesianOptimization(
    f=lgbm_opt,
    pbounds=bayes_search_params,
    random_state=0,
    verbose=2,
)
lgbmBO.maximize(n_iter=10, init_points=10)

In [None]:
# Take the best point found, cast every hyperparameter except learning_rate
# to int, and keep learning_rate as reported.
best_bo_point = lgbmBO.max.get('params')
BO_params = {}
for param_name, param_value in best_bo_point.items():
    if param_name == "learning_rate":
        continue
    BO_params[param_name] = int(param_value)
BO_params['learning_rate'] = best_bo_point.get('learning_rate')
BO_params, lgbmBO.max.get('target')

In [None]:
# Regressor configured with the best parameters from the Bayesian search.
bayes_lgbm = LGBMRegressor(**BO_params)

## 4. Optuna
- Hyperparameter Optimize Framework
- easy to use
- good performance

## Setting

In [None]:
def lgbm_optuna(trial: Trial, X, y, test):
    """Optuna objective: mean cross-validated negative MAE of an LGBMRegressor.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature matrix used for cross-validation.
        y: Target values aligned with ``X``.
        test: Unused; kept so existing callers keep working.

    Returns:
        The mean of the per-fold ``neg_mean_absolute_error`` scores obtained
        with the module-level ``kf`` splitter (higher is better).
    """
    param = {
        # Every Optuna parameter name must match its LightGBM keyword.
        # num_leaves used to be registered under the name 'n_estimators',
        # which reused that name for two parameters and left 'num_leaves'
        # out of study.best_trial.params.
        'num_leaves': trial.suggest_int('num_leaves', 8, 64),
        'max_depth': trial.suggest_int('max_depth', 2, 20),
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        # suggest_float with a step replaces the deprecated
        # suggest_discrete_uniform(name, low, high, q).
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05, step=0.005),
    }

    # Build the candidate model. No separate fit on the full data is needed:
    # cross_val_score fits its own copy of the estimator on each fold.
    model = LGBMRegressor(**param)

    # Score on the data passed in by the caller rather than on globals.
    scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=kf)
    return np.mean(scores)

## Fit

In [None]:
# Maximize the objective (negative MAE). The sampler is seeded so reruns
# propose the same trials, matching random_state=0 used for the Bayesian search.
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=0))

# Without n_trials (or a timeout), optimize() keeps running trials indefinitely.
study.optimize(lambda trial : lgbm_optuna(trial, X_train, y_train, None), n_trials = 30)

# Best sampled parameters and their objective value.
study.best_trial.params, study.best_trial.value

## Result

In [None]:
# Regressor configured with the parameters of the best Optuna trial.
optuna_param = study.best_trial.params
optuna_lgbm = LGBMRegressor(**optuna_param)

# III. Predict

In [None]:
# Fit each tuned regressor on the full training set and collect its test
# predictions, in the same order as the models list.
models = [grid_lgbm, random_lgbm, bayes_lgbm, optuna_lgbm]

test_preds = []
for model in models:
    test_preds.append(model.fit(X_train, y_train).predict(X_test))

In [None]:
# Pairwise correlation between the models' test predictions (one row per model).
np.corrcoef(test_preds)