In [1]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
from hyperopt import hp, fmin, tpe, Trials, space_eval, STATUS_OK
from lightgbm import LGBMRegressor
from matplotlib import pyplot as plt
from sklearn.linear_model import ElasticNet

from data_process.column_schema import (PROPERTIES_RENAME_DICT, TRANSACTION_RENAME_DICT,
                                        NUMERICAL_COLS, CATEGORICAL_COLS)
from data_process.data_process_pipeline import DataProcessPipeline
from models.backtest import BackTest
from models.nn_models.dnn import DNN
from models.tree_models.lgbm import LGBM
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Using TensorFlow backend.


# Prepare data

In [2]:
# Directory that holds the merged CSV. Set the ZILLOW_DATA_DIR environment
# variable to point the notebook at a different location; the default is the
# original hard-coded path, so existing setups behave exactly as before.
DATA_DIR = os.environ.get('ZILLOW_DATA_DIR',
                          '/Users/shuyangdu/Desktop/ZillowChallenge/data')
df_all = pd.read_csv(os.path.join(DATA_DIR, 'df_merged.csv'))

## Data process

In [3]:
# Preprocessing pipeline used for the rest of the notebook (model construction
# and back testing both read from it).
# NOTE(review): encode_mode='label' / use_scale=True presumably select label
# encoding of categorical columns and feature scaling -- confirm in
# DataProcessPipeline.
data_pipeline = DataProcessPipeline(
    encode_mode='label',
    use_scale=True,
)

In [4]:
# Run the pipeline's pre-processing step on the raw merged frame. The result
# is kept under a separate name (`df`) so `df_all` stays untouched by this
# assignment and the cell can be re-run.
df = data_pipeline.pre_process(df_all)

# Construct Model

## ElasticNet

In [7]:
# Linear candidate: scikit-learn ElasticNet with fixed regularisation settings.
# Like the other model cells, this binds the shared name `model`.
model = ElasticNet(
    alpha=90,
    l1_ratio=0.85,
)

## LightGBM

In [5]:
# Keyword arguments for the LightGBM model; they are unpacked into the LGBM
# wrapper with **params in the next cell.
params = dict(
    max_bin=80,
    learning_rate=0.0116,
    boosting_type='gbdt',
    objective='regression_l1',
    feature_fraction=0.94,
    bagging_fraction=0.85,
    bagging_freq=80,
    num_leaves=110,
    lambda_l2=86.9,
    n_estimators=450,
)

In [6]:
# Tree candidate: project LGBM wrapper. Feature names and the categorical
# column list are taken from the data pipeline; the remaining settings come
# from `params`. Like the other model cells, this binds the shared name `model`.
model = LGBM(feature_name=data_pipeline.original_feature_cols,
             categorical_feature=data_pipeline.categorical_cols,
             **params)

## NeuralNetwork

In [7]:
# Neural-network candidate: project DNN wrapper.
# NOTE(review): dim_hidden_lst=[30] presumably means one hidden layer of 30
# units -- confirm in models.nn_models.dnn.
# Like the other model cells, this binds the shared name `model`.
model = DNN(
    dim_hidden_lst=[30],
    learning_rate=0.01,
    decay=0.0001,
    batch_size=128,
    epochs=5,
    verbose=0,
)

# Back test

In [7]:
# Wrap the current `model` and the shared data pipeline in a BackTest.
# NOTE: `model` is assigned by each of the ElasticNet, LightGBM and
# NeuralNetwork cells above, so this picks up whichever one ran last.
backtest = BackTest(
    model=model,
    data_process_pipeline=data_pipeline,
)

## LightGBM

In [17]:
# Cross-validate LightGBM through its own BackTest instance. Calling the
# shared `backtest` here would evaluate whichever estimator was last bound to
# `model`, so on a fresh Restart & Run All this section would not actually
# score LightGBM. Building the model inline makes the cell self-contained.
lgbm_backtest = BackTest(
    model=LGBM(
        feature_name=data_pipeline.original_feature_cols,
        categorical_feature=data_pipeline.categorical_cols,
        **params
    ),
    data_process_pipeline=data_pipeline,
)
# Last expression of the cell: the value returned by full_cv is displayed.
lgbm_backtest.full_cv(df)

0.067105600663580456

## ElasticNet

In [10]:
# Cross-validate ElasticNet through its own BackTest instance. Calling the
# shared `backtest` here would evaluate whichever estimator was last bound to
# `model`, so on a fresh Restart & Run All this section would not actually
# score ElasticNet. The hyperparameters match the ElasticNet cell under
# "Construct Model".
elastic_net_backtest = BackTest(
    model=ElasticNet(alpha=90, l1_ratio=0.85),
    data_process_pipeline=data_pipeline,
)
# Last expression of the cell: the value returned by full_cv is displayed.
elastic_net_backtest.full_cv(df)

0.068458717051321921

## NeuralNetwork

In [9]:
# Cross-validate the neural network through its own BackTest instance rather
# than the shared `backtest`, so the result is tied to an explicitly
# constructed DNN and not to whichever estimator was last bound to `model`.
# The hyperparameters match the DNN cell under "Construct Model".
# NOTE(review): no random seed is set, so the score may vary between runs if
# DNN training is stochastic -- confirm and seed if reproducibility matters.
dnn_backtest = BackTest(
    model=DNN(dim_hidden_lst=[30], learning_rate=0.01, decay=0.0001,
              batch_size=128, epochs=5, verbose=0),
    data_process_pipeline=data_pipeline,
)
# Last expression of the cell: the value returned by full_cv is displayed.
dnn_backtest.full_cv(df)

0.06844671392965937