In [1]:
import os
import sys

# Make the project root importable so the local `utils` package resolves.
# The location can be overridden with the AIRBNB_PROJECT_ROOT environment
# variable; the original hard-coded path is kept as the fallback so existing
# setups keep working unchanged.
PROJECT_ROOT = os.environ.get(
    'AIRBNB_PROJECT_ROOT', '/Users/tompease/Documents/Coding/airbnb'
)
if PROJECT_ROOT not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(PROJECT_ROOT)

from scipy.stats import randint
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder

from utils.data_loader import AirbnbLoader
from utils.tune_hyperparameters import ClassificationTuneModel


In [2]:
# Load the features with 'Category' as the classification target.
loader = AirbnbLoader()
X, y = loader.load_airbnb('Category', normalized=True)

# Convert the target labels to integer codes; `encoder` is kept so the codes
# can be mapped back to the original labels with `encoder.inverse_transform`.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Search space for the randomized search. Lists are sampled uniformly;
# `randint(low, high)` draws integers from the half-open range [low, high).
grad_boost_param_dist = {
  'learning_rate' : [0.001, 0.01, 0.1, 1, 10],
  'n_estimators': randint(1,1000),
  # scikit-learn requires an integer min_samples_split to be >= 2; a lower
  # bound of 1 produced invalid candidates whose fits fail and waste part of
  # the n_iter budget.
  'min_samples_split': randint(2,10),
  'min_samples_leaf': randint(1,20),
  'min_weight_fraction_leaf': [0.0, 0.0001, 0.001, 0.1],
  'max_depth': randint(1,5),
  'max_features': ['sqrt', 'log2', None]
}

# NOTE: despite its name, this variable holds a tuned GradientBoostingClassifier.
# The name is kept because later cells reference it.
# NOTE(review): the search appears to run at construction time, since the
# following cells only read result attributes — confirm in
# utils.tune_hyperparameters. With n_iter=500 this cell is expensive.
tuned_random_forest = ClassificationTuneModel(
    X,
    y,
    GradientBoostingClassifier,
    param_grid=grad_boost_param_dist,
    method='random',
    n_iter=500,
)

In [3]:
# Baseline: parameters and train/test accuracy of the untuned estimator.
# In the recorded run the gap between train (~0.87) and test (~0.36) accuracy
# indicates the default model overfits heavily.
tuned_random_forest.default_model_performance_

{'params': {'ccp_alpha': 0.0,
  'criterion': 'friedman_mse',
  'init': None,
  'learning_rate': 0.1,
  'loss': 'log_loss',
  'max_depth': 3,
  'max_features': None,
  'max_leaf_nodes': None,
  'min_impurity_decrease': 0.0,
  'min_samples_leaf': 1,
  'min_samples_split': 2,
  'min_weight_fraction_leaf': 0.0,
  'n_estimators': 100,
  'n_iter_no_change': None,
  'random_state': None,
  'subsample': 1.0,
  'tol': 0.0001,
  'validation_fraction': 0.1,
  'verbose': 0,
  'warm_start': False},
 'train accuracy': 0.866092943201377,
 'test accuracy': 0.35903614457831323}

In [10]:
# Best parameters found by the search, with their train/test scores.
# NOTE(review): the result keys here are 'train'/'test' while the baseline uses
# 'train accuracy'/'test accuracy' — presumably the same metric; confirm in
# utils.tune_hyperparameters.
# NOTE(review): the recorded execution count jumps from 3 to 10, so cells were
# re-run or run out of order; verify the results with Restart & Run All.
tuned_random_forest.tuned_performance_

{'params': {'learning_rate': 0.01,
  'max_depth': 2,
  'max_features': 'log2',
  'min_samples_leaf': 2,
  'min_samples_split': 4,
  'min_weight_fraction_leaf': 0.0,
  'n_estimators': 589},
 'train': 0.5903614457831325,
 'test': 0.3903614457831326}

In [11]:
# Persist the outcome of the search.
# NOTE(review): presumably writes the fitted model and its metrics to disk;
# the destination and format are defined in utils.tune_hyperparameters — confirm there.
tuned_random_forest.save_model_and_results()