In [9]:
from __future__ import division
from collections import defaultdict
import pandas as pd
from glob import glob
import sys
from os import getcwd
import pickle

from brew.base import Ensemble, EnsembleClassifier
from brew.stacking.stacker import EnsembleStack, EnsembleStackClassifier
from brew.combination.combiner import Combiner

from sklearn import clone

from models.scripts.utils import *

### Load models and features to train

In [2]:
# Locate the pickled models and the submission CSVs relative to the directory
# the notebook is run from.
# glob() returns its matches in arbitrary, filesystem-dependent order; sorting
# keeps the order (and so positional lookups such as models[0]) stable
# between runs and machines.
models_files = sorted(glob(getcwd() + '/models/pickles/*.p'))
pred_files = sorted(glob(getcwd() + '/submissions/*.csv'))
# Name of an output CSV file.
loc_outfile = 'outfile.csv'

In [3]:
# Read the train and test JSON files into DataFrames, in that order.
df_train, df_test = map(
    pd.read_json,
    ("data/drace_train.json", "data/drace_test.json"),
)

In [13]:
# Column names that are left out of the training features regardless of dtype.
exclude = [
    'price',
    'manager_skill',
    'manager_skill_bool',
    'price_vs_median_72',
    'building_id',
    'manager_id',
    'listing_id',
    'n_expensive',
    'n_no_photo',
]

# Keep every column of df_train whose dtype is neither object ('O') nor
# datetime64[ns] ('<M8[ns]') and whose name is not listed in `exclude`.
feats_to_train = []
for col in df_train.columns.tolist():
    if df_train[col].dtype in ('O', '<M8[ns]'):
        continue
    if col in exclude:
        continue
    feats_to_train.append(col)

# Bare expression so the notebook displays the selected feature names.
feats_to_train

[u'0_per_72',
 u'100_per_72',
 u'10_per_72',
 u'20_per_72',
 u'30_per_72',
 u'40_per_72',
 u'50_per_72',
 u'60_per_72',
 u'70_per_72',
 u'80_per_72',
 u'90_per_72',
 u'BB_ratio',
 u'Price_P_Room',
 u'allow_pets',
 u'amount_of_caps',
 u'available',
 u'bathroom_listed',
 u'bathrooms',
 u'bedrooms',
 u'buzzword',
 u'created',
 u'created_day',
 u'created_hour',
 u'created_month',
 u'created_year',
 u'dishwash',
 u'dist_to_nearest_college',
 u'dist_to_nearest_tube',
 u'distance_from_midtown',
 u'doorman',
 u'fitness',
 u'furnished',
 u'hardwood',
 u'has_phone',
 u'is_studio',
 u'large_space',
 u'latitude',
 u'laundry',
 u'longitude',
 u'luxurious',
 u'mean_72',
 u'median_72',
 u'n_log_price',
 u'n_num_keyfeat_score',
 u'nofee',
 u'num_description_words',
 u'num_features',
 u'num_photos',
 u'preWar',
 u'price_vs_median_72_new',
 u'quiet_nei',
 u'space_desc',
 u'subway',
 u'weekday_created']

In [5]:
# Load every pickled model listed in `models_files` into one list, in order.
# NOTE: pickle.load can execute arbitrary code stored in the file, so this
# must only be pointed at pickles produced by a trusted source.
models = []
for model_path in models_files:
    # The with-block closes each file as soon as its model has been read, so
    # no file handle is left open per model.
    with open(model_path, "rb") as model_file:
        models.append(pickle.load(model_file))

In [17]:
# Display the current contents of `models` to check which objects it holds.
models

[RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
             max_depth=None, max_features='auto', max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=1000, n_jobs=1, oob_score=False,
             random_state=None, verbose=0, warm_start=False),
 <xgboost.core.Booster at 0x126e95b10>,
 <brew.base.EnsembleClassifier at 0x13fd92490>,
 <brew.base.EnsembleClassifier at 0x1472ed950>]

### Create ensemble

In [14]:
# Creating Ensemble
# Combine the loaded models into one classifier that uses the 'mean' combiner.
#
# The ensemble is given its own copy of the list: `models` is appended to just
# below, and if Ensemble keeps a reference to the list it receives (not
# visible from this notebook), the ensemble would otherwise end up containing
# its own wrapper classifier.
#
# NOTE(review): the recorded run of output(df_test, eclf, ...) failed with
# "'Booster' object has no attribute 'classes_'", so the raw xgboost Booster
# among the loaded models probably needs an sklearn-style wrapper (or to be
# left out) before this ensemble can predict -- confirm.
ensemble = Ensemble(list(models))
eclf = EnsembleClassifier(ensemble=ensemble, combiner='mean')
# `models` still gains the combined classifier as its last element.
models.append(eclf)

In [None]:
# Creating Stacking
# Build a two-layer stack: the base models feed a second layer holding a
# fresh (unfitted) clone of one of them.

# Base learners are everything in `models` except the mean ensemble `eclf`
# itself (the ensemble cell appends `eclf` to `models`).
base_models = [clf for clf in models if clf is not eclf]

# clone() needs an estimator exposing get_params(). The order of `models`
# follows the order in which the pickle files were listed, so pick the first
# suitable model instead of relying on a fixed position.
meta_candidates = [clf for clf in base_models if hasattr(clf, 'get_params')]
if not meta_candidates:
    raise ValueError(
        "no sklearn-style estimator among the loaded models to clone for "
        "the second stacking layer"
    )

# NOTE(review): this cell previously created an empty first layer and cloned
# an undefined name `clf1`; using the base models for layer 1 and a clone of
# the first clonable one for layer 2 is the presumed intent -- confirm.
# NOTE(review): the recorded run of the mean ensemble failed on the raw
# xgboost Booster ("no attribute 'classes_'"); the same model is part of
# `base_models` here and may need wrapping or removing -- confirm.
layer_1 = Ensemble(list(base_models))
layer_2 = Ensemble([clone(meta_candidates[0])])

stack = EnsembleStack(cv=3)
stack.add_layer(layer_1)
stack.add_layer(layer_2)

sclf = EnsembleStackClassifier(stack, combiner=Combiner('mean'))

# list.extend() returns None, so the combined list is built explicitly; this
# also avoids adding `eclf` to `models` a second time.
clf_list = base_models + [eclf, sclf]
# One label per entry of clf_list, derived from the actual model classes.
lbl_list = [type(clf).__name__ for clf in base_models] + ['Ensemble', 'Stacking']

### Output 

In [15]:
# Call `output` (not defined in this notebook; presumably provided by the
# star import from models.scripts.utils) with the test frame, the mean
# ensemble and the selected feature names.
# NOTE(review): the result is later written with sub.to_csv, so it is
# presumably a DataFrame -- confirm against the helper.
# NOTE(review): the recorded run of this cell raised
# "AttributeError: 'Booster' object has no attribute 'classes_'".
sub = output(df_test, eclf, feats_to_train)

AttributeError: 'Booster' object has no attribute 'classes_'

In [21]:
# Debugging probe: look up the `classifiers` attribute on the first entry of
# `models`.
# NOTE(review): the recorded run raised AttributeError because models[0] was
# a RandomForestClassifier; the target was presumably one of the ensemble
# objects -- confirm, or drop this cell.
models[0].classifiers

AttributeError: 'RandomForestClassifier' object has no attribute 'classifiers'

In [None]:
# Write `sub` to ensemble-submission.csv without the index column.
sub.to_csv("ensemble-submission.csv", index=False)