<a href="https://colab.research.google.com/github/taylorfogarty/launch/blob/master/Taylor_Cheat_Sheet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import lightgbm as lgb
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import xgboost as xgb
from matplotlib import pyplot as plt
from scipy import stats
from scipy.stats import norm, skew
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import (
    ElasticNet,
    Lasso,
    BayesianRidge,
    LassoLarsIC,
    LinearRegression,
    Ridge,
    LogisticRegression,
    RidgeClassifier,
)
from sklearn.metrics import mean_squared_error, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
# Render every float pandas prints with exactly three decimal places.
pd.set_option('display.float_format', '{:.3f}'.format)

### Types of Regression Models

In [0]:
# Cheat sheet of estimator constructions. Every `.fit(X, y)` call below expects
# a feature matrix `X` and a target `y` to already exist in the session.
# NOTE(review): LogisticRegression, RidgeClassifier and RandomForestClassifier
# are classifiers, so they presumably need a categorical `y` -- confirm.

# Linear Regression
lm = LinearRegression()
lm_fit = lm.fit(X, y)

# Logistic Regression
lr = LogisticRegression()
lr_fit = lr.fit(X, y)

# LASSO Regression
lasso = Lasso(alpha=5)
lasso_fit = lasso.fit(X, y)

# Ridge Regression
ridge = Ridge(alpha=5)
ridge_fit = ridge.fit(X, y)

# Logistic Ridge Regression
ridge2 = RidgeClassifier(alpha=5)
ridge_fit2 = ridge2.fit(X, y)

# Elastic Net Regression
ENet = ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3)
ENet_fit = ENet.fit(X, y)

# LASSO Regression (ROBUST)
# Rebinds `lasso` to an unfitted pipeline; `lasso_fit` still refers to the
# plain Lasso fitted above.
lasso = make_pipeline(RobustScaler(), Lasso(alpha=0.0005, random_state=1))

# Elastic Net Regression (ROBUST)
# Rebinds `ENet` to an unfitted pipeline; `ENet_fit` still refers to the
# plain ElasticNet fitted above.
ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))

# Kernel Ridge Regression
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
KRR_fit = KRR.fit(X, y)

# Gradient Boosting Regression (HUBER)
GBoost = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05, max_depth=4,
                                   max_features='sqrt', min_samples_leaf=15, min_samples_split=10,
                                   loss='huber', random_state=5)
GBoost_fit = GBoost.fit(X, y)

# XGBoosting
# `verbosity=0` and `n_jobs=-1` replace the superseded `silent=1` and
# `nthread=-1` keywords (quiet output, use all available threads).
model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603, gamma=0.0468, learning_rate=0.05, max_depth=3,
                             min_child_weight=1.7817, n_estimators=2200, reg_alpha=0.4640, reg_lambda=0.8571,
                             subsample=0.5213, verbosity=0, random_state=7, n_jobs=-1)
XGB_fit = model_xgb.fit(X, y)

# LightGBM
model_lgb = lgb.LGBMRegressor(objective='regression', num_leaves=5, learning_rate=0.05, n_estimators=720,
                              max_bin=55, bagging_fraction=0.8, bagging_freq=5, feature_fraction=0.2319,
                              feature_fraction_seed=9, bagging_seed=9, min_data_in_leaf=6, min_sum_hessian_in_leaf=11)
LGB_fit = model_lgb.fit(X, y)

# Random Forest
rand_for = RandomForestClassifier(n_estimators=500, random_state=40)
rand_for_fit = rand_for.fit(X, y)

## Functions

### Data Cleaning

In [0]:
# Dummy Variables: expand `bank_train` with pandas' dummy (indicator) encoding.
bank_train_d = pd.get_dummies(bank_train)
# Drop Variable: remove the 'duration' column from the encoded frame.
bank_train_d = bank_train_d.drop(columns='duration')

### Cross Validation

In [0]:
# Holdout: set aside 10% of the rows as a test set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10)

# K-Folds: keep the splitter object itself. Calling `.get_n_splits(x)` on it
# would leave only the integer 5 in `kf`, discarding the `shuffle` and
# `random_state` settings configured here.
n_folds = 5
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

### Stacked Regression

In [0]:
# Averaged Base Models Procedure
class AveragingModels(RegressorMixin, TransformerMixin, BaseEstimator):
    """Regressor whose prediction is the plain mean of its base models' predictions.

    The mixins are listed before ``BaseEstimator``, the base-class order
    scikit-learn documents for custom estimators.
    NOTE(review): ``TransformerMixin`` is kept only so existing ``isinstance``
    checks keep working; no ``transform`` method is defined on this class.

    Parameters
    ----------
    models : sequence of estimators
        Base estimators. They are never fitted directly; ``fit`` trains
        clones so the objects passed in stay untouched.

    Attributes
    ----------
    models_ : list of estimators
        The fitted clones, in the same order as ``models``. Set by ``fit``.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Fit a fresh clone of every base model on ``(X, y)``.

        Returns
        -------
        self
        """
        # Clone first so repeated fits never reuse state from the originals.
        self.models_ = [clone(model) for model in self.models]

        for model in self.models_:
            model.fit(X, y)

        return self

    def predict(self, X):
        """Return the mean of the fitted base models' predictions for ``X``."""
        # One column per base model, one row per sample.
        predictions = np.column_stack([
            model.predict(X) for model in self.models_
        ])
        return np.mean(predictions, axis=1)


averaged_models = AveragingModels(models=(lasso, KRR, ENet))

# Meta-Model Procedure
class StackingAveragedModels(RegressorMixin, TransformerMixin, BaseEstimator):
    """Stacked regressor: a meta-model trained on out-of-fold base predictions.

    The mixins are listed before ``BaseEstimator``, the base-class order
    scikit-learn documents for custom estimators.
    NOTE(review): ``TransformerMixin`` is kept only so existing ``isinstance``
    checks keep working; no ``transform`` method is defined on this class.

    Parameters
    ----------
    base_models : sequence of estimators
        First-level estimators; clones of each are fitted once per fold.
    meta_model : estimator
        Second-level estimator, cloned and fitted on the out-of-fold
        predictions of the base models.
    n_folds : int, default=5
        Number of K-fold splits used to build the out-of-fold predictions.

    Attributes
    ----------
    base_models_ : list of list of estimators
        ``base_models_[i]`` holds the ``n_folds`` fitted clones of
        ``base_models[i]``. Set by ``fit``.
    meta_model_ : estimator
        The fitted clone of ``meta_model``. Set by ``fit``.
    """

    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    @staticmethod
    def _take_rows(data, row_positions):
        """Select rows of ``data`` by integer position.

        pandas objects are indexed through ``.iloc``; plain ``obj[idx]`` on a
        DataFrame selects columns and on a Series selects by label. Any other
        input is indexed with ``data[row_positions]`` as before.
        """
        if hasattr(data, "iloc"):
            return data.iloc[row_positions]
        return data[row_positions]

    def fit(self, X, y):
        """Fit per-fold clones of the base models, then the meta-model.

        Returns
        -------
        self
        """
        self.base_models_ = [[] for _ in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        # Splitter is built with a fixed random_state (156) and reused for
        # every base model below.
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # Column i receives base model i's prediction for each sample, made by
        # the clone whose training fold excluded that sample.
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(self._take_rows(X, train_index),
                             self._take_rows(y, train_index))
                y_pred = instance.predict(self._take_rows(X, holdout_index))
                out_of_fold_predictions[holdout_index, i] = y_pred

        # Train the cloned meta-model on the out-of-fold predictions.
        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on fold-averaged base predictions.

        For each base model, the predictions of its per-fold clones are
        averaged into a single meta-feature column.
        """
        meta_features = np.column_stack([
            np.column_stack([model.predict(X) for model in base_models]).mean(axis=1)
            for base_models in self.base_models_])
        return self.meta_model_.predict(meta_features)


stacked_averaged_models = StackingAveragedModels(base_models=(lasso, KRR, ENet), meta_model=GBoost)

### Feature Selection

In [0]:
# Feature Importance: tabulate the forest's importances, labelled by column.
# NOTE(review): assumes rand_for was fitted on the columns of bank_train_num,
# in the same order -- confirm.
feat_imp = pd.DataFrame(
    data=rand_for.feature_importances_,
    index=bank_train_num.columns,
)
print(feat_imp)