# Modeling Pipeline
Creating a Pipeline to test out each model

### Importing Libraries

In [3]:
import _pickle as pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

### Loading the Data
Either dataset can be used: comment out the one that is not wanted.

In [4]:
# Choose the dataset by leaving exactly one of the two paths uncommented.

# Top 10 features Dataset
dataset_path = "Top-10-Features-Models/top10_df.pkl"

# Top 10 Correlated Dataset
# dataset_path = "Top-10-Correlation-Models/top10_corr_df.pkl"

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(dataset_path, mode='rb') as fp:
    df = pickle.load(fp)

### Scaling the Data

In [5]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature column; the "Decision" target is left unscaled.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split, so test rows contribute to the scaling statistics -- consider
# fitting on the training split only (e.g. as a Pipeline step).
scaler = StandardScaler()

# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onward.
features_df = df.drop(columns=["Decision"])

# fit_transform returns a bare array, so the original index and column
# labels are re-attached here.
scaled_df = pd.DataFrame(
    scaler.fit_transform(features_df),
    index=features_df.index,
    columns=features_df.columns,
)

# Put the unscaled target back next to the scaled features (index-aligned).
df = scaled_df.join(df["Decision"])

### Splitting the Data

In [6]:
# Separate the feature matrix from the "Decision" target.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onward.
X = df.drop(columns=["Decision"])
y = df["Decision"]

# Train, test, split (scikit-learn's default proportions).
# NOTE(review): no random_state is passed, so the split is not reproducible
# between runs -- confirm whether that is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y)

## Creating a Pipeline 
Using 10 Different Classification Models

In [7]:
# Importing the 10 models
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

## Preventing an error from occurring: XGBoost causes the kernel to die.
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from xgboost import XGBClassifier

### Creating pipelines for each model

In [9]:
def _clf_pipeline(estimator):
    """Wrap *estimator* in a one-step Pipeline whose only step is named 'clf'."""
    return Pipeline(steps=[('clf', estimator)])


# One single-step pipeline per classifier, each built with default settings.
pipe_ada = _clf_pipeline(AdaBoostClassifier())          # Adaboost
pipe_gb  = _clf_pipeline(GradientBoostingClassifier())  # Gradient Boost
pipe_rf  = _clf_pipeline(RandomForestClassifier())      # Random Forest
pipe_dt  = _clf_pipeline(DecisionTreeClassifier())      # Decision Tree
pipe_dum = _clf_pipeline(DummyClassifier())             # Dummy (Baseline)
pipe_knn = _clf_pipeline(KNeighborsClassifier())        # K Nearest Neighbors
pipe_lr  = _clf_pipeline(LogisticRegression())          # Logistic Regression
pipe_nb  = _clf_pipeline(GaussianNB())                  # Naive Bayes
pipe_svm = _clf_pipeline(SVC())                         # Support Vector Machine