In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

In [None]:
# Point these at the training and test CSV files before running the notebook.
train_path = ''
test_path = ''

# Both files are read the same way: comma-separated, no header row
# (header=None makes pandas assign integer column labels).
train, test = (
    pd.read_csv(csv_path, sep=',', header=None)
    for csv_path in (train_path, test_path)
)

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

In [None]:
# The last column of each frame is used as the target; every other column
# is kept as a feature.
X_train, y_train = train.drop(columns=train.columns[-1]), train.iloc[:, -1]

X_test, y_test = test.drop(columns=test.columns[-1]), test.iloc[:, -1]

# Construct pipelines


In [None]:
# Build one pipeline per candidate model. Every pipeline standardises the
# features in step 'scl' and then fits the classifier in step 'clf'; each
# classifier is created with random_state=42.
pipe_lr = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('clf', LogisticRegression(random_state=42)),
])

pipe_rf = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('clf', RandomForestClassifier(random_state=42)),
])

pipe_svm = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('clf', svm.SVC(random_state=42)),
])

In [None]:
# Set grid search params
# Keys use the '<step>__<param>' convention, so 'clf__...' targets the
# classifier step of each pipeline.
param_range = [1, 4]

# Logistic regression: search both penalties.
# The l2 sub-grid keeps whatever solver the estimator was built with.
# The l1 sub-grid pins 'saga' because scikit-learn's default solver ('lbfgs')
# rejects penalty='l1'; without a compatible solver every l1 fit fails and is
# scored as NaN, so the search would silently only evaluate l2.
# NOTE(review): 'saga' may need a larger max_iter to converge on some data.
grid_params_lr = [
    {'clf__penalty': ['l2']},
    {'clf__penalty': ['l1'], 'clf__solver': ['saga']},
]

# Random forest: search over the maximum tree depth.
grid_params_rf = [{'clf__max_depth': param_range}]

In [None]:
# Construct grid searches
# n_jobs=-1 tells joblib to use all available CPU cores.
# (The previous expression `1-1` evaluated to 0, which joblib rejects with
# "n_jobs == 0 in Parallel has no meaning" as soon as fit() is called.)
jobs = -1

# Logistic regression search: 5-fold CV scored by accuracy, run with the
# default n_jobs.
gs_lr = GridSearchCV(estimator=pipe_lr,
        param_grid=grid_params_lr,
        scoring='accuracy',
        cv=5)

# Random forest search: same CV/scoring setup, with the fits parallelised.
gs_rf = GridSearchCV(estimator=pipe_rf,
        param_grid=grid_params_rf,
        scoring='accuracy',
        cv=5,
        n_jobs=jobs)

In [None]:
# Grid searches to run, in order.
grids = [gs_lr, gs_rf]

# Display name for each grid search, keyed by its position in `grids`.
grid_dict = dict(enumerate(['Logistic Regression', 'Random Forest']))

# Training with Grid Search

In [None]:
# Fit every grid search, report its scores, and remember which one achieves
# the highest accuracy on the test split.
# NOTE(review): the winner is chosen on the test set itself, so the reported
# best test accuracy is an optimistic estimate.
print('Performing model optimizations...')
best_acc, best_clf, best_gs = 0.0, 0, ''
for idx, gs in enumerate(grids):
    print('\nEstimator: %s' % grid_dict[idx])
    # Run the parameter search on the training split.
    gs.fit(X_train, y_train)
    print('Best params: %s' % gs.best_params_)
    # best_score_ is the score GridSearchCV recorded for the best parameters.
    print('Best training accuracy: %.3f' % gs.best_score_)
    # Predict on the test split and compute its accuracy once for reuse below.
    y_pred = gs.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)
    print('Test set accuracy score for best params: %.3f ' % test_acc)
    # Strict '>' means the earliest grid wins ties; if nothing beats 0.0 the
    # initial best_clf index (0) is what gets reported.
    if test_acc > best_acc:
        best_acc, best_gs, best_clf = test_acc, gs, idx
print('\nClassifier with best test set accuracy: %s' % grid_dict[best_clf])