In [32]:
    import os
    import sys
    import numpy as np
    import pandas as pd
    import sklearn.preprocessing as pp
    from sklearn.cross_validation import train_test_split
    from sklearn import svm, linear_model, naive_bayes
    import simplejson as json
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_union, make_pipeline
    from sklearn.preprocessing import FunctionTransformer
    from sklearn.base import BaseEstimator, TransformerMixin
    from sklearn.model_selection import GridSearchCV

    # from https://medium.com/@literallywords/sklearn-identity-transformer-fcc18bac0e98
    class IdentityTransformer(BaseEstimator, TransformerMixin):
        def __init__(self):
            pass

        def fit(self, input_array, y=None):
            return self

        def transform(self, input_array, y=None):
            return input_array * 1

    # Get training data
    df = pd.read_csv('pima-data.csv')
    
    float_formatter = lambda x: "%.2f" % x
    np.set_printoptions(formatter={'float_kind':float_formatter})
    
    feature_col_names_fullset = ['num_preg', 'glucose_conc', 'diastolic_bp', 'thickness', 'insulin', 'bmi', 'diab_pred', 'age']
    feature_col_names = ['num_preg', 'glucose_conc', 'bmi', 'diab_pred']
    predicted_class_names = ['diabetes']
    
    x = df[feature_col_names].values
    y = df[predicted_class_names].values
    
    fill_0 = pp.Imputer(missing_values=0, strategy="mean", axis=0)
    scaler = pp.StandardScaler()
    
    def get_invalid0_cols(df):
        return df[['glucose_conc', 'diastolic_bp', 'thickness', 'insulin', 'bmi', 'diab_pred', 'age']]

    def get_valid0_cols(df):
        return df[['num_preg']]

    vec = make_union(*[
        make_pipeline(FunctionTransformer(get_valid0_cols, validate=False), IdentityTransformer()),
        make_pipeline(FunctionTransformer(get_invalid0_cols, validate=False), fill_0),
        #make_pipeline(FunctionTransformer(get_num_cols, validate=False), scaler)
    ])
    
    x = vec.fit_transform(df)
    #x = scaler.fit_transform(x)
    
    split_test_size = 0.20
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=split_test_size, random_state=73) 

    scaler = pp.StandardScaler()

    # Train model - GridSearch Optimized NuSVC
    def getNuSVCModel():
        global x_train
        global x_test
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.fit_transform(x_test)
        nuSvc_model = svm.NuSVC()
        parameters = {
            'kernel':('linear', 'rbf'), 
            'nu': np.arange( 0.01, 0.7, 0.01 ).tolist(),
            'gamma': np.arange( 0.01, 3.0+0.0, 0.1 ).tolist()}
        clf = GridSearchCV(nuSvc_model, parameters)
        clf.fit(x_train, y_train.ravel()) 
        return clf


    clf = getNuSVCModel()
    
    
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(x_test)
    print(classification_report(y_true, y_pred))
    print()

Best parameters set found on development set:

{'nu': 0.47000000000000003, 'gamma': 0.01, 'kernel': 'linear'}

Grid scores on development set:

0.347 (+/-0.079) for {'nu': 0.01, 'gamma': 0.01, 'kernel': 'linear'}
0.471 (+/-0.278) for {'nu': 0.02, 'gamma': 0.01, 'kernel': 'linear'}
0.373 (+/-0.181) for {'nu': 0.03, 'gamma': 0.01, 'kernel': 'linear'}
0.513 (+/-0.294) for {'nu': 0.04, 'gamma': 0.01, 'kernel': 'linear'}
0.537 (+/-0.116) for {'nu': 0.05, 'gamma': 0.01, 'kernel': 'linear'}
0.484 (+/-0.116) for {'nu': 0.060000000000000005, 'gamma': 0.01, 'kernel': 'linear'}
0.565 (+/-0.216) for {'nu': 0.06999999999999999, 'gamma': 0.01, 'kernel': 'linear'}
0.477 (+/-0.227) for {'nu': 0.08, 'gamma': 0.01, 'kernel': 'linear'}
0.498 (+/-0.323) for {'nu': 0.09, 'gamma': 0.01, 'kernel': 'linear'}
0.546 (+/-0.290) for {'nu': 0.09999999999999999, 'gamma': 0.01, 'kernel': 'linear'}
0.585 (+/-0.257) for {'nu': 0.11, 'gamma': 0.01, 'kernel': 'linear'}
0.493 (+/-0.228) for {'nu': 0.12, 'gamma': 0.01, 'k

NameError: name 'classification_report' is not defined