In [35]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from scipy.ndimage import imread
import glob
import matplotlib.pyplot as plt

%run ../util.ipynb

## Import data based on pipeline type

In [3]:
def import_data(pipeline_type):
    X = np.load("../data_matrices/X.npy")
    Y = np.load("../data_matrices/Y.npy")
    
    if pipeline_type == 'classification':
        Y = np.array([int(np.round(y)) for y in Y])
    return X,Y

In [27]:
def import_data2(pipeline_type):
    X = [imread(x).flatten() for x in glob.glob('../raw_data/*.jpg')]
    Y = np.load("../data_matrices/Y.npy")
    
    if pipeline_type == 'classification':
        Y = np.array([int(np.round(y)) for y in Y])
    return X,Y

## Create model dictionary

In [4]:
def create_model_dict():
    model_dict_reg = {}
    model_dict_clf = {}
    model_dict = {'regression':model_dict_reg, 'classification':model_dict_clf}
    
    model_dict_reg['ridge'] = (ridge_regression_pipeline, ridge_regression_parameters)
    model_dict_reg['lasso'] = (lasso_regression_pipeline, lasso_regression_parameters)
    model_dict_reg['en'] = (elastic_net_regression_pipeline, elastic_net_regression_parameters)
    model_dict_reg['knn'] = (knn_regression_pipeline, knn_regression_parameters)
    
    model_dict_clf['knn'] = (knn_classification_pipeline, knn_classification_parameters)
    model_dict_clf['svm'] = (svm_classification_pipeline, svm_classification_parameters)
    model_dict_clf['lda'] = (lda_classification_pipeline, lda_classification_parameters)
    model_dict_clf['qda'] = (qda_classification_pipeline, qda_classification_parameters)
    
    return model_dict

## Grid Search

In [116]:
def grid_search(X, Y, pipeline_name, pipeline_type):
    pipeline, parameters = model_dict[pipeline_type][pipeline_name]
    if pipeline_type == 'regression':
        grid = GridSearchCV(estimator=pipeline, param_grid=parameters, scoring='neg_mean_squared_error')
        grid.fit(X, Y)
        return -grid.best_score_, grid.best_estimator_
    elif pipeline_type == 'classification':
        grid = GridSearchCV(estimator=pipeline, param_grid=parameters, scoring='accuracy')
        grid.fit(X, Y)
        return grid.best_score_, grid.best_estimator_

## MAIN

In [54]:
%run pipelines.py
%run parameters.py
model_dict = create_model_dict()


pipeline_name = 'ridge'
pipeline_type = 'regression'
# pipeline_type = 'classification'

X,Y = import_data2(pipeline_type)

score, params = grid_search(X, Y, pipeline_name, pipeline_type)

print(score, params)

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 7.530088740184848e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 9.893472941621136e-18
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.0624902771565558e-16
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 2.843846853957198e-17


[[  6.35483687e-06   8.19194223e-07   7.62200072e-06 ...,   1.11100310e-05
   -1.68592595e-06  -2.13826126e-06]]


6.94858930745 Pipeline(memory=None,
     steps=[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('ridge', Ridge(alpha=0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001))])
