In [1]:
# Load libraries
import numpy
from numpy import arange
from matplotlib import pyplot
from pandas import read_csv
from pandas import set_option
from pandas.tools.plotting import scatter_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Imputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from keras.wrappers.scikit_learn import KerasRegressor
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Load full dataset
filename = 'ssdata_for_train_common.txt'
dataset_full = read_csv(filename, sep='\t')
dataset_full.shape

(1257780, 87)

In [3]:
# Select features to be used to build model
cols = ["Site.Type","TA.raw","SPS.raw","sRNA1A.scaled","sRNA1C.scaled","sRNA1G.scaled","sRNA8A.scaled","sRNA8C.scaled","sRNA8G.scaled","site8A.scaled","site8C.scaled","site8G.scaled","local.AU.raw","X3..pairing.raw","SA.raw","Min_dist.raw","PCT.raw","ORF.length.raw","X3.UTR.length.raw","Offset.6mer.raw","ORF.8mer.raw","transfection_final_conc_curated","trans_reagent","genetic_background","hours","GPL","Series","cell","log2fc"]
dataset_trim = dataset_full[cols]
dataset_trim.shape

(1257780, 29)

In [4]:
dataset_trim.head()

Unnamed: 0,Site.Type,TA.raw,SPS.raw,sRNA1A.scaled,sRNA1C.scaled,sRNA1G.scaled,sRNA8A.scaled,sRNA8C.scaled,sRNA8G.scaled,site8A.scaled,...,Offset.6mer.raw,ORF.8mer.raw,transfection_final_conc_curated,trans_reagent,genetic_background,hours,GPL,Series,cell,log2fc
0,7mer-m8,3.704,-8.8,0,0,0,0,0,1,0,...,1,0,2.5e-08,Lipofectamine2000,wildtype,48.0,GPL570,GSE27431,HEY,0.167583
1,7mer-1a,3.621,-6.43,0,0,0,0,1,0,0,...,0,0,2.5e-08,Lipofectamine2000,wildtype,48.0,GPL570,GSE27431,HEY,0.353088
2,6mer,3.539,-5.66,0,0,0,0,0,0,0,...,0,0,5e-08,HiPerFect,wildtype,40.0,GPL4133,GSE28522,HeLa,0.275285
3,6mer,3.539,-5.66,0,0,0,0,0,0,0,...,0,0,5e-08,HiPerFect,wildtype,40.0,GPL4133,GSE28522,TW01,-0.023325
4,6mer,3.569,-3.58,0,0,0,1,0,0,0,...,1,0,,HiPerFect,wildtype,96.0,GPL13607,GSE29207,hMADS,0.00955


In [5]:
# Summarize Data

# shape
print(dataset_trim.shape)
# types
print(dataset_trim.dtypes)

(1257780, 29)
Site.Type                           object
TA.raw                             float64
SPS.raw                            float64
sRNA1A.scaled                        int64
sRNA1C.scaled                        int64
sRNA1G.scaled                        int64
sRNA8A.scaled                        int64
sRNA8C.scaled                        int64
sRNA8G.scaled                        int64
site8A.scaled                        int64
site8C.scaled                        int64
site8G.scaled                        int64
local.AU.raw                       float64
X3..pairing.raw                    float64
SA.raw                             float64
Min_dist.raw                       float64
PCT.raw                            float64
ORF.length.raw                     float64
X3.UTR.length.raw                  float64
Offset.6mer.raw                      int64
ORF.8mer.raw                         int64
transfection_final_conc_curated    float64
trans_reagent                       obje

### Preprocess Categorical Data

In [6]:
# Show classes in the features in object type, which must be transformed into digits.
obj_cols = ['Site.Type', "trans_reagent","genetic_background","GPL","Series","cell",]
for i, v in enumerate(obj_cols):
    print dataset_trim.groupby(v).size()
    print 

Site.Type
6mer       663452
7mer-1a    230616
7mer-m8    265919
8mer-1a     97793
dtype: int64

trans_reagent
Amaxa nucleofection             7284
Cell Line Nucleofector kit     30919
DharmaFECT                     22992
DharmaFECT3                     8502
HiPerFect                      66767
INTERFERin                      2575
Lentiviral vector              72953
Lipofectamin RNAi MAX         406776
Lipofectamine2000             378591
MicroPorator MP-100             2528
Neon transfection System        3642
Oligofectamine                 20673
PureFection                     6704
Retroviral vector              13148
SilentFect                     28828
Viral vector                    3042
X-tremeGENE siRNA               3857
siPORT neo-FX                  32412
dtype: int64

genetic_background
Dicer        119591
wildtype    1138189
dtype: int64

GPL
GPL10332    128884
GPL10558    127171
GPL10739      2972
GPL10904      7124
GPL13158      7466
GPL13497      8884
GPL13607    110402


In [7]:
dataset_enc = dataset_trim
mlb = []
obj_cols = ['Site.Type', "trans_reagent","genetic_background","GPL","Series","cell",]
for i, v in enumerate(obj_cols):    
    mlb.append(LabelEncoder())
    mlb[i].fit(dataset_trim[v])
    dataset_enc[v] = mlb[i].transform(dataset_trim[v])
    print "Number of classes for " + v + " : " + str(len(mlb[i].classes_))
    #print mlb[i].classes_
dataset_enc.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of classes for Site.Type : 4


  flag = np.concatenate(([True], aux[1:] != aux[:-1]))
  return aux[:-1][aux[1:] == aux[:-1]]


Number of classes for trans_reagent : 19
Number of classes for genetic_background : 2
Number of classes for GPL : 44
Number of classes for Series : 176
Number of classes for cell : 126


Unnamed: 0,Site.Type,TA.raw,SPS.raw,sRNA1A.scaled,sRNA1C.scaled,sRNA1G.scaled,sRNA8A.scaled,sRNA8C.scaled,sRNA8G.scaled,site8A.scaled,...,Offset.6mer.raw,ORF.8mer.raw,transfection_final_conc_curated,trans_reagent,genetic_background,hours,GPL,Series,cell,log2fc
0,2,3.704,-8.8,0,0,0,0,0,1,0,...,1,0,2.5e-08,9,1,48.0,25,59,35,0.167583
1,1,3.621,-6.43,0,0,0,0,1,0,0,...,0,0,2.5e-08,9,1,48.0,25,59,35,0.353088
2,0,3.539,-5.66,0,0,0,0,0,0,0,...,0,0,5e-08,5,1,40.0,19,63,43,0.275285
3,0,3.539,-5.66,0,0,0,0,0,0,0,...,0,0,5e-08,5,1,40.0,19,63,112,-0.023325
4,0,3.569,-3.58,0,0,0,1,0,0,0,...,1,0,,5,1,96.0,6,66,121,0.00955


### Deal with NaN

In [8]:
dataset_unfilled = dataset_enc
dataset_unfilled.isnull().sum()

Site.Type                               0
TA.raw                                  0
SPS.raw                                 0
sRNA1A.scaled                           0
sRNA1C.scaled                           0
sRNA1G.scaled                           0
sRNA8A.scaled                           0
sRNA8C.scaled                           0
sRNA8G.scaled                           0
site8A.scaled                           0
site8C.scaled                           0
site8G.scaled                           0
local.AU.raw                            0
X3..pairing.raw                         0
SA.raw                                  0
Min_dist.raw                            0
PCT.raw                            239268
ORF.length.raw                          0
X3.UTR.length.raw                       0
Offset.6mer.raw                         0
ORF.8mer.raw                            0
transfection_final_conc_curated    361102
trans_reagent                           0
genetic_background                

In [12]:
# Imputing NaN with mean value of the entire column

imr = Imputer(missing_values='NaN', strategy='mean', axis=0)
imr = imr.fit(dataset_unfilled)
dataset = imr.transform(dataset_unfilled)

In [None]:
seed = 7
numpy.random.seed(seed=seed)
rand_ix=numpy.random.randint(1, dataset.shape[0], size=30000)
rand_ix

In [45]:
# Prepare Data

# Split-out validation dataset

# Use Entire Data
#X = dataset[:,0:28]
#Y = dataset[:,28]

# Use only partial Data
X = dataset[rand_ix,0:28]
Y = dataset[rand_ix,28]

validation_size = 0.20
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size, random_state=seed)

In [36]:
input_dim = 28

def plot_learning_curve(history):    
    pyplot.plot(history.history['loss'])
    pyplot.plot(history.history['val_loss'])
    pyplot.title('model loss - Mean Squared Error(MSE)')
    pyplot.ylabel('loss')
    pyplot.xlabel('epoch')
    pyplot.legend(['train', 'validation'], loc='upper right')
    pyplot.show()

# define base model
def baseline_model():
	# create model
	model = Sequential()
	model.add(Dense(input_dim, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal'))
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='adam')
	return model

def larger_model():
	# create model
	model = Sequential()
	model.add(Dense(input_dim, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
	model.add(Dense(6, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal'))
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='adam')
	return model

# define wider model
def wider_model():
	# create model
	model = Sequential()
	model.add(Dense(128, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal'))
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='adam')
	return model

def more_larger_model():
	# create model
	model = Sequential()
	model.add(Dense(input_dim, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
	model.add(Dense(64, kernel_initializer='normal', activation='relu'))
	model.add(Dense(32, kernel_initializer='normal', activation='relu'))
	model.add(Dense(8, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal'))
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='adam')
	return model

In [None]:
# Evaluate Algorithms
# Test options and evaluation metric
num_folds = 5
seed = 7
scoring = 'neg_mean_squared_error'

# Spot Check Algorithms
models = []
models.append(('LR', LinearRegression()))
models.append(('RIDGE', Ridge()))
models.append(('LASSO', Lasso()))
models.append(('EN', ElasticNet()))
models.append(('KNN', KNeighborsRegressor()))
models.append(('CART', DecisionTreeRegressor()))
models.append(('SVR', SVR()))
models.append(('XGB', XGBRegressor()))
models.append(('FFNN', KerasRegressor(build_fn=baseline_model, epochs=2000, batch_size=1024, verbose=0)))

# evaluate each model in turn
results = []
names = []
for name, model in models:
	kfold = KFold(n_splits=num_folds, random_state=seed)
	cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)

# Compare Algorithms
fig = pyplot.figure(figsize=(10, 7))
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()

LR: -0.612589 (0.045929)
RIDGE: -0.612588 (0.045929)
LASSO: -0.625626 (0.049576)
EN: -0.624634 (0.049577)
KNN: -0.432357 (0.018166)
CART: -0.800002 (0.041370)


In [None]:
# Test options and evaluation metric
num_folds = 3
seed = 7
scoring = 'neg_mean_squared_error'

# Standardize the dataset
pipelines = []
pipelines.append(('ScaledLR', Pipeline([('Scaler', StandardScaler()),('LR', LinearRegression())])))
pipelines.append(('ScaledRIDGE', Pipeline([('Scaler', StandardScaler()),('RIDGE', Ridge())])))
pipelines.append(('ScaledLASSO', Pipeline([('Scaler', StandardScaler()),('LASSO', Lasso())])))
pipelines.append(('ScaledEN', Pipeline([('Scaler', StandardScaler()),('EN', ElasticNet())])))
pipelines.append(('ScaledKNN', Pipeline([('Scaler', StandardScaler()),('KNN', KNeighborsRegressor())])))
pipelines.append(('ScaledCART', Pipeline([('Scaler', StandardScaler()),('CART', DecisionTreeRegressor())])))
pipelines.append(('ScaledSVR', Pipeline([('Scaler', StandardScaler()),('SVR', SVR())])))
pipelines.append(('ScaledXGB', Pipeline([('Scaler', StandardScaler()),('XGB', XGBRegressor())])))
pipelines.append(('ScaledFFNN', Pipeline([('Scaler', StandardScaler()),('FFNN', KerasRegressor(build_fn=baseline_model, epochs=2000, batch_size=1024, verbose=0))])))

results = []
names = []

for name, model in pipelines:
	kfold = KFold(n_splits=num_folds, random_state=seed)
	cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)

# Compare Algorithms
fig = pyplot.figure(figsize=(10, 7))
fig.suptitle('Scaled Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()

In [43]:
# grid search
max_depth = range(1, 11, 2)
print(max_depth)
param_grid = dict(max_depth=max_depth)
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
grid_search = GridSearchCV(model, param_grid, scoring="neg_mean_squared_error", n_jobs=-1, cv=kfold, verbose=1)
grid_result = grid_search.fit(X_train, Y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    pyplot.errorbar(max_depth, means, yerr=stds)
    pyplot.title("XGBoost max_depth vs MSE")
    pyplot.xlabel('max_depth')
    pyplot.ylabel('Log Loss')
    pyplot.savefig('max_depth.png')

[1, 3, 5, 7, 9]
Fitting 3 folds for each of 5 candidates, totalling 15 fits




JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/usr/local/anaconda2/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = '/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
/usr/local/anaconda2/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f42451fb930, file "/...2.7/site-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/a...python2.7/site-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f42451fb930, file "/...2.7/site-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/a...python2.7/site-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    469             return self.subapp.start()
    470         if self.poller is not None:
    471             self.poller.start()
    472         self.kernel.start()
    473         try:
--> 474             ioloop.IOLoop.instance().start()
    475         except KeyboardInterrupt:
    476             pass
    477 
    478 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    882                 self._events.update(event_pairs)
    883                 while self._events:
    884                     fd, events = self._events.popitem()
    885                     try:
    886                         fd_obj, handler_func = self._handlers[fd]
--> 887                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    888                     except (OSError, IOError) as e:
    889                         if errno_from_exception(e) == errno.EPIPE:
    890                             # Happens when the client closes the connection
    891                             pass

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2017-05-02T17:34:05.141968', u'msg_id': u'C792531207324AF48B5F96B3E6EA5319', u'msg_type': u'execute_request', u'session': u'746550B78D7E4FC8961B972C25286F7A', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'C792531207324AF48B5F96B3E6EA5319', 'msg_type': u'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['746550B78D7E4FC8961B972C25286F7A']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2017-05-02T17:34:05.141968', u'msg_id': u'C792531207324AF48B5F96B3E6EA5319', u'msg_type': u'execute_request', u'session': u'746550B78D7E4FC8961B972C25286F7A', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'C792531207324AF48B5F96B3E6EA5319', 'msg_type': u'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['746550B78D7E4FC8961B972C25286F7A'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2017-05-02T17:34:05.141968', u'msg_id': u'C792531207324AF48B5F96B3E6EA5319', u'msg_type': u'execute_request', u'session': u'746550B78D7E4FC8961B972C25286F7A', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'C792531207324AF48B5F96B3E6EA5319', 'msg_type': u'execute_request', 'parent_header': {}})
    385         if not silent:
    386             self.execution_count += 1
    387             self._publish_execute_input(code, parent, self.execution_count)
    388 
    389         reply_content = self.do_execute(code, silent, store_history,
--> 390                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    391 
    392         # Flush output before sending the reply.
    393         sys.stdout.flush()
    394         sys.stderr.flush()

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')'
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')',), **kwargs={'silent': False, 'store_history': True})
    496             )
    497         self.payload_manager.write_payload(payload)
    498 
    499     def run_cell(self, *args, **kwargs):
    500         self._last_traceback = None
--> 501         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')',)
        kwargs = {'silent': False, 'store_history': True}
    502 
    503     def _showtraceback(self, etype, evalue, stb):
    504         # try to preserve ordering of tracebacks and print statements
    505         sys.stdout.flush()

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'# grid search\nmax_depth = range(1, 11, 2)\npr...g Loss\')\n    pyplot.savefig(\'max_depth.png\')', store_history=True, silent=False, shell_futures=True)
   2712                 self.displayhook.exec_result = result
   2713 
   2714                 # Execute the user code
   2715                 interactivity = "none" if silent else self.ast_node_interactivity
   2716                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2717                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2718                 
   2719                 self.last_execution_succeeded = not has_raised
   2720 
   2721                 # Reset this so later displayed values do not modify the

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Print object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Print object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.For object>], cell_name='<ipython-input-43-731f0dacf47e>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 7f41defb89d0, executi..._before_exec=None error_in_exec=None result=None>)
   2816 
   2817         try:
   2818             for i, node in enumerate(to_run_exec):
   2819                 mod = ast.Module([node])
   2820                 code = compiler(mod, cell_name, "exec")
-> 2821                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f4131bc9c30, file "<ipython-input-43-731f0dacf47e>", line 7>
        result = <ExecutionResult object at 7f41defb89d0, executi..._before_exec=None error_in_exec=None result=None>
   2822                     return True
   2823 
   2824             for i, node in enumerate(to_run_interactive):
   2825                 mod = ast.Interactive([node])

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f4131bc9c30, file "<ipython-input-43-731f0dacf47e>", line 7>, result=<ExecutionResult object at 7f41defb89d0, executi..._before_exec=None error_in_exec=None result=None>)
   2876         outflag = 1  # happens in more places, so it's easier as default
   2877         try:
   2878             try:
   2879                 self.hooks.pre_run_code_hook()
   2880                 #rprint('Running code', repr(code_obj)) # dbg
-> 2881                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f4131bc9c30, file "<ipython-input-43-731f0dacf47e>", line 7>
        self.user_global_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'Dense': <class 'keras.layers.core.Dense'>, 'ElasticNet': <class 'sklearn.linear_model.coordinate_descent.ElasticNet'>, 'ExtraTreesRegressor': <class 'sklearn.ensemble.forest.ExtraTreesRegressor'>, 'GradientBoostingRegressor': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u"# Load libraries\nimport numpy\nfrom numpy imp...essor\nget_ipython().magic(u'matplotlib inline')", u"# Load full dataset\nfilename = 'ssdata_for_tr...ead_csv(filename, sep='\\t')\ndataset_full.shape", u'# Select features to be used to build model\nc...et_trim = dataset_full[cols]\ndataset_trim.shape', u'dataset_trim.head()', u'# Summarize Data\n\n# shape\nprint(dataset_trim.shape)\n# types\nprint(dataset_trim.dtypes)', u'# Show classes in the features in object type,...print dataset_trim.groupby(v).size()\n    print ', u'dataset_enc = dataset_trim\nmlb = []\nobj_cols...\n    #print mlb[i].classes_\ndataset_enc.head()', u'dataset_unfilled = dataset_enc\ndataset_unfilled.isnull().sum()', u"# Imputing NaN with mean value of the entire c...lled)\ndataset = imr.transform(dataset_unfilled)", u'# Prepare Data\n\n# Split-out validation datas...Y, test_size=validation_size, random_state=seed)', u"input_dim = 28\n\ndef plot_learning_curve(hist...quared_error', optimizer='adam')\n\treturn model", u"# Imputing NaN with mean value of the entire c...lled)\ndataset = imr.transform(dataset_unfilled)", u'numpy.random.randint(1, 10000)', u'numpy.random.randint(1, 10000, size=10000)', u'numpy.random.randint(1, dataset.shape[0], size=10000)', u'randindex=numpy.random.randint(1, dataset.shape[0], size=10000)', u'rand_ix=numpy.random.randint(1, dataset.shape[0], size=10000)\nrand_ix', u'rand_ix=numpy.random.randint(1, dataset.shape[0], size=10000)\nrand_ix', u'rand_ix=numpy.random.randint(1, dataset.shape[0], size=10000)\nrand_ix', ...], 'KFold': <class 'sklearn.model_selection._split.KFold'>, ...}
        self.user_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'Dense': <class 'keras.layers.core.Dense'>, 'ElasticNet': <class 'sklearn.linear_model.coordinate_descent.ElasticNet'>, 'ExtraTreesRegressor': <class 'sklearn.ensemble.forest.ExtraTreesRegressor'>, 'GradientBoostingRegressor': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u"# Load libraries\nimport numpy\nfrom numpy imp...essor\nget_ipython().magic(u'matplotlib inline')", u"# Load full dataset\nfilename = 'ssdata_for_tr...ead_csv(filename, sep='\\t')\ndataset_full.shape", u'# Select features to be used to build model\nc...et_trim = dataset_full[cols]\ndataset_trim.shape', u'dataset_trim.head()', u'# Summarize Data\n\n# shape\nprint(dataset_trim.shape)\n# types\nprint(dataset_trim.dtypes)', u'# Show classes in the features in object type,...print dataset_trim.groupby(v).size()\n    print ', u'dataset_enc = dataset_trim\nmlb = []\nobj_cols...\n    #print mlb[i].classes_\ndataset_enc.head()', u'dataset_unfilled = dataset_enc\ndataset_unfilled.isnull().sum()', u"# Imputing NaN with mean value of the entire c...lled)\ndataset = imr.transform(dataset_unfilled)", u'# Prepare Data\n\n# Split-out validation datas...Y, test_size=validation_size, random_state=seed)', u"input_dim = 28\n\ndef plot_learning_curve(hist...quared_error', optimizer='adam')\n\treturn model", u"# Imputing NaN with mean value of the entire c...lled)\ndataset = imr.transform(dataset_unfilled)", u'numpy.random.randint(1, 10000)', u'numpy.random.randint(1, 10000, size=10000)', u'numpy.random.randint(1, dataset.shape[0], size=10000)', u'randindex=numpy.random.randint(1, dataset.shape[0], size=10000)', u'rand_ix=numpy.random.randint(1, dataset.shape[0], size=10000)\nrand_ix', u'rand_ix=numpy.random.randint(1, dataset.shape[0], size=10000)\nrand_ix', u'rand_ix=numpy.random.randint(1, dataset.shape[0], size=10000)\nrand_ix', ...], 'KFold': <class 'sklearn.model_selection._split.KFold'>, ...}
   2882             finally:
   2883                 # Reset our crash handler in place
   2884                 sys.excepthook = old_excepthook
   2885         except SystemExit as e:

...........................................................................
/home/suns1/Desktop/127.0.0.1/tanmay/<ipython-input-43-731f0dacf47e> in <module>()
      2 max_depth = range(1, 11, 2)
      3 print(max_depth)
      4 param_grid = dict(max_depth=max_depth)
      5 kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
      6 grid_search = GridSearchCV(model, param_grid, scoring="neg_mean_squared_error", n_jobs=-1, cv=kfold, verbose=1)
----> 7 grid_result = grid_search.fit(X_train, Y_train)
      8 
      9 # summarize results
     10 print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
     11 means = grid_result.cv_results_['mean_test_score']

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py in fit(self=GridSearchCV(cv=StratifiedKFold(n_splits=3, rand...     scoring='neg_mean_squared_error', verbose=1), X=array([[  2.   ,   3.569,  -5.24 , ...,  25.   ,...3.432,  -8.41 , ...,   1.   ,  65.   ,  34.   ]]), y=array([ 0.06931248,  0.1439842 ,  0.03702476, ...,  0.12689765,
       -0.1279703 ,  0.62379083]), groups=None)
    940 
    941         groups : array-like, with shape (n_samples,), optional
    942             Group labels for the samples used while splitting the dataset into
    943             train/test set.
    944         """
--> 945         return self._fit(X, y, groups, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...    scoring='neg_mean_squared_error', verbose=1)>
        X = array([[  2.   ,   3.569,  -5.24 , ...,  25.   ,...3.432,  -8.41 , ...,   1.   ,  65.   ,  34.   ]])
        y = array([ 0.06931248,  0.1439842 ,  0.03702476, ...,  0.12689765,
       -0.1279703 ,  0.62379083])
        groups = None
        self.param_grid = {'max_depth': [1, 3, 5, 7, 9]}
    946 
    947 
    948 class RandomizedSearchCV(BaseSearchCV):
    949     """Randomized search on hyper parameters.

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py in _fit(self=GridSearchCV(cv=StratifiedKFold(n_splits=3, rand...     scoring='neg_mean_squared_error', verbose=1), X=array([[  2.   ,   3.569,  -5.24 , ...,  25.   ,...3.432,  -8.41 , ...,   1.   ,  65.   ,  34.   ]]), y=array([ 0.06931248,  0.1439842 ,  0.03702476, ...,  0.12689765,
       -0.1279703 ,  0.62379083]), groups=None, parameter_iterable=<sklearn.model_selection._search.ParameterGrid object>)
    559                                   fit_params=self.fit_params,
    560                                   return_train_score=self.return_train_score,
    561                                   return_n_test_samples=True,
    562                                   return_times=True, return_parameters=True,
    563                                   error_score=self.error_score)
--> 564           for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.model_selection._search.ParameterGrid object>
    565           for train, test in cv_iter)
    566 
    567         # if one choose to see train score, "out" will contain train score info
    568         if self.return_train_score:

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object <genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Tue May  2 17:34:11 2017
PID: 18426                   Python 2.7.13: /usr/local/anaconda2/bin/python
...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(steps=[('Scaler', StandardScaler(copy=T...learn.KerasRegressor object at 0x7f4131815450>)]), memmap([[  2.   ,   3.569,  -5.24 , ...,  25.   ...3.432,  -8.41 , ...,   1.   ,  65.   ,  34.   ]]), array([ 0.06931248,  0.1439842 ,  0.03702476, ...,  0.12689765,
       -0.1279703 ,  0.62379083]), make_scorer(mean_squared_error, greater_is_better=False), array([    0,     1,     4, ..., 39988, 39994, 39998]), array([    2,     3,     5, ..., 39996, 39997, 39999]), 1, {'max_depth': 1})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True}
        self.items = [(<function _fit_and_score>, (Pipeline(steps=[('Scaler', StandardScaler(copy=T...learn.KerasRegressor object at 0x7f4131815450>)]), memmap([[  2.   ,   3.569,  -5.24 , ...,  25.   ...3.432,  -8.41 , ...,   1.   ,  65.   ,  34.   ]]), array([ 0.06931248,  0.1439842 ,  0.03702476, ...,  0.12689765,
       -0.1279703 ,  0.62379083]), make_scorer(mean_squared_error, greater_is_better=False), array([    0,     1,     4, ..., 39988, 39994, 39998]), array([    2,     3,     5, ..., 39996, 39997, 39999]), 1, {'max_depth': 1}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator=Pipeline(steps=[('Scaler', StandardScaler(copy=T...learn.KerasRegressor object at 0x7f4131815450>)]), X=memmap([[  2.   ,   3.569,  -5.24 , ...,  25.   ...3.432,  -8.41 , ...,   1.   ,  65.   ,  34.   ]]), y=array([ 0.06931248,  0.1439842 ,  0.03702476, ...,  0.12689765,
       -0.1279703 ,  0.62379083]), scorer=make_scorer(mean_squared_error, greater_is_better=False), train=array([    0,     1,     4, ..., 39988, 39994, 39998]), test=array([    2,     3,     5, ..., 39996, 39997, 39999]), verbose=1, parameters={'max_depth': 1}, fit_params={}, return_train_score=True, return_parameters=True, return_n_test_samples=True, return_times=True, error_score='raise')
    222     fit_params = fit_params if fit_params is not None else {}
    223     fit_params = dict([(k, _index_param_value(X, v, train))
    224                       for k, v in fit_params.items()])
    225 
    226     if parameters is not None:
--> 227         estimator.set_params(**parameters)
        estimator.set_params = <bound method Pipeline.set_params of Pipeline(st...earn.KerasRegressor object at 0x7f4131815450>)])>
        parameters = {'max_depth': 1}
    228 
    229     start_time = time.time()
    230 
    231     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/pipeline.py in set_params(self=Pipeline(steps=[('Scaler', StandardScaler(copy=T...learn.KerasRegressor object at 0x7f4131815450>)]), **kwargs={'max_depth': 1})
    175 
    176         Returns
    177         -------
    178         self
    179         """
--> 180         self._set_params('steps', **kwargs)
        self._set_params = <bound method Pipeline._set_params of Pipeline(s...earn.KerasRegressor object at 0x7f4131815450>)])>
        kwargs = {'max_depth': 1}
    181         return self
    182 
    183     def _validate_steps(self):
    184         names, estimators = zip(*self.steps)

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/pipeline.py in _set_params(self=Pipeline(steps=[('Scaler', StandardScaler(copy=T...learn.KerasRegressor object at 0x7f4131815450>)]), steps_attr='steps', **params={'max_depth': 1})
     64         step_names, _ = zip(*getattr(self, steps_attr))
     65         for name in list(six.iterkeys(params)):
     66             if '__' not in name and name in step_names:
     67                 self._replace_step(steps_attr, name, params.pop(name))
     68         # 3. Step parameters and other initilisation arguments
---> 69         super(_BasePipeline, self).set_params(**params)
        self.set_params = <bound method Pipeline.set_params of Pipeline(st...earn.KerasRegressor object at 0x7f4131815450>)])>
        params = {'max_depth': 1}
     70         return self
     71 
     72     def _validate_names(self, names):
     73         if len(set(names)) != len(names):

...........................................................................
/usr/local/anaconda2/lib/python2.7/site-packages/sklearn/base.py in set_params(self=Pipeline(steps=[('Scaler', StandardScaler(copy=T...learn.KerasRegressor object at 0x7f4131815450>)]), **params={'max_depth': 1})
    286                 # simple objects case
    287                 if key not in valid_params:
    288                     raise ValueError('Invalid parameter %s for estimator %s. '
    289                                      'Check the list of available parameters '
    290                                      'with `estimator.get_params().keys()`.' %
--> 291                                      (key, self.__class__.__name__))
        key = 'max_depth'
        self.__class__.__name__ = 'Pipeline'
    292                 setattr(self, key, value)
    293         return self
    294 
    295     def __repr__(self):

ValueError: Invalid parameter max_depth for estimator Pipeline. Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________

### Tuning Feed Forward Neural Network Topologies

In [None]:
# create model


history = []

epochs=1000
batch_size=1024

# Fit the model
model = baseline_model()
his = model.fit(X_train, Y_train, validation_data=(X_validation,Y_validation), epochs=epochs, batch_size=batch_size, verbose=0)
history.append(his)

model = larger_model()
his = model.fit(X_train, Y_train, validation_data=(X_validation,Y_validation), epochs=epochs, batch_size=batch_size, verbose=0)
history.append(his)

#model = wider_model()
#his = model.fit(X_train, Y_train, validation_data=(X_validation,Y_validation), epochs=epochs, batch_size=batch_size, verbose=0)
#history.append(his)

#model = more_larger_model()
#his = model.fit(X_train, Y_train, validation_data=(X_validation,Y_validation), epochs=epochs, batch_size=batch_size, verbose=0)
#history.append(his)

plot_learning_curve(history[0])
plot_learning_curve(history[1])
plot_learning_curve(history[2])
plot_learning_curve(history[3])

In [None]:
# grid search epochs, batch size and optimizer
optimizers = ['rmsprop' , 'adam']
inits = ['glorot_uniform', 'normal', 'uniform']
epochs = [50, 100, 150]
batches = [5, 10, 20]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=inits)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, Y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Deep Neural Network Algorithm tuning
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
k_values = numpy.array([1,3,5,7,9,11,13,15,17,19,21])
param_grid = dict(n_neighbors=k_values)
model = KNeighborsRegressor()
kfold = KFold(n_splits=num_folds, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# ensembles
ensembles = []
ensembles.append(('ScaledAB', Pipeline([('Scaler', StandardScaler()),('AB', AdaBoostRegressor())])))
ensembles.append(('ScaledGBM', Pipeline([('Scaler', StandardScaler()),('GBM', GradientBoostingRegressor())])))
ensembles.append(('ScaledRF', Pipeline([('Scaler', StandardScaler()),('RF', RandomForestRegressor())])))
ensembles.append(('ScaledET', Pipeline([('Scaler', StandardScaler()),('ET', ExtraTreesRegressor())])))
results = []
names = []
for name, model in ensembles:
	kfold = KFold(n_splits=num_folds, random_state=seed)
	cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)

# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Scaled Ensemble Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()

In [None]:
# Tune scaled GBM
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
param_grid = dict(n_estimators=numpy.array([50,100,150,200,250,300,350,400]))
model = GradientBoostingRegressor(random_state=seed)
kfold = KFold(n_splits=num_folds, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Make predictions on validation dataset

# prepare the model
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
model = GradientBoostingRegressor(random_state=seed, n_estimators=400)
model.fit(rescaledX, Y_train)
# transform the validation dataset
rescaledValidationX = scaler.transform(X_validation)
predictions = model.predict(rescaledValidationX)
print(mean_squared_error(Y_validation, predictions))