In [0]:
!pip install hyperas
!pip install hyperopt

In [0]:
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

In [0]:
import pandas as pd
import numpy as np
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler, PowerTransformer
from sklearn import preprocessing
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter
from keras.models import Sequential
import time
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Activation
from sklearn.utils import class_weight
from keras import utils as cat_utils
from sklearn.model_selection import train_test_split

**Fetching data for base learner optimization**

In [0]:
# Download the base-learner optimisation dataset (CSV hosted on GitHub) into a DataFrame.
data_git = 'https://raw.githubusercontent.com/rohandongare-nci/18120199-Data/master/Base%20Learner%20Optimization/Base%20Learners%20Optimization.csv'
sdss_1 = pd.read_csv(data_git)

In [0]:
# Independent copy: later cleaning of `sdss_opt` must not alter the raw
# `sdss_1` frame (plain assignment would only create a second name for it).
sdss_opt = sdss_1.copy()

In [0]:
# (rows, columns) of the working frame.
sdss_opt.shape

In [0]:
unwanted_columns = ['camcol','run','rerun','objid','specobjid']
# Rebind to a new frame instead of dropping in place, so the frame this one
# was derived from is left untouched. errors='ignore' keeps the cell safe to
# re-run: a second run finds the columns already gone instead of raising KeyError.
sdss_opt = sdss_opt.drop(columns=unwanted_columns, errors='ignore')
sdss_opt.head(10)

In [0]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
# Encoder that maps the values of the `class` column to integer labels.
enc = LabelEncoder()
# The twelve columns used as model features.
X_opt = sdss_opt[['ra','dec','u','g','r','i','z','redshift','plate','mjd','fiberid','field']]
# Integer-encoded target.
y_opt = enc.fit_transform(sdss_opt['class'])
X_opt.head()

**Normalization and Transformation**

In [0]:
# L2-normalise the features; sklearn's normalize() scales each sample (row) by default.
X_opt = preprocessing.normalize(X_opt, norm='l2')

In [0]:
# Yeo-Johnson power transform; standardize=True also standardises the transformed output.
transform_opt = preprocessing.PowerTransformer(method='yeo-johnson', standardize=True)

In [0]:
# Fit the transformer on all rows and overwrite X_opt with the transformed values.
# NOTE(review): re-running this cell transforms the already-transformed data again.
X_opt = transform_opt.fit_transform(X_opt)

In [0]:
# Inspect the transformed feature matrix.
print(X_opt)

**Two approaches to optimizing the neural network: one using class weights, the other using undersampling**

**Using class weights**

In [0]:
# Balanced class weights for the encoded labels. The label vector in this
# notebook is `y_opt` (a bare `y` is never defined and raised NameError).
# Keyword arguments are used because recent scikit-learn releases reject the
# positional form of compute_class_weight.
sdss_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_opt),
    y=y_opt,
)
# Show the computed weight per class.
print(sdss_weights)

**Undersampling the majority class**

In [0]:
# Class counts before undersampling.
Counter(y_opt)

In [0]:
# Random undersampling without replacement, seeded so the resample is repeatable.
under_sampler_opt = RandomUnderSampler(random_state=18120199,replacement=False)
X_undersam, y_undersam = under_sampler_opt.fit_resample(X_opt, y_opt)

In [0]:
# Class counts after undersampling.
Counter(y_undersam)

In [0]:
# One-hot encode the undersampled labels.
# NOTE(review): overwrites y_undersam, so re-running this cell would encode the
# already one-hot matrix a second time.
y_undersam = to_categorical(y_undersam)

In [0]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the undersampled data for validation; the seed fixes the split.
X_train, X_val, Y_train, Y_val = train_test_split(X_undersam, y_undersam, test_size = 0.2, random_state = 18120199)

In [0]:
# Inspect the training features.
print(X_train)

**Neural Network Optimization**

**Hyperas requires a separate function for fetching the data; values defined outside that function are not visible to it**

In [0]:
def under_data():
    """Load the SDSS optimisation data, preprocess it and split it for training.

    Pipeline: download the CSV, encode the ``class`` column as integer labels,
    L2-normalise the twelve feature columns, apply a standardised Yeo-Johnson
    power transform, randomly undersample with a fixed seed, one-hot encode
    the labels into three columns and hold out 20% of the rows for validation.

    The imports below are kept inside the function on purpose: this function
    is handed to hyperas as its data provider.

    Returns:
        Tuple ``(xtrain, ytrain, xval, yval)``.
    """
    import time
    import keras
    from keras import utils as cat_utils
    from keras.layers import Activation, Dense, Dropout
    from keras.models import Sequential
    from sklearn.preprocessing import LabelEncoder, PowerTransformer

    # Raw data lives in the author's own GitHub repository.
    csv_url = ('https://raw.githubusercontent.com/rohandongare-nci/'
               '18120199-Data/master/Base%20Learner%20Optimization/Base%20Learners%20Optimization.csv')
    frame = pd.read_csv(csv_url)
    frame.drop(columns=['camcol','run','rerun','objid','specobjid'], inplace=True)

    # Feature matrix and integer-encoded target.
    feature_names = ['ra','dec','u','g','r','i','z','redshift','plate','mjd','fiberid','field']
    features = frame[feature_names]
    labels = LabelEncoder().fit_transform(frame['class'])

    # Normalise, then apply the Yeo-Johnson power transform.
    features = preprocessing.normalize(features, norm='l2')
    power = preprocessing.PowerTransformer(method='yeo-johnson', standardize=True)
    features = power.fit_transform(features)

    # Seeded random undersampling without replacement.
    sampler = RandomUnderSampler(random_state=18120199, replacement=False)
    features_bal, labels_bal = sampler.fit_resample(features, labels)

    # One-hot encode the target into three columns.
    labels_bal = cat_utils.to_categorical(labels_bal, 3)

    xtrain, xval, ytrain, yval = train_test_split(
        features_bal, labels_bal, test_size=0.2, random_state=18120199)
    return xtrain, ytrain, xval, yval

**Function to define NN model** 

In [0]:
def model_opt(xtrain, ytrain, xval, yval):
    """Build, train and score one candidate network for the hyperas search.

    The double-brace expressions are hyperas template placeholders, each one
    declaring a dimension of the search space. hyperas rewrites this
    function's source text before running it, so the placeholders must stay
    in their current positions and order, and the function is not meant to be
    called directly.

    Args:
        xtrain: training features (the first layer expects 12 per sample).
        ytrain: training labels, one column per class (output layer has 3 units).
        xval: validation features.
        yval: validation labels, same layout as ytrain.

    Returns:
        dict with 'loss' (negated validation accuracy, so that minimising the
        loss maximises accuracy), 'status' (STATUS_OK) and 'model' (the
        trained model).
    """
    model_sdss_opt = Sequential()
    # Layer 1: search over the number of neurons ...
    model_sdss_opt.add(Dense({{choice([64,128,256,512,1024])}}, input_shape=(12,)))
    # ... the activation function ...
    model_sdss_opt.add(Activation({{choice(['relu','tanh','sigmoid'])}}))
    # ... and the dropout rate.
    # NOTE(review): the rate is sampled from the whole 0-1 range, so values
    # close to 1 drop almost every unit; consider narrowing the range.
    model_sdss_opt.add(Dropout({{uniform(0, 1)}}))
    # Layers 2-4 use the same three search dimensions.
    model_sdss_opt.add(Dense({{choice([64,128,256,512,1024])}}))
    model_sdss_opt.add(Activation({{choice(['relu','tanh','sigmoid'])}}))
    model_sdss_opt.add(Dropout({{uniform(0, 1)}}))
    model_sdss_opt.add(Dense({{choice([64,128,256,512,1024])}}))
    model_sdss_opt.add(Activation({{choice(['relu','tanh','sigmoid'])}}))
    model_sdss_opt.add(Dropout({{uniform(0, 1)}}))
    model_sdss_opt.add(Dense({{choice([64,128,256,512,1024])}}))
    model_sdss_opt.add(Activation({{choice(['relu','tanh','sigmoid'])}}))
    model_sdss_opt.add(Dropout({{uniform(0, 1)}}))
    # Let the search decide whether an additional fifth hidden layer helps.
    if {{choice(['four', 'five'])}} == 'five':
        model_sdss_opt.add(Dense({{choice([64,128,256,512,1024])}}))
        model_sdss_opt.add(Activation({{choice(['relu', 'sigmoid','tanh'])}}))
        model_sdss_opt.add(Dropout({{uniform(0, 1)}}))
    # Three-way softmax output.
    model_sdss_opt.add(Dense(3))
    model_sdss_opt.add(Activation('softmax'))
    adam_opt = keras.optimizers.Adam(lr={{choice([10**-4, 10**-3, 10**-2])}})
    # Pass the configured optimizer instance: the string 'adam' would build a
    # default Adam and silently ignore the sampled learning rate.
    model_sdss_opt.compile(loss='categorical_crossentropy',
                  optimizer=adam_opt,
                  metrics=['accuracy'])
    # `epochs` is the Keras 2 name of the argument formerly called `nb_epoch`.
    model_sdss_opt.fit(xtrain, ytrain,
              batch_size={{choice([150,300])}},
              epochs=1,
              verbose=1,
              validation_data=(xval, yval))
    eval_score, model_acc = model_sdss_opt.evaluate(xval, yval, verbose=1)
    print('test accuracy acheived:', model_acc)
    # hyperopt minimises 'loss', hence the negated accuracy.
    return {'loss': -model_acc, 'status': STATUS_OK, 'model': model_sdss_opt}

In [0]:
# Materialise the split in the notebook namespace. The search cell below passes
# the under_data function itself to optim.minimize, not these variables.
xtrain, ytrain, xval, yval = under_data()

In [0]:
# Run 30 TPE-guided evaluations of model_opt on the data from under_data,
# recording wall-clock timestamps before and after the search.
start_nn = time.time()
best_nn_run, best_nn_model = optim.minimize(
    model=model_opt,
    data=under_data,
    algo=tpe.suggest,
    max_evals=30,
    trials=Trials(),
    notebook_name='optimizing_neural_network_base_learner',
)
end_nn = time.time()

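**Uploading this notebook to the Colab runtime, which hyperas requires: `optim.minimize` reads the notebook file named by `notebook_name`, so the upload must be done before the optimization cell is run**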

In [0]:
# Colab-only: opens a file picker and uploads the chosen file(s) to the runtime.
from google.colab import files
uploaded = files.upload()

In [0]:
!ls

**Best run hyperparameters**

In [0]:
# Hyperparameter selection of the best trial, as returned by optim.minimize.
print(best_nn_run)