##### Steps #####

1. Detect and Load dataset
2. Keras
3. Scikit
4. Kfold
5. ROC curve
6. Classification report
7. Write back the results

---

##### Detecting and loading data #####

In [32]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np


scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

credentials = ServiceAccountCredentials.from_json_keyfile_name('../data/client_secret.json', scope)

gc = gspread.authorize(credentials)

sh = gc.open('Dataset details')

In [35]:
worksheet = sh.get_worksheet(0)
dataset_list = worksheet.col_values(2)
params_list = worksheet.row_values(2)[7:]
params_dict = {x:i+7 for i,x in enumerate(params_list)}
activation_list = worksheet.col_values(3)
dataset_list

# for data in dataset_list:
#     if data follows a specific format then:
#         process the dataset
#         Create a list of dataframes
row_nb = 3
n_col_nb = 4 
p_col_nb = 5
c_col_nb = 7
activation_function = activation_list[row_nb]
data_url = dataset_list[row_nb]

In [None]:
# Data prep for iris dataset #

data = pd.read_csv(data_url, delimiter=",", header=None, index_col=False)
class_name,index = np.unique(data.iloc[:,-1],return_inverse=True)
data.iloc[:,-1] = index
data = data.loc[data[4] != 2]
X = data.iloc[:,:-1]
Y = data.iloc[:,-1]
n=data.shape[0]
p=X.shape[1]

unique,count = np.unique(Y,return_counts=True)
class_distribution = str(count[0]) + " : " + str(count[1])
class_distribution

In [41]:
# Data prep for Adult salary dataset #
names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
         'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 
         'hours-per-week', 'native-country', 'target']
data = pd.read_csv(data_url, delimiter=",", header=None, index_col=False,names=names)
data = data[data["workclass"] != "?"]
data = data[data["occupation"] != "?"]
data = data[data["native-country"] != "?"]

# Convert categorical fields #
categorical_col = ['workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race', 'sex', 'native-country', 'target']

for col in categorical_col:
    b, c = np.unique(data[col], return_inverse=True)
    data[col] = c

feature_list = names[:14]
# Test train split #
X = data.loc[:, feature_list]
Y = data[['target']]

# data.iloc[:,-1] = index
# data = data.loc[data[4] != 2]
# X = data.iloc[:,:-1]
# Y = data.iloc[:,-1]
n=data.shape[0]
p=X.shape[1]

unique,count = np.unique(Y,return_counts=True)
class1=count[0]/data.shape[0]*100
class2=count[1]/data.shape[0]*100
class_distribution = str(round(class1)) + " : " + str(round(class2))
class_distribution

'76.0 : 24.0'

In [42]:
from sklearn.model_selection import train_test_split
# Class1_distribution=Y[1].value_counts()[0] / Y.shape[0] * 100
# Class2_distribution=Y[1].value_counts()[1] / Y.shape[0] * 100
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

---

##### Keras model #####

In [44]:
### Logistic regression using DNN ###
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.utils import np_utils

np.random.seed(7)

# Defining model #
if activation_function == "Logistic regression":
    activation = "sigmoid"
input_dim = X.shape[1]
epoch = 2000 # Add in the sheet
batch_size = 300 # Add in the sheet


model = Sequential()
model.add(Dense(1,input_dim=input_dim,activation=activation))

# Compile the model #

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model #

# model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=200, batch_size=10)
model.fit(x_train, y_train, epochs=epoch, batch_size=batch_size, verbose=0)
# Evaluate the model #

scores = model.evaluate(x_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


acc: 75.66%


**Model details to be added**
1. Score/Accuracy
2. bias_initializer
3. kernel_regularizer
4. bias_regularizer
5. layer name
6. use bias
7. kernel initializer
8. activity regularizer
9. kernel constraint
10. bias constraint

*What do these values represent?*

In [45]:
keras_params = model.get_config()
keras_params = keras_params['layers'][0]['config']
keras_params['kernel_initializer'] = keras_params['kernel_initializer']['class_name']
keras_params['bias_initializer'] = keras_params['bias_initializer']['class_name']
keras_params

{'name': 'dense_3',
 'trainable': True,
 'batch_input_shape': (None, 14),
 'dtype': 'float32',
 'units': 1,
 'activation': 'sigmoid',
 'use_bias': True,
 'kernel_initializer': 'VarianceScaling',
 'bias_initializer': 'Zeros',
 'kernel_regularizer': None,
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

---

##### Scikit model #####

In [46]:
# Scikit learn #

# Import and create an instance of your model(Logistic regression)

from sklearn.linear_model import LogisticRegression
logisticRegr = LogisticRegression()

# Train your model using the training dataset

logisticRegr.fit(x_train,y_train)

# Predict the output 

predictions = logisticRegr.predict(x_test)

score = logisticRegr.score(x_test,y_test)
print(score)

  y = column_or_1d(y, warn=True)


0.7898346726723653


##### Model details to collect #####
1. All values returned by get_params method


In [47]:
scikit_params = logisticRegr.get_params(deep=True)

##### Writing the values back to the sheet #####

In [48]:
scikit_params

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'max_iter': 100,
 'multi_class': 'ovr',
 'n_jobs': 1,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [49]:
# Steps #
# 1. Read and figure out the relevant column and row
# 2. Map the Keras/Scikit dicts with the columns and write to the corresponding cells
# keras_params['layers']
# scikit_params
for param,col_nb in params_dict.items():
    for s_param,value in scikit_params.items():
        if param == s_param:
            if value == None:
                value = 'None'
            worksheet.update_cell(row_nb+1, col_nb+1, value)
            

for param,col_nb in params_dict.items():
    for k_param,value in keras_params.items():
        if param == k_param:
            if value == None:
                value = 'None'
            worksheet.update_cell(row_nb+1, col_nb+1, value)
worksheet.update_cell(row_nb+1, n_col_nb, n)
worksheet.update_cell(row_nb+1, p_col_nb, p)
worksheet.update_cell(row_nb+1, c_col_nb, class_distribution)

{'spreadsheetId': '1E5jcq2w42gN8bMIaeaRJpAdhgSVN-2XDJ_YTHe4qfwY',
 'updatedRange': 'Sheet1!G4',
 'updatedRows': 1,
 'updatedColumns': 1,
 'updatedCells': 1}