### Linear regression ###

#### Notes 

To-Do  
  
1. Hyperparamter tuning
    1. Do the scan in the defined boundary
    2. Gather the best model
    3. Read about GPU usage in Talos
2. Saving the best model to ONNX format
    1. Use deploy to save as JSON
    2. save method in Keras
    3. ONNX options
        + Use onnxmltool
    4. Is there an option to retrieve the best model from the  
    Talos scan_object without using Deploy.
    5. Problem with Deploy -- you have to save it locally and then read it back
3. Research on the best models
4. LaTeX equations
5. Create decorator
    + Returns ONNX model
    + Writes keras and scikit params to master sheet

#### Scikit ####

In [4]:
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.preprocessing import StandardScaler

diabetes = datasets.load_diabetes()

# Use only one feature
diabetes_X = diabetes.data[:, np.newaxis, 2]
sc = StandardScaler()
diabetes.data = sc.fit_transform(diabetes.data)

x = diabetes_X
y = diabetes.target


# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets into training/testing sets
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

score = regr.score(diabetes_X_test,diabetes_y_test)
mean_squared_error(diabetes_y_test, diabetes_y_pred)

2548.0723987259703

In [27]:
diabetes.data

array([[ 0.80050009,  1.06548848,  1.29708846, ..., -0.05449919,
         0.41855058, -0.37098854],
       [-0.03956713, -0.93853666, -1.08218016, ..., -0.83030083,
        -1.43655059, -1.93847913],
       [ 1.79330681,  1.06548848,  0.93453324, ..., -0.05449919,
         0.06020733, -0.54515416],
       ...,
       [ 0.87686984,  1.06548848, -0.33441002, ..., -0.23293356,
        -0.98558469,  0.32567395],
       [-0.9560041 , -0.93853666,  0.82123474, ...,  0.55838411,
         0.93615545, -0.54515416],
       [-0.9560041 , -0.93853666, -1.53537419, ..., -0.83030083,
        -0.08871747,  0.06442552]])

#### Keras ###

In [6]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD, Nadam
from keras.losses import mse
from keras.activations import linear
import talos as ta

# np.random.seed(7)

# The parameter 'p' should be dynamic and a part of the mapping

p = {'lr': (2, 10, 30),
     'first_neuron': [1],
     'batch_size': [1, 2, 3, 4],
     'epochs': [10,20,40],
     'weight_regulizer': [None],
     'emb_output_dims': [None],
     'optimizer': ['SGD', 'nadam'],
     'losses': [mse],
     'activation': [linear]
    }

def keras_model(x_train, y_train, x_val, y_val, params):
    model = Sequential()
    model.add(Dense(params['first_neuron'],
                    input_dim=x_train.shape[1],
                    activation=params['activation']))

    model.compile(optimizer=params['optimizer'],
                  loss=params['losses'],
                  metrics=['mse'])

    out = model.fit(diabetes_X_train, diabetes_y_train,
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    verbose=0,
                    validation_data=[x_val, y_val])
    
    return out, model
    

    
h = ta.Scan(x, y,
            params=p,
            dataset_name='first_linear_regression',
            experiment_no='a',
            model=keras_model,
            grid_downsample=0.5)
    
    


  0%|          | 0/96 [00:00<?, ?it/s]
  1%|          | 1/96 [00:17<27:09, 17.15s/it]
  2%|▏         | 2/96 [00:21<21:02, 13.44s/it]
  3%|▎         | 3/96 [00:30<18:47, 12.12s/it]
  4%|▍         | 4/96 [00:40<17:30, 11.41s/it]
  5%|▌         | 5/96 [00:57<19:55, 13.14s/it]
  6%|▋         | 6/96 [01:02<16:03, 10.70s/it]
  7%|▋         | 7/96 [01:06<12:53,  8.69s/it]
  8%|▊         | 8/96 [01:08<09:49,  6.70s/it]
  9%|▉         | 9/96 [01:18<10:44,  7.40s/it]
 10%|█         | 10/96 [01:34<14:33, 10.16s/it]
 11%|█▏        | 11/96 [02:07<24:13, 17.10s/it]
 12%|█▎        | 12/96 [02:15<19:59, 14.28s/it]
 14%|█▎        | 13/96 [02:21<16:17, 11.78s/it]
 15%|█▍        | 14/96 [02:25<12:54,  9.44s/it]
 16%|█▌        | 15/96 [02:34<12:33,  9.30s/it]
 17%|█▋        | 16/96 [02:42<11:42,  8.78s/it]
 18%|█▊        | 17/96 [03:00<15:20, 11.65s/it]
 19%|█▉        | 18/96 [03:09<14:06, 10.85s/it]
 20%|█▉        | 19/96 [03:26<16:20, 12.73s/it]
 21%|██        | 20/96 [03:29<12:20,  9.74s/it]
 22%|██▏ 

Scan Finished!


In [9]:
# Testing deploy from Talos 
from talos import Deploy

Deploy(h, 'linear_regression_firstDataset',metric='val_loss')

Deploy package linear_regression_firstDataset have been saved.


<talos.commands.deploy.Deploy at 0x25503598c88>

In [41]:
# loading the best model

import os,json,zipfile,shutil
from keras.models import model_from_json
archive = zipfile.ZipFile('linear_regression_firstDataset.zip', 'r')
model_file = archive.open('linear_regression_firstDataset_model.json')
weight_file = archive.open('linear_regression_firstDataset_model.h5')

with zipfile.ZipFile('linear_regression_firstDataset.zip', 'r') as zip_ref:
    zip_ref.extractall('./linear_regression_firstDataset_unzip')

# json_file = open('model.json', 'r')
loaded_model_json = model_file.read()

# json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("./linear_regression_firstDataset/linear_regression_firstDataset_model.h5")


shutil.rmtree('./linear_regression_firstDataset_unzip')
print("Loaded model from disk")

Loaded model from disk


In [42]:
type(loaded_model)

keras.engine.sequential.Sequential

In [44]:
# Testing ONNX ml tool
import onnxmltools
onnx_model = onnxmltools.convert_keras(loaded_model)

The maximum opset needed by this model is only 7.


In [45]:
onnx_model

ir_version: 3
producer_name: "OnnxMLTools"
producer_version: "1.3.1"
domain: "onnxml"
model_version: 0
doc_string: ""
graph {
  node {
    input: "dense_1_input_14_0"
    input: "W"
    output: "transformed_tensor"
    name: "_class__keras_layers_core_Dense__"
    op_type: "MatMul"
    domain: ""
  }
  node {
    input: "transformed_tensor"
    input: "B"
    output: "biased_tensor_name"
    name: "Add"
    op_type: "Add"
    domain: ""
  }
  node {
    input: "biased_tensor_name"
    output: "dense_1_14_BiasAdd_01"
    name: "Identity"
    op_type: "Identity"
    domain: ""
  }
  name: "e704ad28cdf745b2bf06ded1ab6e743d"
  initializer {
    dims: 1
    dims: 1
    data_type: FLOAT
    float_data: 162.32818603515625
    name: "W"
  }
  initializer {
    dims: 1
    data_type: FLOAT
    float_data: 153.9892578125
    name: "B"
  }
  input {
    name: "dense_1_input_14_0"
    type {
      tensor_type {
        elem_type: FLOAT
        shape {
          dim {
            dim_value: 1
     

In [42]:
keras_score = model.evaluate(diabetes_X_test, diabetes_y_pred)
print(model.metrics_names[0],':', keras_score[0])

loss : 1956.4066162109375


#### Notes ####

1) MSE or accuracy? What is the right metric in this case?  
2) Try altering
    - learning rate
    - optimizer or loss function
    - 
3) **Normailzation of data**  
4) Check if anything is wrong with model.evaluate  
5) Size of loss too large?  
6) Why is the epoch mse value different from the final evaluated value?  
7) How to validate if this dnn model is the right representation of linear regression?     
The math behind the Keras code  
8)  

***

### SVM ###

#### Scikit ####

In [9]:
from sklearn import svm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

data = pd.read_csv("../data/iris.csv" , delimiter=",", header=None, index_col=False)
class_name,index = np.unique(data.iloc[:,-1],return_inverse=True)
data.iloc[:,-1] = index
data = data.loc[data[4] != 2]
X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

model = svm.SVC(kernel='linear', C=1, gamma=1)

model.fit(x_train, y_train)
score = model.score(x_test, y_test)

prediction= model.predict(x_test)

print(score)

1.0


In [11]:
prediction.shape

(60,)

#### Keras ####

In [41]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.regularizers import l2

np.random.seed(7)

model = Sequential()
model.add(Dense(1,input_dim=4,activation='linear', W_regularizer=l2(0.01)))

model.compile(loss='categorical_hinge', optimizer='adadelta', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=120, batch_size=10)

  


Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

<keras.callbacks.History at 0x1c98933e6a0>

In [42]:
keras_score = model.evaluate(x_test, prediction)
print(model.metrics_names[1],':', keras_score[1])

acc : 1.0


---

### Naive Bayes

#### Scikit

In [45]:
from sklearn.naive_bayes import GaussianNB 

model = GaussianNB()

model.fit(x_train,y_train)

predicted= model.predict(x_test)

score = model.score(x_test, y_test)
print(score)

1.0


#### Keras