### Experiments using IMLY ###

This notebook contains experimental runs of IMLY with different datasets.  
The readings of these experiments can be referred to in this [sheet](https://docs.google.com/spreadsheets/d/1E5jcq2w42gN8bMIaeaRJpAdhgSVN-2XDJ_YTHe4qfwY/edit?usp=sharing).

---

### Dataset  #1

#### Diabetes dataset

In [5]:
import automation_script
from os import path
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

dataset_info = automation_script.get_dataset_info("diabetes")
url = "../data/diabetes.csv" if path.exists("../data/diabetes.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, index_col=False)
sc = StandardScaler()
data = sc.fit_transform(data)
data = pd.DataFrame(data)


X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

# X = preprocessing.scale(X)
# Y = preprocessing.normalize(Y)

automation_script.run_imly(dataset_info, 'linear_regression', X, Y, 0.60)

100%|██████████| 1/1 [00:03<00:00,  3.11s/it]


Scan Finished!


In [None]:
X

---

### Dataset  #2

#### UCI Abalone dataset

In [2]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


dataset_info = experiment_automation_script.get_dataset_info("uci_abalone")

names = ["sex", "length", "diameter", "height", "whole weight",
        "shucked weight", "viscera weight", "shell weight", "rings"]
url = "../data/abalone.data.csv" if path.exists("../data/abalone.data.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, names=names, index_col=False)
data.head()

# Check for columns that contain missing values #
col_names = data.columns

num_data = data.shape[0]

categorical_col = ['sex']
for col in categorical_col:
    b, c = np.unique(data[col], return_inverse=True)
    data[col] = c

    
# Filter dataset to contain 'rings' 9 and 10 #
data = data[data['rings'].isin([9,10])]
data['rings'] = data['rings'].map({9: 0, 10: 1})


feature_list = names[:7]
X = data.loc[:, feature_list]
Y = data[['rings']]


experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

  y = column_or_1d(y, warn=True)


Keras classifier chosen


  y = column_or_1d(y, warn=True)
100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


Scan Finished!
  round_epochs            val_loss                loss  lr units batch_size  \
0           10  0.6475951799056815  0.6550543823757687  30     1         10   

  epochs weight_regulizer emb_output_dims optimizer               losses  \
0     10             None            None      adam  binary_crossentropy   

  activation          model_name  
0    sigmoid  LogisticRegression  


---

### Dataset  #3

#### UCI Iris dataset

In [3]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "uci_iris"
dataset_info = experiment_automation_script.get_dataset_info(dataset_name)

url = "../data/iris.csv" if path.exists("../data/iris.csv") else dataset_info['url']
data = pd.read_csv(url , delimiter=",", header=None, index_col=False)
class_name,index = np.unique(data.iloc[:,-1],return_inverse=True)
data.iloc[:,-1] = index
data = data.loc[data[4] != 2]
X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

params = {
    'epochs': 200
}

experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60, params=params)

Keras classifier chosen


100%|██████████| 1/1 [00:02<00:00,  2.22s/it]


Scan Finished!
  round_epochs            val_loss                loss  lr units batch_size  \
0          200  0.7741430004437765  0.7907322347164154  30     1         10   

  epochs weight_regulizer emb_output_dims optimizer               losses  \
0    200             None            None      adam  binary_crossentropy   

  activation          model_name  
0    sigmoid  LogisticRegression  


In [3]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)
np.random.seed(7)

def create_model():
    model = Sequential()
    model.add(Dense(1,input_dim=4,activation='sigmoid'))

    # Compile the model #

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model
    

model = KerasClassifier(build_fn=create_model)
model.fit(x_train, y_train)

scores = model.score(x_test, y_test)
scores

from else
input from __call__  --  Tensor("dense_1_input:0", shape=(?, 4), dtype=float32)
Epoch 1/1


0.4666666626930237

In [7]:
create_model.__call__()

input from __call__  --  Tensor("dense_2_input_1:0", shape=(?, 4), dtype=float32)


<keras.engine.sequential.Sequential at 0x175431b9f28>

---

### Dataset  #4

#### UCI Adult salary dataset

In [1]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "uci_adult_salary"
dataset_info = experiment_automation_script.get_dataset_info(dataset_name)


names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
         'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 
         'hours-per-week', 'native-country', 'target']
url = "../data/iris.csv" if path.exists("../data/dataset.csv.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=" ", header=None, names=names)


data = data[data["workclass"] != "?"]
data = data[data["occupation"] != "?"]
data = data[data["native-country"] != "?"]

# Convert categorical fields #
categorical_col = ['workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race', 'sex', 'native-country', 'target']

for col in categorical_col:
    b, c = np.unique(data[col], return_inverse=True)
    data[col] = c

feature_list = names[:14]
# Test train split #
X = data.loc[:, feature_list]
Y = data[['target']]

experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

# Split the dataset into test and train datasets

# from sklearn.model_selection import train_test_split
# x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

Using TensorFlow backend.
  y = column_or_1d(y, warn=True)


Keras classifier chosen


  y = column_or_1d(y, warn=True)
  0%|          | 0/1 [00:00<?, ?it/s]

<keras.engine.sequential.Sequential object at 0x0000022E25D08160>
From try --  <function glm at 0x0000022E23B86F28>


100%|██████████| 1/1 [00:31<00:00, 31.83s/it]


Scan Finished!
from elif
<keras.engine.sequential.Sequential object at 0x0000022EFAA60E10>
From except --  <function glm at 0x0000022E23B86F28>
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


---

### Dataset  #5

#### UCI Ad dataset

In [1]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path
from sklearn.preprocessing import LabelEncoder

dataset_name = "uci_ad"
dataset_info = experiment_automation_script.get_dataset_info(dataset_name)

url = "../data/ad.data.csv" if path.exists("../data/dataset.csv.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, index_col=False)

# Check for columns that contain missing values #

data = data.applymap(lambda val: np.nan if str(val).strip() == '?' else val)
data = data.dropna()


# Label encoding #

lb = LabelEncoder()
Y = lb.fit_transform(data.iloc[:, -1])

X = data.iloc[:,:-1]

# Normalize the X values #
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
X = pd.DataFrame(X)
Y = pd.DataFrame(Y)

experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

Using TensorFlow backend.
  y = column_or_1d(y, warn=True)


Keras classifier chosen


  y = column_or_1d(y, warn=True)
  0%|          | 0/1 [00:00<?, ?it/s]

<keras.engine.sequential.Sequential object at 0x000001C8E1478668>
From try --  <function glm at 0x000001C8E1236F28>


100%|██████████| 1/1 [00:03<00:00,  3.71s/it]


Scan Finished!
from elif
<keras.engine.sequential.Sequential object at 0x000001C8E1478940>
From except --  <function glm at 0x000001C8E1236F28>
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


---

### Dataset  #6

#### UCI Mushroom dataset

In [1]:
# Load dataset info #
import automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "uci_mushroom"
dataset_info = automation_script.get_dataset_info(dataset_name)

Using TensorFlow backend.


In [2]:
names = ['classes', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment',
        'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
        'stalk-surface-below-ring', 'stalk-color-above-ring', 'stalk-color-below-ring',
        'veil-type', 'veil-color', 'ring-number', 'ring-type', 'spore-print-color',
        'population', 'habitat']
url = "../data/mushroom.data.csv" if path.exists("../data/dataset.csv.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, names=names, index_col=False)

# Check for columns that contain missing values #

print("Fields with missing values")
col_names = data.columns
num_data = data.shape[0]
for c in col_names:
    num_non = data[c].isin(["?"]).sum()
    if num_non > 0:
        print (c)
        print (num_non)
        print ("{0:.2f}%".format(float(num_non) / num_data * 100))
        print ("\n")

data = data[data["stalk-root"] != "?"]

# Convert categorical fields #

for col in names:
    b, c = np.unique(data[col], return_inverse=True)
    data[col] = c

# Split the dataset into test and train datasets #
feature_list = names[1:23]
X = data.loc[:, feature_list]
Y = data[['classes']]

Fields with missing values
stalk-root
2480
30.53%




In [7]:
automation_script.run_imly(dataset_info, 'logistic_regression', X, Y, 0.60)

  y = column_or_1d(y, warn=True)


Keras classifier chosen


  y = column_or_1d(y, warn=True)
100%|██████████| 1/1 [00:05<00:00,  5.86s/it]


Scan Finished!


### Dataset  #7

#### Covertype dataset

In [25]:
import automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "covertype"
dataset_info = automation_script.get_dataset_info(dataset_name)

data = pd.read_csv("../data/covtype.data.csv", delimiter=",", header=None, index_col=False)

data = data[data[54].isin([1,2])]

Y = data.iloc[:, -1]
X = data.iloc[:,:-1]

# Normalize the X values #
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X = sc.fit_transform(X)


Using TensorFlow backend.


In [17]:
params = {
    "epochs": 200,
    "batch_size":100
}

automation_script.run_imly(dataset_info, 'logistic_regression', X, Y, 0.60, params=params)

Keras classifier chosen


100%|██████████| 1/1 [11:53<00:00, 713.32s/it]


Scan Finished!


### Dataset  #8

#### TestData1 dataset

In [53]:
import automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "test_data_1"
dataset_info = automation_script.get_dataset_info(dataset_name)

data = pd.read_csv("../data/testData1.csv", delimiter=",", header=0, index_col=0)


Y = data.iloc[:, -1]
X = data.iloc[:,:-1]

In [None]:
Y

In [54]:
params = {
    "epochs": 10,
    "batch_size":10
}

automation_script.run_imly(dataset_info, 'logistic_regression', X, Y, 0.60, params=params)

Keras classifier chosen


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]


Scan Finished!


### Dataset  #9

#### TestData2 dataset

In [14]:
import automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "test_data_2"
dataset_info = automation_script.get_dataset_info(dataset_name)

data = pd.read_csv("../data/testData2.csv", delimiter=",", header=0, index_col=0)


Y = data.iloc[:, -1]
X = data.iloc[:,:-1]

In [None]:
Y

In [16]:
unique,count = np.unique(Y,return_counts=True)
class1=count[0]/X.shape[0]*100
class2=count[1]/X.shape[0]*100
class_distribution = round(class1, 2)
unique

array([0, 1], dtype=int64)

In [52]:
params = {
    "epochs": 1000,
    "batch_size":100
}

automation_script.run_imly(dataset_info, 'logistic_regression', X, Y, 0.60, params=params)

Keras classifier chosen


100%|██████████| 1/1 [00:12<00:00, 12.33s/it]


Scan Finished!


### Dataset  #10

#### UCI Airfoil dataset

In [40]:
import automation_script
import pandas as pd
import numpy as np
from os import path
from sklearn.preprocessing import StandardScaler

dataset_name = "uci_airfoil"
dataset_info = automation_script.get_dataset_info(dataset_name)

data = pd.read_csv("../data/uci_airfoil_self_noise.csv", delimiter=",", header=0, index_col=0)
sc = StandardScaler()
data = sc.fit_transform(data)
data = pd.DataFrame(data)


Y = data.iloc[:, -1]
X = data.iloc[:,:-1]

In [41]:
automation_script.run_imly(dataset_info, 'linear_regression', X, Y, 0.60)

100%|██████████| 1/1 [00:08<00:00,  8.99s/it]


Scan Finished!


### Dataset  #11

#### UCI Auto-mpg dataset

In [42]:
import automation_script
import pandas as pd
import numpy as np
from os import path
from sklearn.preprocessing import StandardScaler

dataset_name = "uci_auto_mpg"
dataset_info = automation_script.get_dataset_info(dataset_name)

data = pd.read_csv("../data/uci_auto_mpg.csv", delimiter=",", header=0, index_col='car name')
data = data[data.horsepower != '?']
sc = StandardScaler()
data = sc.fit_transform(data)
data = pd.DataFrame(data)


Y = data.iloc[:,1]
X = data.iloc[:,2:]

In [43]:
automation_script.run_imly(dataset_info, 'linear_regression', X, Y, 0.60)

100%|██████████| 1/1 [00:02<00:00,  2.73s/it]


Scan Finished!


---

## Test bed ##

In [3]:
from sklearn.model_selection import train_test_split
import copy

model_name = 'linear_regression'
model_mappings = {
    'linear_regression': 'LinearRegression',
    'logistic_regression': 'LogisticRegression'
}

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

for key, value in model_mappings.items():
    if key == model_name:
        name = value

module = __import__('sklearn.linear_model', fromlist=[name])
imported_module = getattr(module, name)
model = imported_module

primal_model = model()

# Primal
primal_model.fit(x_train, y_train)
primal_model.__class__.__name__

'LinearRegression'

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing


import experiment_automation_script
from os import path
import pandas as pd
from sklearn import preprocessing

dataset_info = experiment_automation_script.get_dataset_info("diabetes")
url = "../data/diabetes.csv" if path.exists("../data/diabetes.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, index_col=False)
sc = StandardScaler()
data = sc.fit_transform(data)
data = pd.DataFrame(data)


X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

# diabetes = datasets.load_diabetes()
# sc = StandardScaler()
# diabetes = sc.fit_transform(diabetes)
#####
# # Use only one feature
# diabetes_X = diabetes.data
# # sc = StandardScaler()
# # diabetes.data = sc.fit_transform(diabetes.data)

# X = diabetes.data
# Y = diabetes.target
#####

# X = preprocessing.scale(X)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

# # Split the data into training/testing sets
# x_train = diabetes_X[:-20]
# x_test = diabetes_X[-20:]

# # Split the targets into training/testing sets
# y_train = diabetes.target[:-20]
# y_test = diabetes.target[-20:]


In [27]:
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [3]:
import winmltools
winmltools.__file__

'C:\\Users\\shakk\\Anaconda2\\envs\\py36\\lib\\site-packages\\winmltools\\__init__.py'

In [3]:
import onnxmltools

def f1(**kwargs):
    params_json = json.load(open('../imly/architectures/sklearn/params.json'))
    params = params_json['params'][kwargs['param_name']]
    kwargs.setdefault('params', params)
    kwargs.setdefault('x_train', np.array([[1], [2]]))

    model = Sequential()
    model.add(Dense(1,
                    input_dim=10,
                    activation='linear'))

    model.compile(optimizer='adam',
                  loss='mean_squared_logarithmic_error',
                  metrics=['mse'])
    return model

In [8]:
import json
import numpy as np
from keras import Sequential
from keras.layers import Dense

model = f1(param_name='glm_1')
model.fit(x_train, y_train, epochs=500, batch_size=30)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 

Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 

Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 

Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 

Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x185fe3e27b8>

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
score = mean_squared_error(y_test, y_pred)
score

0.5481227216244245

In [19]:
type(model)
# cross check import (f1p1 and f2p2 combination) - Is it possible to edit after the export-import flow

onnx.onnx_ml_pb2.ModelProto

In [18]:
import onnx
onnx.save(model, './onnx_model.onnx')

---

In [40]:
mapping = { "KerasClassifier": {
    "LogisticRegression": True
},
 "KerasRegressor": {
     "LinearRegression": True
 }
}

name = "LinearRegression"

for key, value in mapping.items():
    test = mapping[key]
    print(test)

{'LogisticRegression': True}
{'LinearRegression': True}


In [2]:
from keras.models import Sequential
from keras.layers.core import Dense
import numpy as np
import json


def glm(**kwargs):  # Should param_name be optional or mandatory?

    # kwargs.setdefault('param_name', 'glm_1')
    params_json = json.load(open('../imly/architectures/sklearn/params.json')) # Remove and make it generic
    params = params_json['params'][kwargs['param_name']]
    kwargs.setdefault('params', params)
    kwargs.setdefault('x_train', np.array([[1], [2]]))

    model = Sequential()
    model.add(Dense(kwargs['params']['first_neuron'], # Change first_neuron to input_size
                    input_dim=kwargs['x_train'].shape[1], # Find a better way to pass input_dim. Through params maybe?
                    activation=kwargs['params']['activation']))

    model.compile(optimizer=kwargs['params']['optimizer'],
                  loss=kwargs['params']['losses'],
                  metrics=["accuracy"])
    return model

In [6]:
glm.__call__(param_name="log_reg")

<keras.engine.sequential.Sequential at 0x17f81a0aeb8>

In [1]:
import experiment_automation_script
from os import path
import pandas as pd
from sklearn.model_selection import train_test_split

# dataset_info = experiment_automation_script.get_dataset_info("diabetes")
url = "../data/diabetes.csv" if path.exists("../data/diabetes.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, index_col=False)

X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

Using TensorFlow backend.


In [2]:

from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense
import numpy as np
import json
from sklearn.metrics import r2_score
from keras.wrappers.scikit_learn import KerasRegressor
import random

random.seed(7)
# def create_model():
def coeff_determination(y_true, y_pred): # Read and understand the workflow
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) ) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

params_json = json.load(open('../imly/architectures/sklearn/params.json')) # Remove and make it generic
params = params_json['params']['log_reg']

model = Sequential()
model.add(Dense(1,  # Change first_neuron to input_size
                input_dim=10,  # Find a better way to pass input_dim. Through params maybe?
                activation='sigmoid'))

model.compile(optimizer='adam',
            loss='binary_crossentropy',
            metrics=[coeff_determination])  # Dealing with accuracy in regression models
#     return model

# model = KerasRegressor(build_fn=create_model)
# model.fit(x_train, y_train)
# model.score(x_test,y_test)
model.evaluate(x_test, y_test)



[1.5110251939386354, -4.287840857541651]

In [None]:

class kerasWrapper(build_fn):
    def __init__():
        self.build_fn = build_fn


class myWrapper(kerasWrapper):
    def __init__(self, build_fn, **kwargs):
        super(kerasWrapper, self).__init__(build_fn=build_fn)
        
    def fit():
        

class create_model(**kwargs):
    def __init__(self, kwargs):
        try:
            self.x_train = kwargs['x_train']
        except KeyError:
            self.x_train = None
            
    def __call__():
        print(self.x_train)
        


build_fn = create_model()

model = myWrapper(build_fn=build_fn)

In [2]:
from winmltools import convert_sklearn
from sklearn.linear_model import LogisticRegression
from onnxmltools.convert.common.data_types import FloatTensorType, Int64TensorType

model = LogisticRegression()
model.fit(X, Y)

onnx_model = convert_sklearn(model, 7, initial_types=[('input', FloatTensorType([1, 2]))])


Seed --  __root__
scope --  ['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'declare_local_operator', 'declare_local_variable', 'delete_local_operator', 'delete_local_variable', 'find_sink_variables', 'get_local_variable_or_declare_one', 'get_onnx_variable_name', 'get_unique_operator_name', 'get_unique_variable_name', 'name', 'onnx_operator_names', 'onnx_variable_names', 'operators', 'parent_scopes', 'target_opset', 'variable_name_mapping', 'variables']
Seed --  input
Seed --  SklearnLinearClassifier
Seed --  label
Seed --  probabilities
Seed --  LinearClassifier
Seed --  probability_tensor
Seed --  probability_tensor_normalized
Seed --  Normalizer
Seed --  ZipMap


The maximum opset needed by this model is only 1.


In [8]:
model_name = "LogisticRegression"

wrapper_mapping_json = json.load(open('../imly/wrappers/keras_wrapper_mapping.json'))

for key, value in wrapper_mapping_json.items():
    for name in value:
        print(name)
        if model_name == name:
            wrapper = key
            
wrapper

LogisticRegression
LinearRegression


'SklearnKerasClassifier'

In [46]:
import re

wrapper_class = 'SklearnKerasClassifier'

path = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', wrapper_class)
module_path = re.sub('([a-z0-9])([A-Z])', r'\1_\2', path).lower()
package_name = module_path.split('_')[0]
wrapper_name = '_'.join(module_path.split('_')[1:3])

module_path = 'wrappers.' + package_name + '.' + wrapper_name
module_path
wrapper_module = __import__(module_path, fromlist=[wrapper_class])
function = getattr(wrapper_module, wrapper_class)
function

wrappers.sklearn.keras_classifier.SklearnKerasClassifier

In [None]:
module_path = 'sklearn_keras_classifier'
'_'.join(module_path.split('_')[1:3]) 

In [25]:
import pandas as pd
url = "../data/uci_auto_mpg.csv"
data = pd.read_csv(url, delimiter=",", header=0, index_col=0)
data
# frames = [X, Y]
# data = pd.concat(frames, axis=1)
# data.to_csv('../data/uci_auto_mpg.csv')

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
5,15.0,8,429.0,198,4341,10.0,70,1,ford galaxie 500
6,14.0,8,454.0,220,4354,9.0,70,1,chevrolet impala
7,14.0,8,440.0,215,4312,8.5,70,1,plymouth fury iii
8,14.0,8,455.0,225,4425,10.0,70,1,pontiac catalina
9,15.0,8,390.0,190,3850,8.5,70,1,amc ambassador dpl
