### Experiments using IMLY ###

This notebook contains experimental runs of IMLY with different datasets.  
The readings of these experiments can be referred to in this [sheet](https://docs.google.com/spreadsheets/d/1E5jcq2w42gN8bMIaeaRJpAdhgSVN-2XDJ_YTHe4qfwY/edit?usp=sharing).

---

### Dataset  #1

#### Diabetes dataset

In [1]:
import experiment_automation_script
from os import path
import pandas as pd

dataset_info = experiment_automation_script.get_dataset_info("diabetes")
url = "../data/diabetes.csv" if path.exists("../data/diabetes.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, index_col=False)

X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

experiment_automation_script.dopify(dataset_info, 'linear_regression', X, Y, 0.60)

Using TensorFlow backend.
100%|██████████| 3/3 [00:03<00:00,  1.35s/it]


Scan Finished!
Epoch 1/1


AttributeError: 'SklearnKerasRegressor' object has no attribute 'evaluate'

---

### Dataset  #2

#### UCI Abalone dataset

In [2]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


dataset_info = experiment_automation_script.get_dataset_info("uci_abalone")

names = ["sex", "length", "diameter", "height", "whole weight",
        "shucked weight", "viscera weight", "shell weight", "rings"]
url = "../data/abalone.data.csv" if path.exists("../data/abalone.data.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, names=names, index_col=False)
data.head()

# Check for columns that contain missing values #
col_names = data.columns

num_data = data.shape[0]

categorical_col = ['sex']
for col in categorical_col:
    b, c = np.unique(data[col], return_inverse=True)
    data[col] = c

    
# Filter dataset to contain 'rings' 9 and 10 #
data = data[data['rings'].isin([9,10])]
data['rings'] = data['rings'].map({9: 0, 10: 1})


feature_list = names[:7]
X = data.loc[:, feature_list]
Y = data[['rings']]


experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]


Scan Finished!
Epoch 1/1


---

### Dataset  #3

#### UCI Iris dataset

In [1]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "uci_iris"
dataset_info = experiment_automation_script.get_dataset_info(dataset_name)

url = "../data/iris.csv" if path.exists("../data/iris.csv") else dataset_info['url']
data = pd.read_csv(url , delimiter=",", header=None, index_col=False)
class_name,index = np.unique(data.iloc[:,-1],return_inverse=True)
data.iloc[:,-1] = index
data = data.loc[data[4] != 2]
X = data.iloc[:,:-1]
Y = data.iloc[:,-1]

experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

Using TensorFlow backend.


Keras classifier chosen


100%|██████████| 1/1 [00:12<00:00, 12.74s/it]


Scan Finished!


TypeError: __call__() missing 1 required positional argument: 'inputs'

In [14]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)
np.random.seed(7)

def create_model():
    model = Sequential()
    model.add(Dense(1,input_dim=4,activation='sigmoid'))

    # Compile the model #

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model
    

model = KerasClassifier(build_fn=create_model, epochs=200, batch_size=10)
model.fit(x_train, y_train)

scores = model.score(x_test, y_test)
scores

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200


Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200


Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200


Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


0.9333333174387614

In [5]:
model.get_params()

{'build_fn': <function __main__.create_model()>}

---

### Dataset  #4

#### UCI Adult salary dataset

In [6]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path

dataset_name = "uci_adult_salary"
dataset_info = experiment_automation_script.get_dataset_info(dataset_name)


names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
         'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 
         'hours-per-week', 'native-country', 'target']
url = "../data/iris.csv" if path.exists("../data/dataset.csv.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=" ", header=None, names=names)


data = data[data["workclass"] != "?"]
data = data[data["occupation"] != "?"]
data = data[data["native-country"] != "?"]

# Convert categorical fields #
categorical_col = ['workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race', 'sex', 'native-country', 'target']

for col in categorical_col:
    b, c = np.unique(data[col], return_inverse=True)
    data[col] = c

feature_list = names[:14]
# Test train split #
X = data.loc[:, feature_list]
Y = data[['target']]

experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

# Split the dataset into test and train datasets

# from sklearn.model_selection import train_test_split
# x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
100%|██████████| 1/1 [00:33<00:00, 33.26s/it]


Scan Finished!
Epoch 1/1


---

### Dataset  #5

#### UCI Ad dataset

In [8]:
import experiment_automation_script
import pandas as pd
import numpy as np
from os import path
from sklearn.preprocessing import LabelEncoder

dataset_name = "uci_ad"
dataset_info = experiment_automation_script.get_dataset_info(dataset_name)

url = "../data/ad.data.csv" if path.exists("../data/dataset.csv.csv") else dataset_info['url']
data = pd.read_csv(url, delimiter=",", header=None, index_col=False)

# Check for columns that contain missing values #

data = data.applymap(lambda val: np.nan if str(val).strip() == '?' else val)
data = data.dropna()


# Label encoding #

lb = LabelEncoder()
Y = lb.fit_transform(data.iloc[:, -1])

X = data.iloc[:,:-1]

# Normalize the X values #
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
X = pd.DataFrame(X)
Y = pd.DataFrame(Y)

experiment_automation_script.dopify(dataset_info, 'logistic_regression', X, Y, 0.60)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
100%|██████████| 1/1 [00:02<00:00,  2.51s/it]


Scan Finished!
Epoch 1/1


---

### Dataset  #6

#### UCI Mushroom dataset

In [3]:
from sklearn.model_selection import train_test_split
import copy

model_name = 'linear_regression'
model_mappings = {
    'linear_regression': 'LinearRegression',
    'logistic_regression': 'LogisticRegression'
}

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

for key, value in model_mappings.items():
    if key == model_name:
        name = value

module = __import__('sklearn.linear_model', fromlist=[name])
imported_module = getattr(module, name)
model = imported_module

primal_model = model()

# Primal
primal_model.fit(x_train, y_train)
primal_model.__class__.__name__

'LinearRegression'

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

diabetes = datasets.load_diabetes()

# Use only one feature
diabetes_X = diabetes.data
# sc = StandardScaler()
# diabetes.data = sc.fit_transform(diabetes.data)

X = diabetes.data
Y = diabetes.target

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

# # Split the data into training/testing sets
# x_train = diabetes_X[:-20]
# x_test = diabetes_X[-20:]

# # Split the targets into training/testing sets
# y_train = diabetes.target[:-20]
# y_test = diabetes.target[-20:]


In [23]:
# temp_data = np.column_stack([X,Y])
# np.savetxt("diabetes.csv", temp_data, delimiter=",")

In [3]:
import winmltools
winmltools.__file__

'C:\\Users\\shakk\\Anaconda2\\envs\\py36\\lib\\site-packages\\winmltools\\__init__.py'

In [9]:
import onnxmltools

def f1(**kwargs):
    params_json = json.load(open('../imly/architectures/sklearn/params.json'))
    params = params_json['params']
    kwargs.setdefault('params', params)
    kwargs.setdefault('x_train', np.array([[1], [2]]))

    model = Sequential()
    model.add(Dense(kwargs['params']['first_neuron'],
                    input_dim=kwargs['x_train'].shape[1],
                    activation=kwargs['params']['activation']))

    model.compile(optimizer=kwargs['params']['optimizer'],
                  loss=kwargs['params']['losses'],
                  metrics=['acc'])
    onnx_model = onnxmltools.convert_keras(model, target_opset=8)
    print(type(model))
    onnx_model
    return onnx_model

In [10]:
import json
import numpy as np
from keras import Sequential
from keras.layers import Dense

model = f1()

The maximum opset needed by this model is only 7.


<class 'keras.engine.sequential.Sequential'>


In [19]:
type(model)
# cross check import (f1p1 and f2p2 combination) - Is it possible to edit after the export-import flow

onnx.onnx_ml_pb2.ModelProto

In [18]:
import onnx
onnx.save(model, './onnx_model.onnx')

---

## Test bed ##

In [40]:
mapping = { "KerasClassifier": {
    "LogisticRegression": True
},
 "KerasRegressor": {
     "LinearRegression": True
 }
}

name = "LinearRegression"

for key, value in mapping.items():
    test = mapping[key]
    print(test)

{'LogisticRegression': True}
{'LinearRegression': True}
