# Demo - Features of `mlsquare.dope`

## `dope` function

In [1]:
# Load the iris data, split it, and standardize the features
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fixed seed so the train/test split is identical on every
# "Restart Kernel -> Run All".
SEED = 42

iris = load_iris()
X = iris['data']
y = iris['target']

# Split BEFORE scaling so the held-out rows never contribute to the
# scaler's mean/variance (avoids leaking test statistics into training).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.6, shuffle=True, random_state=SEED
)

# Fit the scaler on the training rows only, then apply the same
# transformation to every split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Kept so that `X` still holds standardized features, as it did before.
X = scaler.transform(X)

In [2]:
from mlsquare import dope

Using TensorFlow backend.
2019-11-07 16:17:10,699	INFO node.py:423 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-11-07_16-17-10_32653/logs.
2019-11-07 16:17:10,808	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:19189 to respond...
2019-11-07 16:17:10,921	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:42688 to respond...
2019-11-07 16:17:10,926	INFO services.py:760 -- Starting Redis shard with 20.0 GB max memory.
2019-11-07 16:17:10,958	INFO services.py:1384 -- Starting the Plasma object store with 1.0 GB memory using /dev/shm.


### Basic usage - `dope` without additional parameters

In [3]:
# Instantiate your primal model (the model you intend to convert to a neural network)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

# This single line transpiles the model to its neural-network equivalent;
# `m` is the object that is fit and scored in the following cells
m = dope(model)

Transpiling your model to it's Deep Neural Network equivalent...


In [4]:
# Train the dope-wrapped model on the training split
# (the recorded output shows the fit running as a Ray Tune experiment)
m.fit(X_train, y_train)

2019-11-07 16:17:15,759	INFO tune.py:60 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run()
2019-11-07 16:17:15,760	INFO tune.py:211 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.3/8.2 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.3/8.2 GB
Result logdir: /home/shakkeel/ray_results/experiment_name
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - train_model_0:	RUNNING

[2m[36m(pid=32695)[0m Using TensorFlow backend.
[2m[36m(pid=32695)[0m 2019-11-07 16:17:18,445	ERROR worker.py:1412 -- Calling ray.init() again after it has already been called.
[2m[36m(pid=32695)[0m Instructions for updating:
[2m[36m(pid=32695)[0m Colocations handled automatically by placer.
[2m[36m(pid=32695)[0m Instructions for updating:
[2m[36m(pid=32695)[0m Use tf.cast instead.
[2m[36m(pid=32695)[0m 2019-11-07 16:17:26.914519: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(p

2019-11-07 16:17:27,436	INFO ray_trial_executor.py:178 -- Destroying actor for trial train_model_0. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for train_model_0:
  checkpoint: 'weights_tune_{''layer_1.units'': 1, ''layer_1.l1'': 0, ''layer_1.l2'':
    0, ''layer_1.activation'': ''sigmoid'', ''optimizer'': ''adam'', ''loss'': ''binary_crossentropy''}.h5'
  date: 2019-11-07_16-17-27
  done: false
  experiment_id: a63e26dd05234f31aabc59bb75105322
  hostname: shakkeel-TUF-GAMING-FX504GD-FX80GD
  iterations_since_restore: 1
  mean_accuracy: 0.3499999980131785
  node_ip: 192.168.1.4
  pid: 32695
  time_since_restore: 0.7397806644439697
  time_this_iter_s: 0.7397806644439697
  time_total_s: 0.7397806644439697
  timestamp: 1573123647
  timesteps_since_restore: 0
  training_iteration: 1
  
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.5/8.2 GB
Result logdir: /home/shakkeel/ray_results/experiment_name
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - train_model_0:	RUNNING, [4 CPUs, 0 GPUs], [pid=32695], 0 s, 1 iter, 0.35 acc

[2m[36m(pid=32695)[0m 
==

<keras.engine.sequential.Sequential at 0x7f9e88f4b470>

In [5]:
# Evaluate the fitted model on the held-out split.
# NOTE(review): the two returned values are presumably [loss, accuracy] — confirm against the adapter's `score`.
m.score(X_test, y_test)



[0.5179819915029737, 0.311111111442248]

### Choosing versions of the proxy models

In [6]:
# Display the primal sklearn estimator and its parameters
model

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [7]:
# Pass your neural network version choice as an argument to dope
# ('default' is the version key selected here)
m = dope(model, version='default')

Transpiling your model to it's Deep Neural Network equivalent...


In [8]:
# Show which adapter class now wraps the model
print(m)

<mlsquare.adapters.sklearn.SklearnKerasClassifier object at 0x7f9dcff1e3c8>


### Providing `adapter` and `proxy_model` externally

In [9]:
# Load adapter and proxy_model
# Here, we are loading an adapter and a proxy_model from mlsquare for the sake of simplicity
# NOTE(review): this looks up the LinearRegression entry while `model` is a
# LogisticRegression — presumably deliberate, to demonstrate an override; confirm.
from mlsquare import registry

proxy_model, adapter = registry[('sklearn', 'LinearRegression')]['default']

In [10]:
# Provide the adapter and proxy_model externally via keyword arguments
m = dope(model, adapter=adapter, proxy_model=proxy_model)

Transpiling your model to it's Deep Neural Network equivalent...


In [11]:
# Inspect the wrapped model alongside the proxy_model and adapter that were passed in
print(m)
print(proxy_model)
print(adapter)

<mlsquare.adapters.sklearn.SklearnKerasRegressor object at 0x7f9dcff1eb38>
<mlsquare.architectures.sklearn.LinearRegression object at 0x7f9e3d333588>
<class 'mlsquare.adapters.sklearn.SklearnKerasRegressor'>


## Optimizer - using `tune` for hyperparameter search

### Passing model parameters via the `fit` method

In [12]:
# Without additional search parameters
m = dope(model)

# No `params` argument is passed to `fit` here; the recorded run shows a single trial
m.fit(X_train, y_train)

Transpiling your model to it's Deep Neural Network equivalent...
2019-11-07 16:17:29,571	INFO tune.py:60 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run()
2019-11-07 16:17:29,572	INFO tune.py:211 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.3/8.2 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.3/8.2 GB
Result logdir: /home/shakkeel/ray_results/experiment_name
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - train_model_0:	RUNNING

[2m[36m(pid=32698)[0m Using TensorFlow backend.
[2m[36m(pid=32698)[0m 2019-11-07 16:17:31,681	ERROR worker.py:1412 -- Calling ray.init() again after it has already been called.
[2m[36m(pid=32698)[0m Instructions for updating:
[2m[36m(pid=32698)[0m Colocations handled automatically by placer.
[2m[36m(pid=32698)[0m Instructions for updating:
[2m[36m(pid=32698)[0m Use tf.cast instead.
[2m[36m(pid=32698)[0m 2019-11-07 16:17:32.218702: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(p

2019-11-07 16:17:32,697	INFO ray_trial_executor.py:178 -- Destroying actor for trial train_model_0. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32698)[0m 
Result for train_model_0:
  checkpoint: 'weights_tune_{''layer_1.units'': 1, ''layer_1.l1'': 0, ''layer_1.l2'':
    0, ''layer_1.activation'': ''sigmoid'', ''optimizer'': ''adam'', ''loss'': ''binary_crossentropy''}.h5'
  date: 2019-11-07_16-17-32
  done: false
  experiment_id: c61b0976e5724160adc1a15e0055d521
  hostname: shakkeel-TUF-GAMING-FX504GD-FX80GD
  iterations_since_restore: 1
  mean_accuracy: 0.5833333452542623
  node_ip: 192.168.1.4
  pid: 32698
  time_since_restore: 0.8713645935058594
  time_this_iter_s: 0.8713645935058594
  time_total_s: 0.8713645935058594
  timestamp: 1573123652
  timesteps_since_restore: 0
  training_iteration: 1
  
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.4/8.2 GB
Result logdir: /home/shakkeel/ray_results/experiment_name
Number of trials: 1 ({'TERMINATED': 1})
TERMINATED trials:
 - train_model_0:	TERMINATED, [4 CPUs, 0 GPUs], [pid=32698], 0 s, 1 iter, 0.5

<keras.engine.sequential.Sequential at 0x7f9e3d15eef0>

In [13]:
# Inspect the optimizer attached to `m.final_model` after fitting
m.final_model.optimizer

<keras.optimizers.Adam at 0x7f9dcc6b9438>

In [14]:
from ray import tune

# Search over optimizers. `tune.grid_search` builds the same
# {'grid_search': [...]} spec that was previously written by hand, and it
# puts the `tune` import to use. The recorded run shows one trial per
# listed optimizer value.
m = dope(model)
params = {'optimizer': tune.grid_search(['adam', 'nadam'])}
m.fit(X_train, y_train, params=params)

Transpiling your model to it's Deep Neural Network equivalent...
2019-11-07 16:17:32,798	INFO tune.py:60 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run()
2019-11-07 16:17:32,798	INFO tune.py:211 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.4/8.2 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.4/8.2 GB
Result logdir: /home/shakkeel/ray_results/experiment_name
Number of trials: 2 ({'RUNNING': 1, 'PENDING': 1})
PENDING trials:
 - train_model_1_optimizer=nadam:	PENDING
RUNNING trials:
 - train_model_0_optimizer=adam:	RUNNING

[2m[36m(pid=32697)[0m Using TensorFlow backend.
[2m[36m(pid=32697)[0m 2019-11-07 16:17:34,552	ERROR worker.py:1412 -- Calling ray.init() again after it has already been called.
[2m[36m(pid=32697)[0m Instructions for updating:
[2m[36m(pid=32697)[0m Colocations handled automatically by placer.
[2m[36m(pid=32697)[0m Instructions for updating:
[2m[36m(pid=32697)[0m Use tf.cast instead.
[2m[36m(pid=32697)[0m 2019-11-07 16:17:34.863843: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports

2019-11-07 16:17:35,335	INFO ray_trial_executor.py:178 -- Destroying actor for trial train_model_0_optimizer=adam. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for train_model_0_optimizer=adam:
  checkpoint: 'weights_tune_{''layer_1.units'': 1, ''layer_1.l1'': 0, ''layer_1.l2'':
    0, ''layer_1.activation'': ''sigmoid'', ''optimizer'': ''adam'', ''loss'': ''binary_crossentropy''}.h5'
  date: 2019-11-07_16-17-35
  done: false
  experiment_id: 57607f55834c4a00b36e19e8fe076535
  hostname: shakkeel-TUF-GAMING-FX504GD-FX80GD
  iterations_since_restore: 1
  mean_accuracy: 0.7166666626930237
  node_ip: 192.168.1.4
  pid: 32697
  time_since_restore: 0.6321709156036377
  time_this_iter_s: 0.6321709156036377
  time_total_s: 0.6321709156036377
  timestamp: 1573123655
  timesteps_since_restore: 0
  training_iteration: 1
  
[2m[36m(pid=32697)[0m 
[2m[36m(pid=32696)[0m Using TensorFlow backend.
[2m[36m(pid=32696)[0m 2019-11-07 16:17:37,956	ERROR worker.py:1412 -- Calling ray.init() again after it has already been called.
[2m[36m(pid=32696)[0m Instructions for updating:
[2m[36m(pid=32696)[0m Colocations handled automatically by placer

2019-11-07 16:17:38,943	INFO ray_trial_executor.py:178 -- Destroying actor for trial train_model_1_optimizer=nadam. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for train_model_1_optimizer=nadam:
  checkpoint: 'weights_tune_{''layer_1.units'': 1, ''layer_1.l1'': 0, ''layer_1.l2'':
    0, ''layer_1.activation'': ''sigmoid'', ''optimizer'': ''nadam'', ''loss'': ''binary_crossentropy''}.h5'
  date: 2019-11-07_16-17-38
  done: false
  experiment_id: 8ddf43a3b02d41a19410e6d7c954e4c0
  hostname: shakkeel-TUF-GAMING-FX504GD-FX80GD
  iterations_since_restore: 1
  mean_accuracy: 0.7333333293596903
  node_ip: 192.168.1.4
  pid: 32696
  time_since_restore: 0.8370919227600098
  time_this_iter_s: 0.8370919227600098
  time_total_s: 0.8370919227600098
  timestamp: 1573123658
  timesteps_since_restore: 0
  training_iteration: 1
  
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs
Memory usage on this node: 5.3/8.2 GB
Result logdir: /home/shakkeel/ray_results/experiment_name
Number of trials: 2 ({'TERMINATED': 1, 'RUNNING': 1})
RUNNING trials:
 - train_model_1_optimizer=nadam:	RUNNING, [4 CPUs, 0 GPUs], [pid=32696], 0

<keras.engine.sequential.Sequential at 0x7f9dcc5f3240>

In [15]:
# Inspect the optimizer attached to `m.final_model` after the grid-search fit
m.final_model.optimizer

<keras.optimizers.Nadam at 0x7f9dcc60ee10>

## Registry - Accessing mlsquare's model repository

In [16]:
from mlsquare import registry

In [17]:
# Registry entries are addressed first by a (module, model) key and then
# by a version name.
module_name, model_name = 'sklearn', 'DecisionTreeClassifier'
version = 'default'

# Fetch the versions registered for this model, then unpack the chosen
# version into its proxy_model and adapter.
registered_versions = registry[(module_name, model_name)]
proxy_model, adapter = registered_versions[version]

In [18]:
# Show the proxy_model instance and the adapter class returned by the registry lookup
print(proxy_model)
print(adapter)

<mlsquare.architectures.sklearn.DecisionTreeClassifier object at 0x7f9e3d2bf358>
<class 'mlsquare.adapters.sklearn.SklearnKerasClassifier'>


In [19]:
# List every (module, model) key currently held in the registry
list(registry.data.keys())

[('sklearn', 'LogisticRegression'),
 ('sklearn', 'LinearRegression'),
 ('sklearn', 'Ridge'),
 ('sklearn', 'Lasso'),
 ('sklearn', 'ElasticNet'),
 ('sklearn', 'LinearSVC'),
 ('sklearn', 'SVC'),
 ('sklearn', 'DecisionTreeClassifier')]

## `save` and `explain` methods

In [20]:
# Save the model under the name "test_model".
# NOTE(review): output format and location are not visible here; the opset message suggests an ONNX export — confirm.
m.save(filename="test_model")

The maximum opset needed by this model is only 7.


In [21]:
# Call `explain`; the recorded output is a "Coming soon..." notice followed by a layer summary
m.explain()

Coming soon...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 1)                 5         
Total params: 5
Trainable params: 5
Non-trainable params: 0
_________________________________________________________________


2019-11-07 17:14:55,865	ERROR worker.py:1780 -- The node with client ID cfdaefc73b8ade127afd024197915e32d9eff28c has been marked dead because the monitor has missed too many heartbeats from it.
