### Contributing Sklearn's decompositon SVD to mlsquare

**Fork mlsquare repository to your account and clone.**

**Or just Clone https://github.com/mlsquare/mlsquare.git**

* Navigate to `src/mlsquare/architectures` folder, Where the code for mapping `TruncatedSVD()` to `tf.linalg.svd()` resides.
* The code for mapping primal model(SVD) to corresponding TF equivalent is saved in `sklearn.py` file.

**This notebook contains following edits in succession to method 1:**
* Arranging matrix tranformation operations in architecture.
* Utilising existing `SklearnKerasRegressor` methods
* Restraining trigger of `SklearnKerasRegressor`'s standard methods.

In [1]:
import os
os.getcwd()

'/home/kev/Desktop/mlsquare_experiments/src'

In [3]:
import tensorflow as tf
tf.__version__

'1.13.1'

#### 1. Register the proxy SVD model in `mlsquare/architecture/sklearn.py` as follows

In [None]:
#from ..base import registry, BaseModel
from mlsquare.base import registry, BaseModel
from mlsquare.adapters.sklearn import SklearnKerasRegressor
from mlsquare.architectures.sklearn import GeneralizedLinearModel

from abc import abstractmethod
import tensorflow as tf
import pandas

class DimensionalityReductionModel:
    @abstractmethod
    def fit(self, X, y= None):
        """Needs Implementation in sub classes"""
        
    @abstractmethod
    def fit_transform(self, X, y=None):
        """Needs Implementation in sub classes"""
        


@registry.register
class SVD(DimensionalityReductionModel, GeneralizedLinearModel):
    def __init__(self):
        self.adapter = SklearnKerasRegressor
        #self.adapter = SklearnKerasRegressor(DimensionalityReductionModel)
        self.module_name = 'sklearn' 
        self.name = 'TruncatedSVD'
        self.version = 'default'
        model_params = {'full_matrices': False,
                       'compute_uv': True,
                      'name':None}

        self.set_params(params=model_params, set_by='model_init')
    def fit(self, X, y=None, **kwargs):
        self.fit_transform(X)
        return self
    
    def fit_transform(self, X, y=None,**kwargs):
        kwargs.setdefault('full_matrices', False)
        kwargs.setdefault('compute_uv', True)
        kwargs.setdefault('name', None)
        
        X = np.array(X, dtype= np.float32 if str(X.values.dtype)=='float32' else np.float64) if isinstance(X, pandas.core.frame.DataFrame) else np.array(X, dtype= np.float32 if str(X.dtype)=='float32' else np.float64)#changing to recommended dtype, accomodating dataframe & numpy array

        #X = np.array(X)
        #y = np.array(y)
        
        n_components= self.primal.n_components#using primal attributes passed from adapter
        n_features = X.shape[1]
        if n_components>= n_features:
                raise ValueError("n_components must be < n_features;"
                                 " got %d >= %d" % (n_components, n_features))
                
        sess= tf.Session()#for TF  1.13
        s,u,v= sess.run(tf.linalg.svd(X, full_matrices=kwargs['full_matrices'], compute_uv=kwargs['compute_uv']))#for TF  1.13
        
        self.components_= v[:n_components,:]
        X_transformed = u[:,:n_components] * s[:n_components]
        
        self.explained_variance_= np.var(X_transformed, axis=0)
        
        self.singular_values_ = s[:n_components]
        return X_transformed
    
    def transform(self, X):
        return X@self.components_.T
    
    def inverse_transform(self, X):
        return np.dot(X, self.components_)


#### 2. Using existing adapter `SklearnKerasRegressor` with minor modifications for mapping `sklearn.decomposition.TruncatedSVD`  to `tensorflow.linalg.svd` in `mlsquare/adapters/sklearn.py`  and work with sklearn methods. 

In [None]:
from mlsquare.utils.functions import _parse_params
import numpy as np
from ..architectures import sklearn

import tensorflow as tf
from keras.utils import to_categorical

class SklearnKerasRegressor():
    def __init__(self, proxy_model, primal_model, **kwargs):
        self.primal_model = primal_model
        self.proxy_model = proxy_model
        self.params = None

    def fit(self, X, y=None, **kwargs):
        self.proxy_model.X = X
        self.proxy_model.y = y
        self.proxy_model.primal = self.primal_model
        kwargs.setdefault('verbose', 0)
        kwargs.setdefault('epochs', 250)
        kwargs.setdefault('batch_size', 30)
        kwargs.setdefault('params', self.params)
        self.params = kwargs['params']

        if self.params != None: ## Validate implementation with different types of tune input
            if not isinstance(self.params, dict):
                raise TypeError("Params should be of type 'dict'")
            self.params = _parse_params(self.params, return_as='flat')
            self.proxy_model.update_params(self.params)

        #if self.proxy_model.__class__.__name in ['SVD', 'PCA']:
        if isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            return self.proxy_model.fit(X)
        
        primal_model = self.primal_model
        primal_model.fit(X, y)
        y_pred = primal_model.predict(X)
        primal_data = {
            'y_pred': y_pred,
            'model_name': primal_model.__class__.__name__
        }

        self.final_model = get_best_model(X, y, proxy_model=self.proxy_model, primal_data=primal_data,
                                          epochs=kwargs['epochs'], batch_size=kwargs['batch_size'],
                                          verbose=kwargs['verbose'])
        return self.final_model  # Not necessary.
    
    def transform(self, X):
        if not isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            raise AttributeError("'SklearnKerasRegressor' object has no attribute 'transform'")
        return self.proxy_model.transform(X)
    
    def fit_transform(self, X,y=None):
        if not isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            raise AttributeError("'SklearnKerasRegressor' object has no attribute 'fit_transform'")
        return self.proxy_model.fit_transform(X)
    
    def inverse_transform(self, X):
        if not isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            raise AttributeError("'SklearnKerasRegressor' object has no attribute 'inverse_transform'")
        return self.proxy_model.inverse_transform(X)
    
    
    def score(self, X, y, **kwargs):
        if isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            raise AttributeError("'SklearnKerasRegressor' object has no attribute 'score'")

        score = self.final_model.evaluate(X, y, **kwargs)
        return score
    
    def predict(self, X):
        '''
        Pending:
        1) Write a 'filter_sk_params' function(check keras_regressor wrapper) if necessary.
        2) Data checks and data conversions
        '''
        if isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            raise AttributeError("'SklearnKerasRegressor' object has no attribute 'predict'")
            
        pred = self.final_model.predict(X)
        return pred

    def save(self, filename=None):
        if filename == None:
            raise ValueError(
                'Name Error: to save the model you need to specify the filename')

        if isinstance(self.proxy_model, (sklearn.DimensionalityReductionModel)):
            raise AttributeError("'SklearnKerasRegressor' object has no attribute 'save'")
        pickle.dump(self.final_model, open(filename + '.pkl', 'wb'))

        self.final_model.save(filename + '.h5')

        onnx_model = onnxmltools.convert_keras(self.final_model)
        onnxmltools.utils.save_model(onnx_model, filename + '.onnx')

    def explain(self, **kwargs):
        # @param: SHAP or interpret
        print('Coming soon...')
        return self.final_model.summary()

* registered methods so far:

In [4]:
from mlsquare.base import registry
registry.data

Using TensorFlow backend.
2019-12-02 21:27:02,054	INFO node.py:423 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-12-02_21-27-02_14769/logs.
2019-12-02 21:27:02,196	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:23611 to respond...
2019-12-02 21:27:02,319	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:61413 to respond...
2019-12-02 21:27:02,324	INFO services.py:760 -- Starting Redis shard with 20.0 GB max memory.
2019-12-02 21:27:02,362	INFO services.py:1384 -- Starting the Plasma object store with 1.0 GB memory using /dev/shm.


{('sklearn',
  'TruncatedSVD'): {'default': [<mlsquare.architectures.sklearn.SVD at 0x7f35cd1ca1d0>,
   mlsquare.adapters.sklearn.SklearnKerasRegressor]},
 ('sklearn',
  'LogisticRegression'): {'default': [<mlsquare.architectures.sklearn.LogisticRegression at 0x7f35cc455a20>,
   mlsquare.adapters.sklearn.SklearnKerasClassifier]},
 ('sklearn',
  'LinearRegression'): {'default': [<mlsquare.architectures.sklearn.LinearRegression at 0x7f35cc455be0>,
   mlsquare.adapters.sklearn.SklearnKerasRegressor]},
 ('sklearn',
  'Ridge'): {'default': [<mlsquare.architectures.sklearn.Ridge at 0x7f35cc455da0>,
   mlsquare.adapters.sklearn.SklearnKerasRegressor]},
 ('sklearn',
  'Lasso'): {'default': [<mlsquare.architectures.sklearn.Lasso at 0x7f35cc455f60>,
   mlsquare.adapters.sklearn.SklearnKerasRegressor]},
 ('sklearn',
  'ElasticNet'): {'default': [<mlsquare.architectures.sklearn.ElasticNet at 0x7f35cc464160>,
   mlsquare.adapters.sklearn.SklearnKerasRegressor]},
 ('sklearn',
  'LinearSVC'): {'defau

(**Once the new model is registered & corresponding adapter is defined in mlsquare framework.**)
#### User Interaction with `dope` with sklearn SVD preference & intent to utilise underlying TF SVD 

    

    1. a) User instantiates a primal model `sklearn.decomposition.TruncatedSVD` with args --`n_components` as number of required singular components.
    b) User loads the data & proceed with necessary data preparation steps 
    
    
    2. Now, import `dope` from mlsquare & `dope` the primal model by passing primal model to dope function. The `dope` function equips above primal model with standard sklearn methods--`fit, fit_transform, save, explain.`
    
    3.  Carry on with usual sklearn SVD methods; Try out sklearn 
    methods -- `.fit( )`, `.fit_transform( )`, `.transform( )` with the doped model.

#### 1.a Instantiate primal module
* n_components: 10 (number of reduced dimensions)

In [5]:
import numpy as np
from sklearn.decomposition import TruncatedSVD

primal = TruncatedSVD(n_components=10)

In [6]:
primal.get_params()

{'algorithm': 'randomized',
 'n_components': 10,
 'n_iter': 5,
 'random_state': None,
 'tol': 0.0}

#### 1.b Following are data preparation steps required to instantiate a svd model
* Also evaluating the regression results at various stages with varying dimensions.

In [7]:
import os
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.preprocessing import LabelEncoder


import pandas as pd
reg= linear_model.LinearRegression()

boston =load_boston()
df_x= pd.DataFrame(boston.data, columns= boston.feature_names)
lbe= LabelEncoder()
df_x = df_x.apply(lambda x: lbe.fit_transform(x))#df_x[col]))
df_y= df_y= pd.DataFrame(boston.target)


xtrain, xtest, ytrain, ytest = train_test_split(df_x, df_y, test_size=0.2)
print(xtrain.shape, xtest.shape)

(404, 13) (102, 13)


In [8]:
df_x.head(3)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0,3,19,0,51,320,172,297,0,34,9,356,53
1,23,0,56,0,36,279,225,333,1,11,23,356,161
2,22,0,56,0,36,400,159,333,1,11,23,271,28


* Validating results with full dimensionality.

In [9]:
reg= linear_model.LinearRegression()
reg.fit(xtrain, ytrain)
print(reg.score(xtest, ytest))

0.7562361743176013


* Validating results with reduced dimensionality through primal model.

In [10]:
skl_truncated_x = primal.fit(df_x).transform(df_x)

xtrain, xtest, ytrain, ytest = train_test_split(skl_truncated_x, df_y, test_size=0.2)
print('sklearn_svd truncated dims:', skl_truncated_x.shape)
reg= linear_model.LinearRegression()
reg.fit(xtrain, ytrain)
print(reg.score(xtest, ytest))

sklearn_svd truncated dims: (506, 10)
0.729796445256366


#### 2. dope the model to obtain keras svd

In [11]:
from mlsquare import dope

model = dope(primal)# adapter(proxy_model=proxy_model, primal_model=primal)

Transpiling your model to it's Deep Neural Network equivalent...


In [12]:
print('proxy model object from registry:\n', model.proxy_model, '\n\ncorrespnding adapter:\n', model)

proxy model object from registry:
 <mlsquare.architectures.sklearn.SVD object at 0x7f35cd1ca1d0> 

correspnding adapter:
 <mlsquare.adapters.sklearn.SklearnKerasRegressor object at 0x7f358cb73c18>


In [13]:
??model

#### 3. Try out sklearn methods-- `.fit( )` & `.fit_transform( )` to obtain reduced dimensionality, with sklearn's `boston_dataset` from `1.b` above.
* Fitting the doped model with -- Dataframe input Or Numpy array inputs

In [14]:
inp= np.array(df_x.values, dtype= np.float64)

#dope_truncated_x=model.fit_transform(df_x) #takes in dataframe input
dope_truncated_x= model.fit_transform(inp)

dope_truncated_x.shape
#dimensionality reduced to n_components using tf.linalg.svd

(506, 10)

* Validating results with reduced dimensionality through doped model & ascertaining approximately faithful results through underlying TF method.

In [15]:
#truncated_x= model.fit(df_x).fit_transform(df_x)
xtrain, xtest, ytrain, ytest = train_test_split(dope_truncated_x, df_y, test_size=0.2)

print('doped_svd truncated dims:', dope_truncated_x.shape)

reg= linear_model.LinearRegression()
reg.fit(xtrain, ytrain)
print(reg.score(xtest, ytest))

doped_svd truncated dims: (506, 10)
0.7294817890786716


* Trying how sklearn SVD deals with anamoly methods--`.score()`, `.predict()` and implement similar error flagging for undefined apis for proxy_model.
    * Chances are a user presuming TrucnatedSVD as a usual model will try out above methods

* follwing contains error for sklearn_svd's undefined  api

In [16]:
primal.predict(df_x)
#primal is an sklearn object

AttributeError: 'TruncatedSVD' object has no attribute 'predict'

* proxy svd model flags following error on calling undefined methods, Usually. .

In [26]:
model.score(inp)# Same for model.predict(inp)
#model is a adapter object

AttributeError: 'SklearnKerasRegressor' object has no attribute 'final_model'

* Un-implemented methods flag an `AttributeError`

* proxy svd model flags following error on calling undefined methods, Now!

In [17]:
model.predict(inp)#Same for model.score(inp)
#model is a adapter object

AttributeError: 'SklearnKerasRegressor' object has no attribute 'predict'

_________________

**Questions**
* Or it could be that once the wrapper method(`.fit( )`) yields an architecture object(`model.fit(x)`), the resulting arch object should only be used to access underlying attributes--(`sigma components, Vh values`) and not for perpetual transformation say--(`.fit( ).transform( )`).?
* Or Each sklearn's native method should be used across individual adapter instances?

**Problems**
* Also `trans_input.components_` differs from `primal.components_` by some tolerance.

* following is accessing/operating architecture SVD methods directly from archi instance not on adapter instance.

In [18]:
trans_input = model.fit(inp)
trans_input.fit_transform(inp)#Here trans_input is an architecture instance

array([[ 471.04962214,  330.53051303,    8.67494127, ...,  -10.20964288,
         -11.40944565,   26.75673524],
       [ 545.473621  ,  266.09903932,   78.12796733, ...,   19.84120668,
          -4.45406447,   -5.85600368],
       [ 477.92571461,  357.67984616,  -89.8250494 , ...,   28.90475808,
          -3.75504454,   -3.6268214 ],
       ...,
       [ 533.89491136,  184.50034054, -103.19963966, ...,   29.29001292,
          13.7427487 ,   -3.59156992],
       [ 545.86519119,  132.99450123, -118.31830091, ...,   28.33873663,
          12.90476995,   -3.26489671],
       [ 463.39533327,  126.36114053,   96.81230318, ...,   29.68789887,
           7.78444792,    4.41248029]])

* why not equivalent to `primal.fit(inp).transform(inp)` / `model.fit_transform(inp)` Or `trans_input.fit_transform(inp)` ?

In [21]:
model.fit(inp).transform(inp)#same as trans_input.transform(inp2) ??

array([[ -75.56887878,  -57.8884622 ,  260.59461822, ...,  102.97383264,
         352.53301963,  -70.64307926],
       [ -72.29982996,  -50.40249459,  325.00120352, ...,  102.73029093,
         352.15956815,  -52.63267368],
       [ -58.21365467,  -49.94548524,  292.98765322, ...,  146.19322653,
         266.72016715, -101.97958854],
       ...,
       [ -55.38801771,    5.15461941,  244.974442  , ...,  164.35738679,
         358.31152541,  119.03717653],
       [ -22.34637091,    1.54294869,  262.12034469, ...,  182.74887134,
         290.10490754,   99.91750279],
       [ -65.32687223,   -1.70295098,  235.34476637, ...,   85.01131929,
         356.28068861,   47.04061476]])

In [22]:
trans_input_x= model.fit_transform(inp)
trans_input_x

array([[ 471.04962214,  330.53051303,    8.67494127, ...,  -10.20964288,
         -11.40944565,   26.75673524],
       [ 545.473621  ,  266.09903932,   78.12796733, ...,   19.84120668,
          -4.45406447,   -5.85600368],
       [ 477.92571461,  357.67984616,  -89.8250494 , ...,   28.90475808,
          -3.75504454,   -3.6268214 ],
       ...,
       [ 533.89491136,  184.50034054, -103.19963966, ...,   29.29001292,
          13.7427487 ,   -3.59156992],
       [ 545.86519119,  132.99450123, -118.31830091, ...,   28.33873663,
          12.90476995,   -3.26489671],
       [ 463.39533327,  126.36114053,   96.81230318, ...,   29.68789887,
           7.78444792,    4.41248029]])