In [None]:
%load_ext autoreload
%autoreload 2

import steps
from steps.base import Step, BaseTransformer, hstack_inputs
from steps.sklearn.models import make_transformer

In [None]:
from sklearn.datasets import load_diabetes
import sklearn.preprocessing as prep 
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.externals import joblib

In [None]:
import numpy as np
import pandas as pd

class MinMaxScaler(BaseTransformer):
    """Pipeline transformer that delegates to ``sklearn.preprocessing.MinMaxScaler``.

    The wrapped scikit-learn object lives in ``self.scaler``; it is the only
    state that ``save`` writes and ``load`` restores.
    """

    def __init__(self):
        # Fresh, unfitted scikit-learn scaler with default settings.
        self.scaler = prep.MinMaxScaler()

    def fit(self, X):
        """Fit the wrapped scaler on ``X`` and return ``self``."""
        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Scale ``X`` with the wrapped scaler.

        Returns a dict with the scaled data stored under the key ``'X'``.
        """
        scaled = self.scaler.transform(X)
        return dict(X=scaled)

    def save(self, filepath):
        """Dump the wrapped scaler to ``filepath`` using joblib."""
        joblib.dump(self.scaler, filepath)

    def load(self, filepath):
        """Replace the wrapped scaler with the object stored at ``filepath``.

        Returns ``self``.
        """
        self.scaler = joblib.load(filepath)
        return self
    
class Normalizer(BaseTransformer):
    """Pipeline transformer that delegates to ``sklearn.preprocessing.Normalizer``.

    The wrapped scikit-learn object lives in ``self.scaler``; it is the only
    state that ``save`` writes and ``load`` restores.
    """

    def __init__(self):
        # Fresh scikit-learn normalizer with default settings.
        self.scaler = prep.Normalizer()

    def fit(self, X):
        """Call ``fit`` on the wrapped normalizer with ``X`` and return ``self``."""
        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Normalize ``X`` with the wrapped normalizer.

        Returns a dict with the normalized data stored under the key ``'X'``.
        """
        normalized = self.scaler.transform(X)
        return dict(X=normalized)

    def save(self, filepath):
        """Dump the wrapped normalizer to ``filepath`` using joblib."""
        joblib.dump(self.scaler, filepath)

    def load(self, filepath):
        """Replace the wrapped normalizer with the object stored at ``filepath``.

        Returns ``self``.
        """
        self.scaler = joblib.load(filepath)
        return self
    
class RandomForest(BaseTransformer):
    """Pipeline transformer that delegates to a ``RandomForestRegressor``.

    The wrapped scikit-learn model lives in ``self.estimator``; it is the only
    state that ``save`` writes and ``load`` restores.
    """

    def __init__(self):
        # Default-configured regressor (imported above under the alias RFR).
        self.estimator = RFR()

    def fit(self, X, y):
        """Fit the wrapped regressor on features ``X`` and targets ``y``; return ``self``."""
        self.estimator.fit(X, y)
        return self

    def transform(self, X, **kwargs):
        """Predict with the wrapped regressor.

        Extra keyword arguments are accepted and ignored. Returns a dict with
        the predictions stored under the key ``'y_pred'``.
        """
        predictions = self.estimator.predict(X)
        return dict(y_pred=predictions)

    def save(self, filepath):
        """Dump the wrapped regressor to ``filepath`` using joblib."""
        joblib.dump(self.estimator, filepath)

    def load(self, filepath):
        """Replace the wrapped regressor with the object stored at ``filepath``.

        Returns ``self``.
        """
        self.estimator = joblib.load(filepath)
        return self
    
def hstack_vector_inputs(inputs):
    """Place each input vector side by side as one column of a 2-D array.

    Parameters
    ----------
    inputs : sequence of array-like
        Vectors to combine. Every item is converted with ``np.asarray`` and
        reshaped to a single column, so plain lists, tuples and pandas Series
        are accepted in addition to NumPy arrays.

    Returns
    -------
    numpy.ndarray
        2-D array with one column per item of ``inputs``.

    Raises
    ------
    ValueError
        Propagated from ``np.hstack`` when ``inputs`` is empty or the items
        do not all contain the same number of elements.
    """
    # Coerce first: objects without a ``reshape`` method (lists, tuples, ...)
    # would otherwise fail before they ever reach np.hstack.
    columns = [np.asarray(input_).reshape(-1, 1) for input_ in inputs]
    return np.hstack(columns)

In [None]:
# Load scikit-learn's diabetes dataset; return_X_y=True yields the
# (features, target) pair instead of a Bunch object.
X,y = load_diabetes(return_X_y=True)

In [None]:
# List the `outputs` directory under the cache location before the pipeline runs.
!ls /mnt/ml-team/minerva/debug/example_problem/outputs

In [None]:
# Directory handed to every Step below as `cache_dirpath`.
CACHE_DIR = '/mnt/ml-team/minerva/debug/example_problem'

# ---- Branch 1: scaler + normalizer -> clf ---------------------------------
# The adapter keys ('X', 'y') match the parameter names of the transformers'
# fit/transform methods; presumably each (source, key) pair tells Step where
# to fetch that argument from -- confirm against steps.base.Step.
scaler = Step(name='scaler',
                  transformer=MinMaxScaler(),
                  input_data=['input'],
                  adapter={'X':[('input','X')]},
                  cache_dirpath=CACHE_DIR
                 )

# Only this step sets cache_output=True.
normalizer = Step(name='normalizer',
                  transformer=Normalizer(),
                  input_data=['input'],
                  adapter={'X':[('input','X')]},
                  cache_dirpath=CACHE_DIR,
                  cache_output=True
                 )

# Despite the variable name (spelled `classifer`), the wrapped RandomForest
# holds a RandomForestRegressor. Its X input combines the 'X' outputs of
# `scaler` and `normalizer` through `hstack_inputs` (imported from steps.base).
classifer = Step(name='clf',
                 transformer=RandomForest(),
                 input_data=['input'],
                 input_steps=[scaler, normalizer],                 
                 adapter={'y':([('input','y')]),
                          'X':([('scaler','X'),
                               ('normalizer','X')], hstack_inputs)
                         },
                 cache_dirpath=CACHE_DIR
                )

# ---- Branch 2: scaler1 + normalizer -> clf1 -------------------------------
scaler1 = Step(name='scaler1',
                  transformer=MinMaxScaler(),
                  input_data=['input'],
                  adapter={'X':[('input','X')]},
                  cache_dirpath=CACHE_DIR
                 )

# NOTE(review): this rebinds the Python variable `normalizer` to a second Step
# that reuses the step name 'normalizer' but omits cache_output=True.
# `classifer` above still references the first object; `classifer1` below gets
# this one. Verify how Step treats two steps sharing a name and cache_dirpath
# (they may resolve to the same cache files) -- TODO confirm this is intended.
normalizer = Step(name='normalizer',
                  transformer=Normalizer(),
                  input_data=['input'],
                  adapter={'X':[('input','X')]},
                  cache_dirpath=CACHE_DIR
                 )

classifer1 = Step(name='clf1',
                 transformer=RandomForest(),
                 input_data=['input'],
                 input_steps=[scaler1, normalizer],                 
                 adapter={'y':([('input','y')]),
                          'X':([('scaler1','X'),
                               ('normalizer','X')], hstack_inputs)
                         },
                 cache_dirpath=CACHE_DIR
                )

# ---- Ensemble: a RandomForest fed with the two branches' predictions ------
# The 'y_pred' outputs of 'clf' and 'clf1' are combined column-wise by
# hstack_vector_inputs (defined earlier in this notebook).
# NOTE(review): force_fitting=True presumably makes this step refit even when
# a cached transformer exists -- confirm in steps.base.
ensemble = Step(name='ensemble',
                 transformer=RandomForest(),
                 input_data=['input'],
                 input_steps=[classifer, classifer1],                 
                 adapter={'y':([('input','y')]),
                          'X':([('clf','y_pred'),
                               ('clf1','y_pred')], hstack_vector_inputs)
                         },
                 cache_dirpath=CACHE_DIR,
                force_fitting=True
                )

In [None]:
# Display the `ensemble` Step object before fit_transform has been called.
ensemble

In [None]:
# Pipeline payload: the outer key 'input' and the inner keys 'X' / 'y' are the
# names the steps refer to in their `input_data` and `adapter` arguments.
data = {'input': {'X': X,
                  'y': y,
                 },
            }

# NOTE(review): clean_cache() presumably removes previously cached artifacts
# for the pipeline -- confirm its exact scope in steps.base.
ensemble.clean_cache()
# Fit the pipeline on `data` and keep what the final step returns.
output = ensemble.fit_transform(data)

In [None]:
# Display the `ensemble` Step object again, now that fit_transform has run.
ensemble

In [None]:
# Shape of the 'y_pred' entry of the pipeline output ('y_pred' is the key
# RandomForest.transform stores its predictions under).
output['y_pred'].shape

In [None]:
# List the same `outputs` directory again, after the pipeline has run.
!ls /mnt/ml-team/minerva/debug/example_problem/outputs

In [None]:
# Load the file named 'clf' from the `outputs` directory under the cache path.
# NOTE: joblib.load unpickles its input, so only point it at trusted files.
clf = joblib.load('/mnt/ml-team/minerva/debug/example_problem/outputs/clf')

In [None]:
# Inspect the 'y_pred' entry of the object loaded from the 'clf' output file.
clf['y_pred']