In [1]:
import pickle
import shutil
import sys
from pathlib import Path

import mlflow
import numpy as np
import pandas as pd

# Put the parent directory on the import path so `from src import ...` resolves.
sys.path.insert(0, '..')


In [2]:
# Files handed to mlflow.pyfunc.save_model as artifacts (saved model weights, pickle files, etc).
# The model path is made absolute and resolved; the metadata path is left relative.
model_path = str((Path('..') / 'models' / '2020-11-15-RF-model.pickle').absolute().resolve())
metadata_path = str(Path('..') / 'metadata.txt')
artifacts = {
    'pickle_model': model_path,
    'metadata': metadata_path,
}


In [3]:
# Serve as an MLflow wrapper for model
class ModelWrapper(mlflow.pyfunc.PythonModel):
    """Expose the pickled model through the ``mlflow.pyfunc`` interface.

    All imports happen inside the methods rather than at module level --
    presumably so the class keeps working when MLflow re-loads it outside this
    notebook (TODO confirm).
    """

    def load_context(self, context):
        """Load the pickled model registered under the ``'pickle_model'`` artifact.

        Args:
            context: Object provided by MLflow; ``context.artifacts`` is indexed
                by artifact name and yields the path of the file to open.

        Side effects:
            Stores the unpickled object on ``self.model``.
        """
        # Neither name is referenced below. Importing them here makes a missing
        # dependency fail at load time; unpickling the model presumably needs
        # both to be importable -- confirm before removing.
        import sklearn  # noqa: F401
        import pickle
        from src import model  # noqa: F401

        # SECURITY: pickle.load can execute arbitrary code. Only point the
        # 'pickle_model' artifact at files from a trusted source.
        with open(context.artifacts['pickle_model'], 'rb') as f:
            self.model = pickle.load(f)

    def predict(self, context, model_input):
        """Return predicted labels for ``model_input``.

        Args:
            context: Object provided by MLflow; not used here.
            model_input: A pandas DataFrame, or any object that is already in
                the form ``src.model.predict`` accepts (e.g. a numpy array).

        Returns:
            Whatever ``src.model.predict(model_input, self.model)`` returns.
        """
        from src import model

        # Objects exposing to_numpy() (DataFrame, Series) are converted as
        # before; anything else is passed through unchanged instead of raising
        # AttributeError.
        if hasattr(model_input, 'to_numpy'):
            model_input = model_input.to_numpy()
        return model.predict(model_input, self.model)

In [4]:
# Project source directory shipped with the model, and the directory the model is saved to
src_path = Path('../src')
mlflowpath = Path('..', 'mlflow', 'model')

# mlflow complains if directory already exists, so remove it before saving model to mlflow directory.
# ignore_errors=True keeps the `rm -rf` behaviour: a missing directory is not an error.
shutil.rmtree(mlflowpath, ignore_errors=True)

mlflow.pyfunc.save_model(
    path=str(mlflowpath),
    python_model=ModelWrapper(),
    artifacts=artifacts,
    conda_env='../envtest.yml',
    code_path=[str(src_path)],
)

In [5]:
# Read the model back from the mlflow directory
saved_model_dir = str(mlflowpath)
loaded_model = mlflow.pyfunc.load_model(saved_model_dir)

In [6]:
# Test model on saved data: build a DataFrame from the first three entries of
# test_data['valid']['X'].
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../test_data.pickle', 'rb') as f:
    test_data = pickle.load(f)

df = pd.DataFrame(data=test_data['valid']['X'][:3])

In [7]:
# Run the reloaded model on the sample rows; the bare name on the last line displays the result
predictions = loaded_model.predict(df)
predictions

[['estrogen receptor alpha, LBD (ER, LBD): inactive',
  'estrogen receptor alpha, full (ER, full): inactive',
  'aromatase: inactive',
  'aryl hydrocarbon receptor (AhR): inactive',
  'androgen receptor, full (AR, full): inactive',
  'androgen receptor, LBD (AR, LBD): inactive',
  'peroxisome proliferator-activated receptor gamma (PPAR-gamma): inactive',
  'nuclear factor (erythroid-derived 2)-like 2/antioxidant responsive element (Nrf2/ARE): inactive',
  'heat shock factor response element (HSE): inactive',
  'ATAD5: inactive',
  'mitochondrial membrane potential (MMP): inactive',
  'p53: inactive'],
 ['estrogen receptor alpha, LBD (ER, LBD): inactive',
  'estrogen receptor alpha, full (ER, full): inactive',
  'aromatase: inactive',
  'aryl hydrocarbon receptor (AhR): inactive',
  'androgen receptor, full (AR, full): inactive',
  'androgen receptor, LBD (AR, LBD): inactive',
  'peroxisome proliferator-activated receptor gamma (PPAR-gamma): inactive',
  'nuclear factor (erythroid-deriv