# MLFlow Notebook
>This notebook is for building modular models using MLFlow
    

# Setup

## Environment

#### Dependencies

In [21]:
import yaml, os, mlflow, json, subprocess, time
from collections import defaultdict
from IPython.display import Javascript
from sys import modules as m

## Utilities

In [61]:
# Utilities for the configuration
def build_mlp_entrypoints(MLP_entry_points, MLP_entrys = defaultdict(lambda: defaultdict(dict))):
    for ep in MLP_entry_points:
        MLP_entrys[ep]["parameters"]["data_file"] = MLP_data_file
        MLP_entrys[ep]["command"] = "python "+MLP_script_name+" -r {regularization} {data_file}"
    return MLP_entrys

def build_MLproject_file(name, env, entry, out="MLproject"):
    if ".py" or ".ipynb" in name:
        name = ''.join(name.split(".")[:-1])
        
    entry = json.loads(json.dumps(entry)) if type(entry) is not dict else entry
    _mlp={"name":name,"conda_env":env,"entry_points":entry}
    write_ordered_yaml_from_dict(_mlp, out)
            
def write_ordered_yaml_from_dict(dictionary, out):
    with open(out, 'w') as f:
        for k,v in dictionary.items(): #this is to keep order in yaml
            yaml.dump({k:v},f,default_flow_style=False)
            
def introspect_dependencies():
    return [(mn,getattr(m[mn],'__version__',None)) for mn in list(set(m) & set(globals()))]

def generate_conda_file(env_name, channels=["defaults"], out="conda.yaml", deps = None):
    deps = ["{}={}".format(d[0],d[1]) if d[1] else d[0] for d in introspect_dependencies()]
    conda_dict = {"name":env_name, "channels":channels, "dependencies":deps}
    if out: write_ordered_yaml_from_dict(conda_dict, out)
    return conda_dict

def parse_ipynb(file, SEARCH = "Model", DELIMITER = "#"):
    with open(file) as f:
        me = json.load(f)
    search_level, current_level, start_idx = None, None, None
    for idx, cell in enumerate(me['cells']):
        if cell['cell_type'] == "markdown":
            #print(idx)
            if len(cell['source']) < 1:
                break
            curname=str(cell['source'][0])
            current_level = 0
            for n in str(curname):
                if n == DELIMITER: current_level+=1
                else: break               
            if not search_level:
                if SEARCH in str(curname):
                    search_level = current_level
                    start_idx = idx
            elif current_level <= search_level:
                break
    me['cells'] = me['cells'][start_idx:idx]
    return (me)

        
def convert_nb_to(out_type,MLP_nb_name,output_name,build_dir=None,
                  section=None,delimiter="#"):
    _temp = None
    if section:
        _temp = "{}__temp__{}".format(build_dir, MLP_nb_name)
        print("Writing temporary file {}".format(_temp))
        with open(_temp,"w") as f:
            mynb = json.dumps(parse_ipynb(MLP_nb_name, section, delimiter))
            f.write(mynb)
        MLP_nb_name=_temp
    new_name = ''.join(MLP_nb_name.split(".")[:-1])+"."+''.join(output_name.split(".")[-1])
    subprocess.call("jupyter nbconvert --to {} {}".format(out_type, MLP_nb_name), shell=True)
    print("Wrote {} from {}".format(new_name, MLP_nb_name))
    if _temp:
        subprocess.call("rm {}".format(_temp), shell=True)
        print("Cleaning up temporary file {}".format(_temp))

    subprocess.call("mv {} {}".format(new_name, build_dir+output_name), shell=True)
    print("Renamed {} to {}".format(new_name, build_dir+output_name))
        
def build_package(MLP_name,MLP_nb_name, MLP_script_name, MLP_build_dir=None,MLP_conda_channels=["defaults"],
                  MLP_model_section=None, MLP_readme_section=None,MLP_env="conda.yaml",  MLP_conda_file="conda.yaml",
                 MLP_mlproject_file="MLproject", MLP_entry_points = MLP_entry_points):
    
    build_dir = create_build_directory(MLP_build_dir, timestamp=True)
    convert_nb_to("markdown",MLP_nb_name,"README.md",build_dir=build_dir);
    convert_nb_to("script",MLP_nb_name,MLP_script_name,build_dir=build_dir, section=MLP_model_section)
    build_MLproject_file(MLP_name, MLP_env, MLP_entry_points, out=build_dir+MLP_mlproject_file)
    generate_conda_file(MLP_env, channels=MLP_conda_channels, out=build_dir+MLP_conda_file)
    return build_dir     

def create_build_directory(build_dir = None, timestamp=False):
    if not build_dir:
        return None
    if timestamp:build_dir+=time.strftime("%Y%m%d-%H%M%S")+"/"
    if not os.path.exists(build_dir):
        os.makedirs(build_dir)
    return build_dir


#### Project Configuration
##### Parameters
- __MLP_name__ - name of your project
- __MLP_data_file__ - Location of data (Full path, unless copying to build)
- __MLP_entry_points__ - Dict of entry points. Detailed below.
- __MLP_nb_name__ - Notebook name. Defaults to current nb.
- __MLP_script_name__ - Name of the script to build to.


##### Defaults

- __MLP_model_section__ - Directory to build each test in.
- __MLP_readme_section__ - Directory to build each test in.
- __MLP_env__ - conda environment. This defaults to your "conda.yaml"
- __MLP_mlproject_file__ - name of script (default is MLP_nb_name)-(.ipynb)+(.py)
- __MLP_conda_channels__ - name of script (default is MLP_nb_name)-(.ipynb)+(.py)
- __MLP_conda_file__ - name of script (default is MLP_nb_name)-(.ipynb)+(.py)

##### Entry Points
- __MLP_entry_points__ - build_mlp_entrypoints() function converts a list into a defaultdict with the standard settings and can be added to in the following fashion:
    - ```MLP_entry_points["main"]["parameters"]["regularization"] = "{type: float, default: 0.1}"```

In [63]:
MLP_name = "MLFlow_Test"
MLP_data_file = "data.csv"
MLP_build_dir = "build/" + MLP_name + "/"
MLP_nb_name = "mlflow_notebook.ipynb"
MLP_script_name = ''.join(MLP_nb_name.split(".")[:-1]) + ".py"
MLP_entry_points = build_mlp_entrypoints(["main", "validate"])

# Defaults
MLP_model_section = "Model"
MLP_readme_section = None
MLP_env = "conda.yaml"
MLP_mlproject_file="MLproject"
MLP_conda_channels = ["defaults"]
MLP_conda_file = "conda.yaml"


## Build MLflow Package
To build a package, you simply call the build_package function with your settings. It will generate all files required for MLFlow into a new directory. If your build_dir is set to "", it will generate the files locally.


```build_package(MLP_name,MLP_nb_name, MLP_script_name, MLP_build_dir, MLP_model_section=MLP_model_section)```




##### Notebook to  Script/Markdown Conversion
Converts this notebook to a .py file
```
convert_nb_to_script(notebook_name = "notebook.ipynb", script_name = "test.py", section=None, include_all_deps=True)
```

Specifying ```section``` will search markdown for a certain section (as delimited by ##) and only write that to a notebook. ```include_all_deps``` also adds any import statements included anywhere else in the notebook.

##### MLproject File Generation
Writes the MLproject file with user parameters (set above, in "Project Config")

```
build_MLproject_file(name, env, entry, out="MLproject")
```

##### Conda File Generation 
Writes dependencies into a dependencies file.
```deps``` takes a list of module names (name or name=4.2), or uses introspection to determine imported modules


In [62]:
build_package(MLP_name,MLP_nb_name, MLP_script_name, MLP_build_dir, MLP_model_section=MLP_model_section)

Wrote mlflow_notebook.md from mlflow_notebook.ipynb
Renamed mlflow_notebook.md to build/test/20180712-023944/README.md
Writing temporary file build/test/20180712-023944/__temp__mlflow_notebook.ipynb
Wrote build/test/20180712-023944/__temp__mlflow_notebook.py from build/test/20180712-023944/__temp__mlflow_notebook.ipynb
Cleaning up temporary file build/test/20180712-023944/__temp__mlflow_notebook.ipynb
Renamed build/test/20180712-023944/__temp__mlflow_notebook.py to build/test/20180712-023944/mlflow_notebook.py


'build/test/20180712-023944/'

# Model

### Build the Model

Per the standard configuration (MLP_model_section), anything under the heading Model will be built into the package.

In [232]:
def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2

if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

mlflow.sklearn.log_model(lr, "model")

NameError: name '__file__' is not defined

# Evaluation

In [266]:
#### blah

# Deployment

In [None]:
#