In [1]:
import mlflow
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
import os
import warnings
import sys

import mlflow
import mlflow.sklearn

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

In [3]:
remote_server_uri = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(remote_server_uri)

In [4]:
mlflow.tracking.get_tracking_uri()

'http://127.0.0.1:5000'

In [7]:
experiment_id = mlflow.create_experiment("Housing_Price_Linear_Regression_Parent_Child")
    
with mlflow.start_run(
        run_name="Linear_Regression_PR",
        experiment_id=experiment_id,
        tags={"version": "v1", "priority": "P1"},
        description="Data Preprocessing",
        
) as parent_run:
    dh = pd.read_csv("housing.csv")
    
    dh["rooms_per_household"] = dh["total_rooms"]/dh["households"]
    dh["bedrooms_per_room"] = dh["total_bedrooms"]/dh["total_rooms"]
    dh["population_per_household"]=dh["population"]/dh["households"]
    
    
    dh["total_bedrooms"].fillna(dh["total_bedrooms"].median(), inplace=True)
    dh["bedrooms_per_room"].fillna(dh["bedrooms_per_room"].median(), inplace=True)
    
    dh_num = dh.select_dtypes(include=[np.number])
    dh_char = dh.select_dtypes(include=[np.object])

    from sklearn.preprocessing import StandardScaler

    ss=StandardScaler()
    dh_num_scale = pd.DataFrame(ss.fit_transform(dh_num),columns=dh_num.columns)

    dh_char_dum = pd.get_dummies(data=dh_char, drop_first=True)
    dh_num_scale=dh_num_scale.reset_index(drop=True)
    dh_char_dum = dh_char_dum.reset_index(drop=True)
    dh_clean = pd.concat([dh_num_scale,dh_char_dum],axis=1)
    dh_clean.to_csv('housing_clean.csv')
    
    def eval_metrics(actual, pred):
        # compute relevant metrics
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2


    def load_data(data_path):
        data = pd.read_csv(data_path)

        # Split the data into training and test sets. (0.75, 0.25) split.
        train, test = train_test_split(data)

        # The predicted column is "quality" which is a scalar from [3, 9]
        train_x = train.drop(["median_house_value"], axis=1)
        test_x = test.drop(["median_house_value"], axis=1)
        train_y = train[["median_house_value"]]
        test_y = test[["median_house_value"]]
        return train_x, train_y, test_x, test_y
    
    warnings.filterwarnings("ignore")
    np.random.seed(40)
    data_path = "housing_clean.csv"
    train_x, train_y, test_x, test_y = load_data(data_path)
    mlflow.log_param("Data Split",'yes')
    with mlflow.start_run(
        run_name="MODELLING",
        experiment_id=experiment_id,
        description="Modelling",
        nested=True,
        
        ) as child_run:
        
            run_name="CHILD_RUN",
            experiment_id=experiment_id,
            description="child",
            nested=True,

            # Execute ElasticNet
            lr = LinearRegression()
            lr.fit(train_x, train_y)


            # Evaluate Metrics
            predicted_qualities = lr.predict(test_x)
            (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)


            # Print out metrics
            print("Linear Regression  model:")
            print("  RMSE: %s" % rmse)
            print("  MAE: %s" % mae)
            print("  R2: %s" % r2)

            mlflow.log_metric(key="rmse", value=rmse)
            mlflow.log_metrics({"mae": mae, "r2": r2})
            mlflow.log_artifact(data_path)
            print("Save to: {}".format(mlflow.get_artifact_uri()))
            mlflow.sklearn.log_model(lr, "model")
            mlflow.log_param("Modelling", "yes")
            
    with mlflow.start_run(
       
        run_name="CHILD_RUN",
        experiment_id=experiment_id,
        description="child",
        nested=True,
        
    ) as child_run:
        mlflow.log_param("child_1", "yes")   
            
        
print("parent run:")

print("run_id: {}".format(parent_run.info.run_id))
print("description: {}".format(parent_run.data.tags.get("mlflow.note.content")))
print("version tag value: {}".format(parent_run.data.tags.get("version")))
print("priority tag value: {}".format(parent_run.data.tags.get("priority")))
print("--")

# Search all child runs with a parent id
query = "tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
results = mlflow.search_runs(experiment_ids=[experiment_id], filter_string=query)


Linear Regression  model:
  RMSE: 0.5953237025985277
  MAE: 0.4298795413904609
  R2: 0.6529459027404896
Save to: mlruns/5/3e5a40d45aa44fed8e6634f2b26d070d/artifacts
parent run:
run_id: f1f3dd607c814c479a137295bb2bd568
description: Data Preprocessing
version tag value: v1
priority tag value: P1
--
