In [7]:
# Third-party and stdlib dependencies used by the cells below.
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import sklearn as sk
import os
# Set before `import mlflow` on purpose -- presumably mlflow reads this flag
# at import time (TODO confirm against the installed mlflow version).
os.environ["MLFLOW_ENABLE_LOGGED_MODELS"] = "false"
import mlflow
# Show which tracking server mlflow currently resolves to.
print(mlflow.get_tracking_uri())



http://localhost:5000


In [8]:
# Local-development fallbacks for the connection settings read by the cells
# below. setdefault() only fills a variable that is missing, so a value that
# was already exported by the shell / scheduler / deployment is no longer
# silently overwritten by the notebook.
#
# NOTE(security): the fallback DSN embeds a plaintext password. It is kept so
# the notebook still runs unchanged on the original local setup, but real
# credentials should be supplied through the environment (or a secrets store)
# and this literal removed before the notebook is shared or committed.
os.environ.setdefault("DATABASE_URL", "postgresql+psycopg2://postgres:qwerty123@localhost:5432")
os.environ.setdefault("MLFLOW_TRACKING_URI", "http://localhost:5000")

In [9]:
def load_df():
    """Load the whole ``macro_kz`` table from the ``credits_total_kz`` database.

    The server-level connection URL is taken from the ``DATABASE_URL``
    environment variable and the database name is appended to it.

    Returns:
        pandas.DataFrame: every row and column of ``macro_kz``.

    Raises:
        RuntimeError: if ``DATABASE_URL`` is unset or empty (previously this
            surfaced as an opaque ``TypeError`` from ``None + str``).
    """
    base_url = os.getenv("DATABASE_URL")
    if not base_url:
        raise RuntimeError(
            "DATABASE_URL is not set; expected a server URL such as "
            "'postgresql+psycopg2://user:password@host:5432'"
        )

    # Tolerate a trailing slash so we never build '...5432//credits_total_kz'.
    engine = create_engine(base_url.rstrip("/") + "/credits_total_kz")
    try:
        return pd.read_sql("select * from macro_kz", engine)
    finally:
        # Release the pooled connection(s); otherwise every call to this
        # function leaves one open for the lifetime of the kernel.
        engine.dispose()

In [10]:
def split_df(df, test_size=0.2, random_state=42):
    """Randomly split ``df`` into train/validation features and targets.

    Args:
        df: DataFrame containing at least the ``month`` and ``target_y``
            columns.
        test_size: Share of rows assigned to the validation set, forwarded to
            ``train_test_split``. Defaults to the previous hard-coded 0.2.
        random_state: Seed for the shuffle. Defaults to 42 so repeated runs
            compare models on the same rows; pass ``None`` to restore the old
            unseeded behaviour.

    Returns:
        tuple: ``(x_tr, x_val, y_tr, y_val)``. The feature frames are indexed
        by ``month`` and no longer contain ``target_y``; the targets are the
        corresponding ``target_y`` Series.
    """
    # Import the submodule explicitly: attribute access on the bare `sklearn`
    # package is not guaranteed to resolve submodules on every version.
    from sklearn.model_selection import train_test_split

    x_tr, x_val, y_tr, y_val = train_test_split(
        df, df["target_y"], test_size=test_size, random_state=random_state
    )

    def _to_features(frame):
        # `month` becomes the index so it is not fed to the model as a feature;
        # the target column is dropped to avoid leaking it into the inputs.
        return frame.set_index("month").drop(columns=["target_y"])

    return _to_features(x_tr), _to_features(x_val), y_tr, y_val

In [11]:
import os
import mlflow

def _drop_missing_targets(features, target):
    """Return ``(features, target)`` restricted to rows whose target is not NaN.

    ``target`` is converted to a float NumPy array; ``features`` is filtered
    with the matching boolean mask.
    """
    target = np.asarray(target, dtype=float)
    keep = ~np.isnan(target)
    return features[keep], target[keep]


def main():
    """Train a random-forest regressor on ``macro_kz`` and log it to MLflow.

    Steps: configure MLflow from ``MLFLOW_TRACKING_URI`` / ``MLFLOW_EXPERIMENT``,
    load and split the data, drop rows with a missing target, fit a
    scaler + random-forest pipeline, then log the parameters, the validation
    RMSE and the fitted model (under ``MODEL_ARTIFACT_PATH``, default
    ``"model"``) in a single MLflow run.
    """
    # Explicit submodule imports: attribute access on the bare `sklearn`
    # package is not guaranteed to resolve submodules on every version.
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000"))
    mlflow.set_experiment(os.getenv("MLFLOW_EXPERIMENT", "credits_total_kz"))

    x_tr, x_val, y_tr, y_val = split_df(load_df())

    # Rows without a target can neither be trained on nor scored.
    x_tr, y_tr = _drop_missing_targets(x_tr, y_tr)
    x_val, y_val = _drop_missing_targets(x_val, y_val)

    with mlflow.start_run():
        n_estimators = 300
        # Scaler and regressor are bundled so the logged artifact contains the
        # preprocessing too. Previously only the forest was logged, so the
        # saved model silently expected already-standardised inputs.
        model = Pipeline(
            steps=[
                ("scaler", StandardScaler()),
                (
                    "regressor",
                    RandomForestRegressor(
                        n_estimators=n_estimators, random_state=42, n_jobs=-1
                    ),
                ),
            ]
        )

        mlflow.log_param("model", "RandomForestRegressor")
        mlflow.log_param("n_estimators", n_estimators)

        model.fit(x_tr, y_tr)
        pred = model.predict(x_val)

        rmse = mean_squared_error(y_val, pred) ** 0.5
        mlflow.log_metric("rmse", rmse)

        artifact_path = os.getenv("MODEL_ARTIFACT_PATH", "model")
        mlflow.sklearn.log_model(model, artifact_path)

# Entry point: run the training job when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()
   

  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


🏃 View run judicious-jay-659 at: http://localhost:5000/#/experiments/1/runs/8ae55dca814c469d8e6666b3ab2e40a5
🧪 View experiment at: http://localhost:5000/#/experiments/1
