In [1]:
import pandas as pd
import numpy as np
import mlflow
import os

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the raw temperature data and one-hot encode its non-numeric columns.
# Each stage gets its own name so re-running the cell never operates on an
# already-transformed frame.
temps_raw = pd.read_csv('temps.csv')
temps_encoded = pd.get_dummies(temps_raw)

# 'actual' is the regression target; every remaining column is a predictor.
labels = np.array(temps_encoded['actual'])
predictors = temps_encoded.drop('actual', axis=1)

# Keep the column names: the ndarray conversion below discards them.
feature_list = list(predictors.columns)

# Ask for float explicitly. Recent pandas emits bool dummy columns, and a
# frame mixing bool with numeric columns otherwise converts to an
# object-dtype array instead of a numeric one.
features = predictors.to_numpy(dtype=float)

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.25, random_state=42
)

In [3]:
# Train a random forest on the training split and record its parameters,
# evaluation metrics and fitted model in MLflow.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Temperature model")

n_estimators = 10000
min_samples_split = 2
# Seed the forest so that the logged metrics and model can be reproduced
# from the logged parameters.
random_state = 42

with mlflow.start_run():
    rf = RandomForestRegressor(n_estimators=n_estimators,
                               min_samples_split=min_samples_split,
                               random_state=random_state)
    rf.fit(train_features, train_labels)
    preds = rf.predict(test_features)

    # Mean absolute error on the held-out split.
    errors = np.abs(preds - test_labels)
    mae = np.mean(errors)

    # "accuracy" is 100 minus the mean absolute percentage error.
    # NOTE(review): assumes test_labels contains no zeros; a zero label
    # makes the percentage error infinite -- confirm against the data.
    mape = 100 * (errors / test_labels)
    accuracy = 100 - np.mean(mape)

    mlflow.log_params({
        'n_estimators': n_estimators,
        'min_samples_split': min_samples_split,
        'random_state': random_state,
    })
    mlflow.log_metrics({'mae': mae, 'accuracy': accuracy})

    mlflow.sklearn.log_model(rf, 'model')


2022/04/07 13:12:10 INFO mlflow.tracking.fluent: Experiment with name 'Temperature model' does not exist. Creating a new experiment.
