In [1]:
import requests
import datetime

import pandas as pd
import zipfile

import mlflow
import os


from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Point the MLflow client at the remote tracking server.
# Host and port can be overridden through environment variables so the notebook
# does not have to be edited when the server moves; the defaults are the
# original hard-coded values (external IP reserved in GCP, port 5000).
TRACKING_SERVER_HOST = os.environ.get("TRACKING_SERVER_HOST", "34.171.118.161")
TRACKING_SERVER_PORT = os.environ.get("TRACKING_SERVER_PORT", "5000")
mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}")

In [3]:
# Echo the URI the MLflow client is currently configured to use.
active_tracking_uri = mlflow.get_tracking_uri()
print(f"tracking URI: '{active_tracking_uri}'")

tracking URI: 'http://34.171.118.161:5000'


In [4]:
# Parquet file read by the training cell and recorded on the run as the "data_file" param.
data_file = "../data/processed/202304-usage.parquet"

In [5]:
# Make "experiment-2" the active experiment; the call is the cell's last
# expression, so the returned Experiment object is displayed as the output.
experiment_name = "experiment-2"
mlflow.set_experiment(experiment_name)

2024/07/17 09:49:10 INFO mlflow.tracking.fluent: Experiment with name 'experiment-2' does not exist. Creating a new experiment.


<Experiment: artifact_location='gs://mlops-divvy-experiment-tracking/mlruns/2', creation_time=1721227750156, experiment_id='2', last_update_time=1721227750156, lifecycle_stage='active', name='experiment-2', tags={}>

In [12]:
# Train a linear-regression baseline and record the run (params, metric, model) in MLflow.
with mlflow.start_run():
    # Load the prepared data and record which file this run was trained on.
    df = pd.read_parquet(data_file)
    mlflow.log_param("data_file", data_file)

    features = df[['station_name', 'hour', 'day_of_week']]
    target = df['net_usage']

    # One-hot encode the categorical columns; 'hour' is passed through unchanged.
    features = pd.get_dummies(features, columns=['station_name', 'day_of_week'], dtype=float)

    # Log the split settings so the train/test partition can be reproduced
    # from the run record alone.
    split_params = {"test_size": 0.2, "random_state": 42}
    mlflow.log_params(split_params)
    X_train, X_test, y_train, y_test = train_test_split(features, target, **split_params)

    lr = LinearRegression().fit(X_train, y_train)

    # Evaluate on the held-out split and record the error.
    y_pred = lr.predict(X_test)
    mlflow.log_metric("mse", mean_squared_error(y_test, y_pred))

    # Store the fitted model under the run's artifact location.
    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

# Last expression of the cell: list the experiments known to the tracking server.
mlflow.search_experiments()



default artifacts URI: 'gs://mlops-divvy-experiment-tracking/mlruns/2/7625cbdb282847ed97d290afe51f1547/artifacts'


[<Experiment: artifact_location='gs://mlops-divvy-experiment-tracking/mlruns/2', creation_time=1721227750156, experiment_id='2', last_update_time=1721227750156, lifecycle_stage='active', name='experiment-2', tags={}>,
 <Experiment: artifact_location='gs://mlops-divvy-experiment-tracking/mlruns/1', creation_time=1721225054205, experiment_id='1', last_update_time=1721225054205, lifecycle_stage='active', name='experiment-1', tags={}>,
 <Experiment: artifact_location='gs://mlops-divvy-experiment-tracking/mlruns/0', creation_time=1721165169497, experiment_id='0', last_update_time=1721165169497, lifecycle_stage='active', name='Default', tags={}>]