In [1]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
# create a fake file
f = open("train.py", "a")
f.write("Now the file has more content!")
f.close()
    

In [3]:
df = pd.read_csv('datasets/50_Startups.csv')
df.head(2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06


In [4]:
# set parameters and load training data
n_jobs = 10

df['State']=df['State'].map({'New York':0,'Florida':1, 'California': 2}).astype(int)

In [5]:
df.head(2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,0,192261.83
1,162597.7,151377.59,443898.53,2,191792.06


In [6]:
# Training Data
X = df[["R&D Spend", "Administration", "Marketing Spend", "State"]]
y = df[["Profit"]]


X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7,random_state=0)

In [7]:
%%time
# Prebuild a model here
lr = LinearRegression(n_jobs=n_jobs)
lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)

score = lr.score(X, y)

CPU times: total: 31.2 ms
Wall time: 34.8 ms


In [9]:
# Create the experiment
mlflow.create_experiment('50 Startups LR Experiment')
mlflow.set_experiment_tag('ScikitLearn', 'LR')

In [10]:
# Set the experiment
mlflow.set_experiment('50 Startups LR Experiment')

# Start a run
run = mlflow.start_run()

In [11]:
run.info

<RunInfo: artifact_uri='file:///D:/Prg/DS/MLFlow/MLFLow_Datacamp/mlruns/681201772556970240/4fe5516c80a340428b298b7c69ec0d51/artifacts', end_time=None, experiment_id='681201772556970240', lifecycle_stage='active', run_id='4fe5516c80a340428b298b7c69ec0d51', run_name='honorable-vole-303', run_uuid='4fe5516c80a340428b298b7c69ec0d51', start_time=1723014647935, status='RUNNING', user_id='Mehdi.Omidshafiee'>

In [12]:
# Log a metric
# mlflow.log_metric('score', score)
mlflow.log_metrics({'score': score,
                    'r2score':r2_score(y_test, y_pred)})

# Log a parameter
mlflow.log_param('n_jobs', n_jobs)

# Log an artifact
mlflow.log_artifact('train.py')



In [14]:
run.data

<RunData: metrics={}, params={}, tags={'mlflow.runName': 'honorable-vole-303',
 'mlflow.source.name': 'C:\\ProgramData\\anaconda3\\Lib\\site-packages\\ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'Mehdi.Omidshafiee'}>

In [15]:
# mlflow.end_run()

## Search Runs

In [46]:
mlflow.search_experiments(filter_string="name LIKE '50%' ")

[<Experiment: artifact_location='file:///D:/Prg/DS/MLFlow/MLFLow_Datacamp/mlruns/681201772556970240', creation_time=1723014645339, experiment_id='681201772556970240', last_update_time=1723014645339, lifecycle_stage='active', name='50 Startups LR Experiment', tags={}>]

In [35]:
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.score,metrics.r2score,params.n_jobs,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.type,tags.mlflow.source.name
0,4fe5516c80a340428b298b7c69ec0d51,681201772556970240,RUNNING,file:///D:/Prg/DS/MLFlow/MLFLow_Datacamp/mlrun...,2024-08-07 07:10:47.935000+00:00,,0.94886,0.935514,10,Mehdi.Omidshafiee,honorable-vole-303,LOCAL,C:\ProgramData\anaconda3\Lib\site-packages\ipy...


In [48]:
# Filter string
r2_score_filter = "metrics.r2score > 0.95"
# Search runs
expr_name = '50 Startups LR Experiment'
mlflow.search_runs(experiment_names=[expr_name],
                   filter_string=r2_score_filter,
                   order_by=["metrics.r2score DESC"])

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time
