In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from mlflow import set_tracking_uri, set_experiment, start_run, end_run, log_metric, log_param, log_artifacts

# MLflow configuration: where runs are recorded and under which experiment.
TRACKING_URI = 'http://mlflow:5000'
EXPERIMENT_NAME = 'sample-mlflow'

set_tracking_uri(uri=TRACKING_URI)
set_experiment(EXPERIMENT_NAME)

In [2]:
# Toy admissions dataset: four parallel columns of 40 records each.
# `admitted` is the 0/1 label; the other three columns are the features.
candidates = {
    'gmat': [
        780, 750, 690, 710, 680, 730, 690, 720, 740, 690,
        610, 690, 710, 680, 770, 610, 580, 650, 540, 590,
        620, 600, 550, 550, 570, 670, 660, 580, 650, 660,
        640, 620, 660, 660, 680, 650, 670, 580, 590, 690,
    ],
    'gpa': [
        4, 3.9, 3.3, 3.7, 3.9, 3.7, 2.3, 3.3, 3.3, 1.7,
        2.7, 3.7, 3.7, 3.3, 3.3, 3, 2.7, 3.7, 2.7, 2.3,
        3.3, 2, 2.3, 2.7, 3, 3.3, 3.7, 2.3, 3.7, 3.3,
        3, 2.7, 4, 3.3, 3.3, 2.3, 2.7, 3.3, 1.7, 3.7,
    ],
    'work_experience': [
        3, 4, 3, 5, 4, 6, 1, 4, 5, 1,
        3, 5, 6, 4, 3, 1, 4, 6, 2, 3,
        2, 1, 4, 1, 2, 6, 4, 2, 6, 5,
        1, 2, 4, 6, 5, 1, 2, 1, 4, 5,
    ],
    'admitted': [
        1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 0, 0, 0, 0, 1, 1, 0, 1, 1,
        0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
    ],
}

# Build the frame with an explicit column order, then preview the first rows.
column_order = ['gmat', 'gpa', 'work_experience', 'admitted']
df = pd.DataFrame(candidates, columns=column_order)
df.head()

Unnamed: 0,gmat,gpa,work_experience,admitted
0,780,4.0,3,1
1,750,3.9,4,1
2,690,3.3,3,0
3,710,3.7,5,1
4,680,3.9,4,0


In [3]:
# Split the frame into the feature matrix (X) and the target vector (y).
X = df.loc[:, ['gmat', 'gpa', 'work_experience']]
y = df.loc[:, 'admitted']

In [4]:
# Fit a logistic-regression classifier and record its parameters and score
# in a single MLflow run.
# The `with` block owns the run's lifetime: the context manager returned by
# start_run() ends the run when the block exits, so no explicit end_run()
# call belongs inside it.
with start_run(run_name='Logistic Regression penalty and C test'):

    # Hyperparameters under test; logged so the run is reproducible.
    penalty = 'l2'
    C = 1.4

    log_param('penalty', penalty)
    log_param('C', C)

    clf = LogisticRegression(penalty=penalty, C=C, max_iter=10_000)
    clf.fit(X, y)
    # NOTE: scored on the same rows used for fitting, so this is training
    # accuracy, not a held-out estimate.
    accuracy = clf.score(X, y)

    log_metric('accuracy', accuracy)