In [24]:
import pandas as pd
import pickle
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

In [25]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("ths-taxi-experiment")

<Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='ths-taxi-experiment', tags={}>

In [26]:
df = pd.read_csv("iris.csv")
print(df.size)
df.head()


750


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [27]:
train_df = df.sample(n=100)
train_df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
135,7.7,3.0,6.1,2.3,Virginica
79,5.7,2.6,3.5,1.0,Versicolor
11,4.8,3.4,1.6,0.2,Setosa
68,6.2,2.2,4.5,1.5,Versicolor
88,5.6,3.0,4.1,1.3,Versicolor


In [28]:
test_df = df.sample(n=150)
test_df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
132,6.4,2.8,5.6,2.2,Virginica
89,5.5,2.5,4.0,1.3,Versicolor
30,4.8,3.1,1.6,0.2,Setosa
101,5.8,2.7,5.1,1.9,Virginica
139,6.9,3.1,5.4,2.1,Virginica


In [29]:

x_train,y_train = train_df.iloc[:, :-1], train_df.iloc[:, -1]
x_test,y_test   = test_df.iloc[:, :-1], test_df.iloc[:, -1]
print("X train : ",x_train.shape)
print("x test  : ",x_test.shape)

X train :  (100, 4)
x test  :  (150, 4)


In [30]:
flow_dict = {"Virginica":0,
"Versicolor":1,
"Setosa":2}


In [31]:
y_train = [flow_dict[x] for x in y_train]
y_test = [flow_dict[x] for x in y_test]

In [32]:
from sklearn.naive_bayes import GaussianNB
gnb = LogisticRegression()
gnb.fit(x_train,y_train)

In [33]:

y_pred = gnb.predict(x_test)
accuracy = np.sum(y_test == y_pred) / len(y_test)
print("Model Accuracy: ",accuracy)

Model Accuracy:  0.96


### mlflow exp tracking

In [34]:
with mlflow.start_run():
    
    mlflow.set_tag("developer","tharhtet")
    mlflow.set_tag("version","prerelease")
    
    mlflow.log_param("traig-path","iris.csv")
    mlflow.log_param("train-amount","100")
    mlflow.log_param("val-amount","150")
    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    gnb = LogisticRegression()
    gnb.fit(x_train,y_train)    
    y_pred = gnb.predict(x_test)
    accuracy = np.sum(y_test == y_pred) / len(y_test)

    filename = 'finalized_model.sav'
    pickle.dump(gnb, open(filename, 'wb'))
    
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_artifact(local_path=filename, artifact_path="models_pickle")