In [72]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pickle
import yaml

In [73]:
from sklearn.preprocessing import LabelEncoder

In [74]:
# --- Data -------------------------------------------------------------------
DATA: str = 'iris.csv'            # CSV file read into `df` below
target = 'target'                 # name of the label column in that file

# --- Train/test split -------------------------------------------------------
SEED: int = 42                    # shared random_state for the split and the model
TEST_SIZE: float = 0.2            # fraction of rows held out for evaluation

# --- Logistic regression hyperparameters ------------------------------------
C: float = 1.0
PENALTY: str = 'l2'
MULTI_CLASS: str = 'multinomial'

# Load the full dataset (features and label column) into memory.
df = pd.read_csv(DATA)

In [75]:
# Every column except the label column is treated as a model feature.
features = df.columns.tolist()
features.remove(target)  # raises ValueError if the label column is absent

# Feature matrix and (still un-encoded) label vector.
X, y = df[features], df[target]

In [76]:
# Encode the class labels as consecutive integers.
#
# The encoder is fitted on the original label column (`df[target]`) rather than
# on `y` itself. Because this cell rebinds `y`, fitting on `y` would make the
# cell non-idempotent: on a second run the encoder would be refitted on the
# already-encoded integers and `le.inverse_transform` would no longer return
# the original class names. A separate `le.fit(...)` call is unnecessary since
# `fit_transform` already fits the encoder.
le = LabelEncoder()
y = le.fit_transform(df[target])

In [77]:
# Hold out TEST_SIZE of the rows for evaluation; the split is seeded with SEED
# so that it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=SEED,
    test_size=TEST_SIZE,
)

In [78]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both partitions so no test-set statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

In [79]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [80]:
# Build the classifier from the configuration constants and train it on the
# standardized training features.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the version this notebook is pinned to.
logistic_reg = LogisticRegression(
    penalty=PENALTY,
    C=C,
    multi_class=MULTI_CLASS,
    random_state=SEED,
)
logistic_reg.fit(X_train_std, y_train)

In [81]:
# Predict (integer-encoded) class labels for the standardized held-out features.
preds = logistic_reg.predict(X_test_std)

In [82]:
# Score the held-out predictions.
#
# Precision, recall and F1 are macro-averaged (unweighted mean of the
# per-class scores). With micro-averaging on single-label multiclass
# predictions all three collapse to the accuracy value, so the four reported
# metrics would carry no additional information.
METRIC_AVERAGE = 'macro'

accuracy = accuracy_score(y_test, preds)
precision = precision_score(y_test, preds, average=METRIC_AVERAGE)
recall = recall_score(y_test, preds, average=METRIC_AVERAGE)
f1 = f1_score(y_test, preds, average=METRIC_AVERAGE)

In [83]:
# Collect the scores as plain Python floats so they serialize as ordinary
# scalars rather than NumPy objects.
metrics = {
    'accuracy_lr': float(accuracy),
    'precision_lr': float(precision),
    'recall_lr': float(recall),
    'f1_lr': float(f1),
}

In [84]:
# Persist the metrics as block-style YAML (one `key: value` pair per line).
with open('metrics_lr.yaml', 'w') as metrics_file:
    yaml.dump(
        metrics,
        metrics_file,
        default_flow_style=False,
    )

In [85]:
# Side-by-side actual vs. predicted class labels for the test partition,
# decoded from integers back to the original label values.
test_classes = pd.DataFrame({
    'actual_class': le.inverse_transform(y_test),
    'predicted_class': le.inverse_transform(preds),
})


In [86]:
# Write the test-set actual/predicted label pairs to CSV without the row index.
test_classes.to_csv('test_classes_lr.csv', index=False)


In [87]:
# Side-by-side actual vs. predicted class labels for the training partition,
# decoded from integers back to the original label values.
train_classes = pd.DataFrame({
    'actual_class': le.inverse_transform(y_train),
    'predicted_class': le.inverse_transform(logistic_reg.predict(X_train_std)),
})

In [88]:
# Write the training-set actual/predicted label pairs to CSV without the row index.
train_classes.to_csv('train_classes_lr.csv', index=False)

In [89]:
import pickle

# Serialize the fitted classifier to disk.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open('model_lr.pckl', 'wb') as model_file:
    pickle.dump(logistic_reg, model_file)