<a href="https://colab.research.google.com/github/nikitazhuikov/ML-projects/blob/main/TitanikSklearnML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [205]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [206]:
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature

In [207]:
titanic_df = pd.read_csv('Titanic-Dataset.csv', usecols=['Pclass', 'Survived', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'])
titanic_df['Sex'] = pd.factorize(titanic_df['Sex'])[0]
titanic_df['Embarked'] = pd.factorize(titanic_df['Embarked'])[0]
titanic_df = titanic_df.dropna()
numerical_cols = titanic_df[['Age', 'Fare']].columns

In [208]:
# нормализация
titanic_df[numerical_cols] = StandardScaler().fit_transform(titanic_df[numerical_cols])

In [209]:
x = titanic_df.drop('Survived', axis = 1)

In [210]:
mlflow.set_tracking_uri("http://127.0.0.1:5001")
mlflow.set_experiment("log_reg")


<Experiment: artifact_location='mlflow-artifacts:/189883142389238581', creation_time=1743612627305, experiment_id='189883142389238581', last_update_time=1743612627305, lifecycle_stage='active', name='log_reg', tags={}>

In [211]:
y = titanic_df['Survived']

In [212]:
x_train, x_test, y_train,  y_test = train_test_split(x, y, test_size=0.3)
# x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5)

In [213]:
with mlflow.start_run(run_name="Log_reg_100"):
    mdl = LogisticRegression(max_iter=100)
    mdl.fit(x_train, y_train)
    y_pred = mdl.predict(x_test)
    TP = ((y_test == 1) & (y_pred == 1)).sum()
    TN = ((y_test == 0) & (y_pred == 0)).sum()
    FP = ((y_test == 0) & (y_pred == 1)).sum()
    FN = ((y_test == 1) & (y_pred == 0)).sum()
    metrics = {
        'accuracy': (TP + TN)/(TP + TN + FP + FN),
        'precision': TP  / (TP + FP),
        'recall': TP / (TP + FP),
        'sensitivity': TP / (TP + FN)
    }
    mlflow.log_metrics(metrics)

🏃 View run Log_reg_100 at: http://127.0.0.1:5001/#/experiments/189883142389238581/runs/0458dcf18bc043458310ff8f12630d51
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/189883142389238581


In [214]:
accuracy = (TP + TN)/(TP + TN + FP + FN)
precision = TP  / (TP + FP)
recall = TP / (TP + FP)
sensitivity = TP / (TP + FN)

In [215]:
print('accuracy:', accuracy)
print('precision:', precision)
print('recall:', recall)
print('sensitivity:', sensitivity)

accuracy: 0.7581395348837209
precision: 0.7164179104477612
recall: 0.7164179104477612
sensitivity: 0.5925925925925926
