In [1]:
import wandb
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.impute import SimpleImputer
import pandas as pd

In [2]:
!poetry run wandb login

[34m[1mwandb[0m: Currently logged in as: [33mtalverinat[0m ([33mloko-bank[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import os

os.environ["WANDB_NOTEBOOK_NAME"] = "../notebooks/baseline.ipynb"

In [4]:
df = pd.read_parquet("../data/data.parquet")

In [5]:
X = df.drop(["date", "D_12", "D_24", "D_36", "D_48", "D"], axis=1)
y = df["D_12"]

In [6]:
# Инициализация W&B
wandb.init(project='modeling baseline model', name='baseline_model')

[34m[1mwandb[0m: Currently logged in as: [33mtalverinat[0m ([33mloko-bank[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Train-test split
X_train_base, X_test_base, y_train_base, y_test_base = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Создание объекта SimpleImputer для заполнения пропущенных значений средним значением признака
imputer = SimpleImputer(strategy='mean')

In [9]:
# Обучение imputer на X_train_base и преобразование X_train_base и X_test_base
X_train_base_imputed = imputer.fit_transform(X_train_base)
X_test_base_imputed = imputer.transform(X_test_base)

In [10]:
# Логирование параметров
wandb.config.max_iter = 1000
wandb.config.penalty = 'l2'
wandb.config.C = 0.1
wandb.config.class_weight = 'balanced'
wandb.config.solver = 'saga'

In [11]:
# Обучение модели логистической регрессии на данных с заполненными пропущенными значениями
log_reg = LogisticRegression(
    max_iter=wandb.config.max_iter,
    penalty=wandb.config.penalty, 
    C=wandb.config.C, 
    class_weight=wandb.config.class_weight, 
    solver=wandb.config.solver
    )
log_reg.fit(X_train_base_imputed, y_train_base)



In [12]:
# Прогнозирование вероятностей
y_pred_probs_base = log_reg.predict_proba(X_test_base_imputed)[:, 1]

In [13]:
# Рассчет метрики Gini
gini_score = 2 * roc_auc_score(y_test_base, y_pred_probs_base) - 1

# Логирование Gini метрику в W&B
wandb.log({'Gini Score': gini_score})

In [14]:
gini_score

0.13319692999087573

In [15]:
# Finish W&B run
wandb.finish()

0,1
Gini Score,▁

0,1
Gini Score,0.1332
