In [5]:
import os
import sys

# Set the working directory to the project root
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from utils.prepare_data import load_and_prepare_data
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

# Загрузка
data = load_and_prepare_data()
X = data['X']
y = data['cls_cc50_median']  # бинарная цель

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Модели
models = {
    "LogReg": LogisticRegression(max_iter=1000),
    "RandomForest": RandomForestClassifier(),
    "GradientBoosting": GradientBoostingClassifier()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]
    
    print(f"\n{name}")
    print(classification_report(y_test, y_pred))
    print("ROC-AUC:", roc_auc_score(y_test, y_prob))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))



LogReg
              precision    recall  f1-score   support

           0       0.49      1.00      0.65       122
           1       0.00      0.00      0.00       129

    accuracy                           0.49       251
   macro avg       0.24      0.50      0.33       251
weighted avg       0.24      0.49      0.32       251

ROC-AUC: 0.5236052865675436
Confusion matrix:
 [[122   0]
 [129   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



RandomForest
              precision    recall  f1-score   support

           0       0.76      0.83      0.79       122
           1       0.82      0.75      0.79       129

    accuracy                           0.79       251
   macro avg       0.79      0.79      0.79       251
weighted avg       0.79      0.79      0.79       251

ROC-AUC: 0.8758101410598551
Confusion matrix:
 [[101  21]
 [ 32  97]]

GradientBoosting
              precision    recall  f1-score   support

           0       0.74      0.78      0.76       122
           1       0.78      0.74      0.76       129

    accuracy                           0.76       251
   macro avg       0.76      0.76      0.76       251
weighted avg       0.76      0.76      0.76       251

ROC-AUC: 0.8685029864023383
Confusion matrix:
 [[95 27]
 [33 96]]
