In [1]:
import os
import sys

# Set the working directory to the project root
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from utils.prepare_data import load_and_prepare_data
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

# Загрузка
data = load_and_prepare_data()
X = data['X']
y = data['cls_ic50_median']  # бинарная цель

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Модели
models = {
    "LogReg": LogisticRegression(max_iter=1000),
    "RandomForest": RandomForestClassifier(),
    "GradientBoosting": GradientBoostingClassifier()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]
    
    print(f"\n{name}")
    print(classification_report(y_test, y_pred))
    print("ROC-AUC:", roc_auc_score(y_test, y_prob))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))



LogReg
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       124
           1       0.51      1.00      0.67       127

    accuracy                           0.51       251
   macro avg       0.25      0.50      0.34       251
weighted avg       0.26      0.51      0.34       251

ROC-AUC: 0.4687261874523749
Confusion matrix:
 [[  0 124]
 [  0 127]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



RandomForest
              precision    recall  f1-score   support

           0       0.70      0.73      0.72       124
           1       0.73      0.69      0.71       127

    accuracy                           0.71       251
   macro avg       0.71      0.71      0.71       251
weighted avg       0.71      0.71      0.71       251

ROC-AUC: 0.7788608077216155
Confusion matrix:
 [[91 33]
 [39 88]]

GradientBoosting
              precision    recall  f1-score   support

           0       0.69      0.69      0.69       124
           1       0.70      0.70      0.70       127

    accuracy                           0.70       251
   macro avg       0.70      0.70      0.70       251
weighted avg       0.70      0.70      0.70       251

ROC-AUC: 0.7787973075946152
Confusion matrix:
 [[86 38]
 [38 89]]
