# Regression: IC50

This notebook trains regression models for the IC50 target.

- Загружает `data/feature_engineered_data.csv`
- Находит колонку с IC50 (по подстроке `ic50`, регистр не важен)
- Делит на train/test и вызывает `train_regression_models`
- Сохраняет результаты в `models/regression_IC50_results.json`

In [3]:
from pathlib import Path
import sys
from IPython.display import display, Markdown
import pandas as pd
from sklearn.model_selection import train_test_split

# Repository root relative to the notebook's working directory; project
# modules are imported from its `src` folder.
repo_root = Path('..')
src_dir = str(repo_root / 'src')
# Guard against duplicate entries so re-running this cell stays idempotent.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from models.train_models import train_regression_models, save_model_results
from utils.pretty_result import pretty_results as pretty_results

In [2]:
# Load the engineered feature table, pick the IC50 target column, split the
# data, train the regression models and persist their metrics.
TEST_SIZE = 0.2      # fraction of rows held out for evaluation
RANDOM_STATE = 42    # fixed seed so the split is reproducible

feature_path = repo_root / 'data' / 'feature_engineered_data.csv'
if not feature_path.exists():
    raise FileNotFoundError(f'Feature file not found: {feature_path.resolve()}')
df = pd.read_csv(feature_path)

# Target = first column whose name contains "ic50" (case-insensitive).
target_col = next((c for c in df.columns if 'ic50' in c.lower()), None)
if target_col is None:
    raise RuntimeError('No IC50-like column found in feature file')
print('Using target column:', target_col)

# Rows without a target value cannot be used for fitting or scoring, so they
# are removed before the split. `df` itself is left untouched.
n_missing_target = int(df[target_col].isna().sum())
if n_missing_target:
    print(f'Dropping {n_missing_target} rows with missing target values')
df_labeled = df.dropna(subset=[target_col])

# Features = every column that is neither IC50-like nor a `*_class` label.
# NOTE(review): other target-like columns (if any) are NOT excluded here —
# confirm none of the remaining columns are derived from the IC50 target.
feature_cols = [c for c in df.columns if 'ic50' not in c.lower() and not c.lower().endswith('_class')]
X = df_labeled[feature_cols]
y = df_labeled[target_col]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)
results = train_regression_models(X_train, y_train, X_test, y_test, target_col)

# NOTE(review): this path is relative to the current working directory, while
# the input file above is resolved via `repo_root` — confirm this is intended.
save_model_results({target_col: results}, 'models/regression_IC50_results.json')

Using target column: IC50, mM


In [4]:
# Render a section heading, then show the formatted metrics as the cell output.
results_heading = Markdown("## **Результаты**\n---")
display(results_heading)
pretty_results(results)

## **Результаты**
---

Unnamed: 0_level_0,0
model,Unnamed: 1_level_1
LinearRegression_mse,2817853.0
LinearRegression_mae,420.9934
LinearRegression_rmse,1678.646
LinearRegression_r2,-7.447834
Ridge_mse,18000660.0
Ridge_mae,558.4433
Ridge_rmse,4242.719
Ridge_r2,-52.96542
RandomForestRegressor_mse,88333.12
RandomForestRegressor_mae,52.51289
