# Regression: IC50

This notebook trains regression models for the IC50 target.

- Loads `data/feature_engineered_data.csv`
- Finds the IC50 column (by the substring `ic50`, case-insensitive)
- Splits the data into train/test sets and calls `train_regression_models`
- Saves the results to `models/regression_IC50_results.json`

In [1]:
from pathlib import Path
import sys
import pandas as pd
from sklearn.model_selection import train_test_split

repo_root = Path('..')
sys.path.append(str(repo_root / 'src'))

from models.train_models import train_regression_models, save_model_results

def _mentions_ic50(column_name):
    """Return True when the lower-cased column name contains 'ic50'."""
    return 'ic50' in column_name.lower()


# Locate the engineered feature table under the repository root and fail
# early, with the absolute path in the message, when it is missing.
feature_path = repo_root.joinpath('data', 'feature_engineered_data.csv')
if not feature_path.exists():
    raise FileNotFoundError(f'Feature file not found: {feature_path.resolve()}')
df = pd.read_csv(feature_path)

# The regression target is the first column (in file order) whose name
# contains "ic50", compared case-insensitively.
target_col = next(filter(_mentions_ic50, df.columns), None)
if target_col is None:
    raise RuntimeError('No IC50-like column found in feature file')
print('Using target column:', target_col)

# Features are every column that neither mentions "ic50" nor ends with
# "_class" (both checks are case-insensitive).
# NOTE(review): any other target-like columns in the CSV would remain in the
# feature set; confirm against the data that nothing derived from IC50 leaks in.
feature_cols = [
    name
    for name in df.columns
    if not (_mentions_ic50(name) or name.lower().endswith('_class'))
]
X = df[feature_cols]
y = df[target_col]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = _split

results = train_regression_models(X_train, y_train, X_test, y_test, target_col)
print('Results for IC50 target:', results, sep='\n')

# Store the metrics keyed by the target column name.
# NOTE(review): unlike the input path, this output path is not anchored to
# repo_root; confirm how save_model_results resolves relative paths.
save_model_results({target_col: results}, 'models/regression_IC50_results.json')

Using target column: IC50, mM


Results for IC50 target:
{'LinearRegression': {'mse': 2817852.8627443374, 'mae': 420.9934031173349, 'rmse': 1678.6461398234999, 'r2': -7.447833764947484}, 'Ridge': {'mse': 18000662.549005605, 'mae': 558.4432858988644, 'rmse': 4242.718768549903, 'r2': -52.965417032036186}, 'RandomForestRegressor': {'mse': 88333.1178028945, 'mae': 52.51289151130137, 'rmse': 297.20887907815694, 'r2': 0.7351801064546639}, 'ElasticNet': {'mse': 1414698.3966897952, 'mae': 294.79608702540065, 'rmse': 1189.410945253908, 'r2': -3.2412210519515128, 'best_params': {'alpha': 0.1, 'l1_ratio': 0.1}}}
