# Regression: CC50

This notebook trains regression models for the CC50 target.

- Загружает `data/feature_engineered_data.csv`
- Находит колонку с CC50 (по подстроке `cc50`, регистр не важен)
- Делит на train/test и вызывает `train_regression_models`
- Сохраняет результаты в `models/regression_CC50_results.json`

In [1]:
# Standard library
import sys
from pathlib import Path

# Third-party
import pandas as pd
from IPython.display import Markdown, display
from sklearn.model_selection import train_test_split

# Repository root, expressed relative to the notebook's working directory.
# Its `src` folder is appended to sys.path so the project imports below resolve.
repo_root = Path('..')
sys.path.append(str(repo_root / 'src'))

# Project-local helpers (must come after the sys.path tweak above)
from models.train_models import save_model_results, train_regression_models
from utils.pretty_result import pretty_results


In [None]:
# --- Load the engineered feature table -------------------------------------
feature_path = repo_root / 'data' / 'feature_engineered_data.csv'
if not feature_path.exists():
    raise FileNotFoundError(f'Feature file not found: {feature_path.resolve()}')
df = pd.read_csv(feature_path)

# --- Locate the regression target -------------------------------------------
# Match CC50-like columns case-insensitively, but skip `*_class` columns:
# the feature filter below treats that suffix as non-feature label columns,
# so a column such as `CC50_class` must not be picked as the regression
# target just because it happens to come first in the file.
target_candidates = [
    c for c in df.columns
    if 'cc50' in c.lower() and not c.lower().endswith('_class')
]
if not target_candidates:
    raise RuntimeError('No CC50-like column found in feature file')
target_col = target_candidates[0]
if len(target_candidates) > 1:
    # The choice is order-dependent; make the ignored alternatives visible.
    print('Multiple CC50-like columns found; ignoring:', target_candidates[1:])
print('Using target column:', target_col)

# --- Build the feature matrix and target vector -----------------------------
# Every CC50-like column and every `*_class` column is excluded from the
# features.
# NOTE(review): any other column derived from the target would still be kept
# here and could leak it — confirm none remain among the features.
feature_cols = [c for c in df.columns if 'cc50' not in c.lower() and not c.lower().endswith('_class')]
X = df[feature_cols]
y = df[target_col]

# --- Split, train, persist --------------------------------------------------
# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
results = train_regression_models(X_train, y_train, X_test, y_test, target_col)
# NOTE(review): this output path is relative to the current working directory,
# unlike `feature_path`, which is anchored at `repo_root` — confirm that
# `save_model_results` resolves it to the intended location.
save_model_results({target_col: results}, 'models/regression_CC50_results.json')

Using target column: CC50, mM


In [None]:
# Render a Markdown heading, then hand the trained-model results to the
# project's pretty-printer for display.
results_heading = Markdown("## **Результаты**\n---")
display(results_heading)
pretty_results(results)