## Imports

In [1]:
import pandas as pd

from src.preprocess import preprocess_data
from src.features import build_feature_union
from src.model import train_model
from src.predict import make_submission

## Load Data

In [2]:
# Read both competition splits from the data directory one level above the notebook.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/train.csv", "../data/test.csv")
)

# Hold on to the raw test-set "id" column; it is handed to make_submission later.
test_ids = test["id"]

## Preprocessing

In [3]:
# Run the shared preprocessing routine over both splits, rebinding each name to
# its processed frame. Because the inputs are overwritten, re-running this cell
# passes already-preprocessed frames back through preprocess_data.
train, test = preprocess_data(train), preprocess_data(test)

## Split Target

In [4]:
# Separate the target column ("Tm") from the remaining columns.
# drop() returns a new frame, so `train` itself still contains "Tm" afterwards.
X, y = train.drop(columns=["Tm"]), train["Tm"]

## Feature Engineering (RDKit + Morgan)

In [5]:
# Build the feature extractor, telling it which column holds the SMILES strings.
feature_union = build_feature_union(smiles_col="SMILES")

# Fit on the training frame only, then reuse the already-fitted object on the
# test frame so both splits go through the same transformation.
# NOTE(review): `test` is passed as-is after preprocessing; presumably only the
# SMILES column is read by the extractor — confirm in src.features.
X_features = feature_union.fit_transform(X)
X_test_features = feature_union.transform(test)

## Train Model

In [6]:
# Train on the engineered training features and the target. train_model hands
# back two values, unpacked here as the fitted model and an error score; the
# recorded cell output prints a "Validation MAE" figure, presumably this `mae`.
# NOTE(review): how the validation data is held out is decided inside
# train_model and is not visible here — confirm before comparing scores.
model, mae = train_model(X_features, y)

Validation MAE: 29.4930


## Create Submission

In [7]:
# Guard against misaligned ids: `test_ids` was captured from the raw test frame
# before preprocess_data ran, so if any later step added or removed test rows
# the ids would no longer line up one-to-one with the feature rows.
n_test_rows = X_test_features.shape[0]
if len(test_ids) != n_test_rows:
    raise ValueError(
        f"test_ids has {len(test_ids)} entries but X_test_features has "
        f"{n_test_rows} rows; ids and predictions would be misaligned."
    )

# Predict on the test features and write the submission file for this run.
make_submission(
    model=model,
    X_test=X_test_features,
    ids=test_ids,
    output_path="../submissions/submission_v1.csv",
)


Submission saved to ../submissions/submission_v1.csv
