In [10]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [11]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

X = train_df.drop(columns=["id", "smoking"])
y = train_df["smoking"].values

In [12]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [13]:
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [14]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)

X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

In [15]:
pos_weight = torch.tensor(
    (y_train == 0).sum() / (y_train == 1).sum()
)

class FocalLoss(nn.Module):
    def __init__(self, alpha=0.75, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction="none"
        )
        pt = torch.exp(-bce)
        loss = self.alpha * (1 - pt) ** self.gamma * bce
        return loss.mean()

criterion = FocalLoss(alpha=0.75, gamma=2.0)


  pos_weight = torch.tensor(


In [30]:
class SmokingNN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.ReLU(),

            nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.net(x).squeeze()

In [31]:
model = SmokingNN(X_train.shape[1])
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

In [34]:
best_acc = 0

for epoch in range(50):
    model.train()
    optimizer.zero_grad()

    logits = model(X_train)
    loss = criterion(logits, y_train)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        val_logits = model(X_val)
        probs = torch.sigmoid(val_logits)
        preds = (probs > 0.5).int()
        acc = accuracy_score(y_val, preds)

    if acc > best_acc:
        best_acc = acc

    if epoch % 10 == 0:
        print(f"Epoch {epoch:02d} | Loss {loss:.4f} | Val Acc {acc:.4f}")

print("Best validation accuracy:", best_acc)


Epoch 00 | Loss 0.0762 | Val Acc 0.7967
Epoch 10 | Loss 0.0763 | Val Acc 0.7953
Epoch 20 | Loss 0.0756 | Val Acc 0.7953
Epoch 30 | Loss 0.0752 | Val Acc 0.7963
Epoch 40 | Loss 0.0749 | Val Acc 0.7950
Best validation accuracy: 0.797


In [26]:
test_X = scaler.transform(test_df.drop(columns=["id"]))
test_X = torch.tensor(test_X, dtype=torch.float32)

model.eval()
with torch.no_grad():
    preds = torch.sigmoid(model(test_X))
    preds = (preds > 0.5).int().numpy()

submission = pd.DataFrame({
    "id": test_df["id"],
    "smoking": preds
})

submission.to_csv("submission.csv", index=False)
