# Neural Network

In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, roc_auc_score
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm


# Load dataset
# The CSV is expected to hold already-scaled, cleaned data (per the original author's note).
DATA_PATH = "hotel_data_cleaned.csv"
df = pd.read_csv(DATA_PATH)


In [38]:
# Row counts per value of the split flag. The flag is later used to separate
# training rows (== 0) from the held-out "most recent 3 months" test rows (== 1).
df['arrival_date_in_3_month'].value_counts()

arrival_date_in_3_month
0.0    74387
1.0    12990
Name: count, dtype: int64

In [39]:
# Name of the label column
target_col = 'is_canceled'

# Integer-encode every string-typed column in place, keeping each fitted
# encoder so the original category names can be recovered later.
label_encoders = {}
object_cols = df.select_dtypes(include=['object']).columns
for name in object_cols:
    encoder = LabelEncoder()
    df[name] = encoder.fit_transform(df[name])
    label_encoders[name] = encoder

# Separate predictors from the label
X = df.drop(columns=[target_col])
y = df[target_col]

# Temporal hold-out: rows flagged 1 (most recent 3 months) form the test set,
# rows flagged 0 form the training set. The flag itself is not a model feature.
split_flag = X['arrival_date_in_3_month']
in_train = split_flag == 0
in_test = split_flag == 1

X_train = X[in_train].drop(columns=['arrival_date_in_3_month'])
X_test = X[in_test].drop(columns=['arrival_date_in_3_month'])
y_train = y[in_train]
y_test = y[in_test]

# Convert every split to a float32 PyTorch tensor
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = (
    torch.tensor(part.values, dtype=torch.float32)
    for part in (X_train, X_test, y_train, y_test)
)

# Wrap the tensors in Datasets / DataLoaders (only the training loader shuffles)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [None]:
# --- Neural Network Definition ---
class CancellationNet(nn.Module):
    """Single-hidden-layer MLP that outputs one probability per input row.

    Architecture: Linear(input_dim -> hidden_dim) -> ReLU -> Dropout(dropout)
    -> Linear(hidden_dim -> 1) -> Sigmoid. The sigmoid is applied inside the
    network, so outputs lie in [0, 1] and are meant to be used with
    ``nn.BCELoss`` (not ``BCEWithLogitsLoss``).

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
        dropout: Dropout probability applied after the hidden activation.
    """

    def __init__(self, input_dim, hidden_dim, dropout):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),    # Input layer
            nn.ReLU(),                           # Activation function
            nn.Dropout(dropout),                 # Dropout for regularization
            nn.Linear(hidden_dim, 1),            # Output layer
            nn.Sigmoid()                         # Squashes output to [0, 1] for binary classification
        )

    def forward(self, x):
        """Return probabilities with the trailing unit dimension removed.

        For input of shape (batch, input_dim) the result has shape (batch,).
        """
        # Squeeze ONLY the last dimension. A bare .squeeze() would also drop
        # the batch dimension when batch == 1, yielding a 0-d tensor whose
        # shape no longer matches a (1,) target in nn.BCELoss.
        return self.model(x).squeeze(-1)

# --- Training Function ---
def train_model(X, y, params, device):
    """Score one hyperparameter configuration with 5-fold stratified CV.

    For each fold a fresh ``CancellationNet`` is trained with Adam and
    ``nn.BCELoss`` on the training part, then scored by ROC AUC on the
    held-out part.

    Args:
        X: Feature matrix of shape (n_samples, n_features); anything
            ``np.asarray`` can convert to a numeric array (e.g. a DataFrame).
        y: Binary labels aligned row-for-row with ``X``.
        params: Dict with keys 'hidden_dim', 'dropout', 'lr', 'batch_size'
            and 'epochs'.
        device: ``torch.device`` on which the model is trained and evaluated.

    Returns:
        Mean validation ROC AUC over the 5 folds.
    """
    # Build the tensors from the arguments. Previously the folds were sliced
    # out of the notebook globals X_train_tensor / y_train_tensor, so the
    # function silently ignored whatever data it was actually given.
    features = np.asarray(X, dtype=np.float32)
    labels = np.asarray(y)
    X_tensor = torch.tensor(features, dtype=torch.float32)
    y_tensor = torch.tensor(labels, dtype=torch.float32)

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    val_aucs = []

    for train_idx, val_idx in skf.split(features, labels):
        X_tr = X_tensor[train_idx]
        X_val = X_tensor[val_idx]
        y_tr = y_tensor[train_idx]
        y_val = y_tensor[val_idx]

        # Fresh model / optimizer per fold so no weights leak between folds
        model = CancellationNet(X_tensor.shape[1], params['hidden_dim'], params['dropout']).to(device)
        criterion = nn.BCELoss()    # Binary Cross-Entropy Loss for binary classification
        optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])   # using Adam optimizer

        train_ds = TensorDataset(X_tr, y_tr)
        train_loader = DataLoader(train_ds, batch_size=params['batch_size'], shuffle=True)

        model.train()
        for epoch in range(params['epochs']):
            for xb, yb in train_loader:
                xb, yb = xb.to(device), yb.to(device)
                optimizer.zero_grad()
                preds = model(xb)
                loss = criterion(preds, yb)
                loss.backward()
                optimizer.step()

        # Eval: score the whole validation fold in one forward pass
        model.eval()
        with torch.no_grad():
            preds = model(X_val.to(device)).cpu().numpy()   # Predicted probabilities
        auc = roc_auc_score(y_val.numpy(), preds)   # Calculate AUC
        val_aucs.append(auc)

    return np.mean(val_aucs)

In [42]:
# Hyperparameter Tuning
param_grid = [
    {'lr': 1e-3, 'hidden_dim': 64, 'dropout': 0.3, 'batch_size': 64, 'epochs': 10},
    {'lr': 1e-4, 'hidden_dim': 128, 'dropout': 0.5, 'batch_size': 32, 'epochs': 15},
    {'lr': 5e-4, 'hidden_dim': 256, 'dropout': 0.4, 'batch_size': 64, 'epochs': 12},
]

SEED = 42  # same value as the StratifiedKFold random_state used in train_model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

for config in tqdm(param_grid, desc="Tuning"):  # tqdm for progress bar
    # Re-seed before every configuration so weight init and batch shuffling
    # do not depend on the order of the grid (GPU kernels may still be
    # non-deterministic).
    torch.manual_seed(SEED)
    avg_auc = train_model(X_train, y_train, config, device)
    # Store the score in a copy: writing into `config` would mutate the
    # param_grid entries themselves. float() keeps np.float64 out of the
    # printed summary.
    results.append({**config, 'val_auc': float(avg_auc)})

# Sort by best AUC
results = sorted(results, key=lambda x: x['val_auc'], reverse=True)

Tuning: 100%|██████████| 3/3 [04:17<00:00, 85.95s/it]


In [44]:
# Ranked summary of every configuration tried (best validation AUC first)
print("\nHyperparameter Tuning Results:")
for i, res in enumerate(results):
    print(f"{i+1}. AUC={res['val_auc']:.4f} | Config={res}")

# Refit the top-ranked configuration on the full training split
best_params = results[0]
final_model = CancellationNet(
    X_train.shape[1],
    best_params['hidden_dim'],
    best_params['dropout'],
).to(device)
optimizer = torch.optim.Adam(final_model.parameters(), lr=best_params['lr'])
criterion = nn.BCELoss()    # model already applies a sigmoid, so plain BCE on probabilities

train_ds = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_ds, batch_size=best_params['batch_size'], shuffle=True)

final_model.train()
for _ in range(best_params['epochs']):
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        loss = criterion(final_model(batch_x), batch_y)
        loss.backward()
        optimizer.step()

# Score the held-out test split in a single forward pass
final_model.eval()
with torch.no_grad():
    test_preds = final_model(X_test_tensor.to(device)).cpu().numpy()
test_auc = roc_auc_score(y_test_tensor.numpy(), test_preds)
print(f"\n✅ Final Test AUC: {test_auc:.4f}")


Hyperparameter Tuning Results:
1. AUC=0.8953 | Config={'lr': 0.0005, 'hidden_dim': 256, 'dropout': 0.4, 'batch_size': 64, 'epochs': 12, 'val_auc': np.float64(0.8953157623301985)}
2. AUC=0.8912 | Config={'lr': 0.001, 'hidden_dim': 64, 'dropout': 0.3, 'batch_size': 64, 'epochs': 10, 'val_auc': np.float64(0.8911505423643561)}
3. AUC=0.8875 | Config={'lr': 0.0001, 'hidden_dim': 128, 'dropout': 0.5, 'batch_size': 32, 'epochs': 15, 'val_auc': np.float64(0.8874579077531809)}

✅ Final Test AUC: 0.8435
