# Neural Network

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, roc_auc_score
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Read the cleaned hotel data CSV (path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer="hotel_data_cleaned.csv")


In [5]:
# Preview the first 30 rows of the loaded frame
df.iloc[:30]

Unnamed: 0,is_canceled,lead_time,arrival_date_year,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,...,country_binned_IRL,country_binned_ITA,country_binned_NLD,country_binned_Other,country_binned_PRT,agent_binned_9.0,agent_binned_14.0,agent_binned_240.0,agent_binned_Not Specified,agent_binned_Other
0,0.0,0.464043,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,1,0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,1,0
2,0.0,0.009498,0.0,0.0,0.0,0.02,0.018182,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
3,0.0,0.017639,0.0,0.0,0.0,0.02,0.018182,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,1
4,0.0,0.018996,0.0,0.0,0.0,0.04,0.036364,0.0,0.0,0.0,...,0,0,0,0,0,0,0,1,0,0
5,0.0,0.0,0.0,0.0,0.0,0.04,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,1,0
6,0.0,0.012212,0.0,0.0,0.0,0.04,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,1
7,1.0,0.115332,0.0,0.0,0.0,0.06,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,1,0,0
8,1.0,0.101764,0.0,0.0,0.0,0.06,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,1
9,1.0,0.031208,0.0,0.0,0.0,0.08,0.036364,0.0,0.0,0.0,...,0,0,0,0,1,0,0,1,0,0


In [None]:
# ----- Preprocessing -----
# Target
target_col = 'is_canceled'

# Encode categorical features
# Only object-dtype columns are touched. Each fitted encoder is kept in
# `label_encoders` so the integer codes can be mapped back to the original labels.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# The loaded frame contains an "Unnamed: 0" column, which looks like a row index
# that was written out with the CSV. A row id is not a property of the booking and
# would let the network key on file order, so it is excluded from the features.
index_cols = [name for name in df.columns if str(name).startswith('Unnamed:')]

# Split features/target
X = df.drop(columns=[target_col] + index_cols)
y = df[target_col]

# Train-test split: random 80/20 holdout, stratified on the target so both parts
# keep the same class balance. This is NOT a time-based split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Scale features (statistics are fitted on the training part only, then reused for test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Dataset and DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
# A dedicated seeded generator makes the shuffling order repeatable across runs,
# in line with the fixed random_state used for the split above.
shuffle_rng = torch.Generator().manual_seed(42)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, generator=shuffle_rng)
# The test loader is left unshuffled so predictions stay aligned with y_test.
test_loader = DataLoader(test_dataset, batch_size=64)

# ----- Define Neural Network -----
class CancellationNN(nn.Module):
    """Fully connected binary classifier that returns a probability per row.

    The network is a stack of ``Linear -> ReLU -> Dropout`` stages, one per entry
    in ``hidden_dims``, followed by a single-unit ``Linear`` layer and a
    ``Sigmoid``. With the default arguments the layer stack (and therefore the
    ``state_dict`` keys) is identical to the original fixed 128/64 architecture.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Width of each hidden layer, in order. Defaults to (128, 64).
        dropout: Dropout probability applied after every hidden activation.
            Defaults to 0.3.
    """

    def __init__(self, input_dim, hidden_dims=(128, 64), dropout=0.3):
        super().__init__()
        layers = []
        in_features = input_dim
        # Layers are created in forward order so parameter initialisation consumes
        # the RNG in the same sequence as a hand-written nn.Sequential would.
        for width in hidden_dims:
            layers += [nn.Linear(in_features, width), nn.ReLU(), nn.Dropout(dropout)]
            in_features = width
        # Single output unit squashed to [0, 1]; pairs with nn.BCELoss.
        layers += [nn.Linear(in_features, 1), nn.Sigmoid()]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return sigmoid outputs of shape (batch, 1) for a (batch, input_dim) input."""
        return self.model(x)

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation is repeatable between runs, in line with the fixed random_state
# used for the train/test split. (Some CUDA kernels can still be nondeterministic.)
torch.manual_seed(42)

# One input unit per feature column of the training frame.
model = CancellationNN(X_train.shape[1])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss and optimizer
# BCELoss expects probabilities, which the model's final Sigmoid provides.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# ----- Training Loop -----
num_epochs = 20
for epoch in range(num_epochs):
    # Put the network in training mode (dropout active) for this pass.
    model.train()
    batch_losses = []
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        # Targets arrive as a flat vector; add a trailing axis so they line up
        # with the (batch, 1) output of the network.
        y_batch = y_batch.to(device).unsqueeze(1)

        optimizer.zero_grad()
        loss = criterion(model(X_batch), y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    # Sum of per-batch losses; the report below divides by the number of batches.
    epoch_loss = sum(batch_losses)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss/len(train_loader):.4f}")

# ----- Evaluation -----
# Score the held-out set batch by batch through test_loader instead of pushing the
# whole test tensor through the network at once; this bounds device memory use.
# test_loader is not shuffled, so the concatenated outputs stay aligned with y_test.
model.eval()  # disable dropout for inference
prob_chunks = []
with torch.no_grad():
    for X_eval_batch, _y_eval_batch in test_loader:
        prob_chunks.append(model(X_eval_batch.to(device)).cpu())

y_pred_prob = torch.cat(prob_chunks).numpy().flatten()
# Hard labels at the 0.5 probability threshold.
y_pred = (y_pred_prob > 0.5).astype(int)

# Metrics
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
# ROC-AUC is computed from the raw probabilities, not the thresholded labels.
print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_prob))