In [1]:
import torch
print(torch.cuda.is_available())

True


In [2]:
x = torch.rand(5000, 5000).cuda()
y = torch.mm(x, x)
print(y)

tensor([[1249.7080, 1275.0864, 1275.3453,  ..., 1267.7378, 1252.3220,
         1264.3848],
        [1230.2849, 1255.5793, 1255.8417,  ..., 1245.1591, 1260.0813,
         1214.6724],
        [1250.6439, 1256.7516, 1271.6261,  ..., 1259.1284, 1262.8622,
         1254.0247],
        ...,
        [1256.9447, 1272.2457, 1278.2755,  ..., 1277.9875, 1267.7079,
         1262.0885],
        [1259.6976, 1282.4211, 1286.1539,  ..., 1269.2705, 1274.0341,
         1264.1400],
        [1235.2375, 1250.6504, 1274.6162,  ..., 1253.2966, 1258.3629,
         1241.5743]], device='cuda:0')


In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load and preprocess dataset
df_train = pd.read_csv('Airline_passenger_satiscation_train.csv')
df_test = pd.read_csv('Airline_passenger_satiscation_test.csv')
df = pd.concat([df_train, df_test], ignore_index=True)
df = df.drop(columns=["id"])

# Encode categorical features
categorical_cols = ["Gender", "Customer Type", "Type of Travel", "Class"]
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

# Encode target
target_encoder = LabelEncoder()
df["satisfaction"] = target_encoder.fit_transform(df["satisfaction"])

# Clean and prepare data
df = df.fillna(0)
X = df.drop(columns=["satisfaction"]).values
y = df["satisfaction"].values

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Device config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Custom MLP class with your specified architecture
class MLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        return self.model(x)

# Cross-validation function
def run_cv(X, y, n_splits):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    input_dim = X.shape[1]
    output_dim = len(np.unique(y))
    results = []

    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y), 1):
        print(f"\n[Fold {fold}/{n_splits}]")

        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
        y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

        model = MLP(input_dim, output_dim).to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.002)

        for epoch in range(300):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train_tensor)
            loss = loss_fn(outputs, y_train_tensor)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            preds = model(X_test_tensor).argmax(dim=1).cpu().numpy()
            y_true = y_test_tensor.cpu().numpy()

            acc = accuracy_score(y_true, preds)
            prec = precision_score(y_true, preds, zero_division=0)
            rec = recall_score(y_true, preds, zero_division=0)
            f1 = f1_score(y_true, preds, zero_division=0)

            results.append((acc, prec, rec, f1))

    return results

# Run 5-fold and 10-fold CV
results_5 = run_cv(X, y, 5)
results_10 = run_cv(X, y, 10)

# Format results
def print_results(results, label):
    accs, precs, recs, f1s = zip(*results)
    print(f"\n=== {label} Results ===")
    print("MODEL\tAccuracy\tPrecision\tRecall\t\tF1-Score")
    print(f"{label}\t{np.mean(accs):.4f}\t\t{np.mean(precs):.4f}\t\t{np.mean(recs):.4f}\t\t{np.mean(f1s):.4f}")

print_results(results_5, "5-Fold")
print_results(results_10, "10-Fold")


[Fold 1/5]

[Fold 2/5]

[Fold 3/5]

[Fold 4/5]

[Fold 5/5]

[Fold 1/10]

[Fold 2/10]

[Fold 3/10]

[Fold 4/10]

[Fold 5/10]

[Fold 6/10]

[Fold 7/10]

[Fold 8/10]

[Fold 9/10]

[Fold 10/10]

=== 5-Fold Results ===
MODEL	Accuracy	Precision	Recall		F1-Score
5-Fold	0.9648		0.9750		0.9433		0.9589

=== 10-Fold Results ===
MODEL	Accuracy	Precision	Recall		F1-Score
10-Fold	0.9647		0.9733		0.9447		0.9588
