## Train a model on the Wine Quality dataset using ordinal loss and the Kappa metric in PyTorch

Date: 2024-12-18

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import DataLoader, TensorDataset

# Load the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = pd.read_csv(url, sep=';')

# Define features and target
X = data.drop('quality', axis=1)
y = data['quality'] - 3

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Create DataLoader for training and testing
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# Set the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Define the Model
The model definition remains the same as in the previous solution.

In [35]:
import torch.nn as nn

class OrdinalNN(nn.Module):
    def __init__(self):
        super(OrdinalNN, self).__init__()
        self.fc1 = nn.Linear(X.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 6)  # Number of classes (3-8)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = OrdinalNN().to(device)


In [36]:
class OrdinalFocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=0.25, num_classes=6):
        super(OrdinalFocalLoss, self).__init__()
        self.gamma = gamma  # Focusing parameter
        self.alpha = alpha  # Weighting factor
        self.num_classes = num_classes

    def forward(self, outputs, targets):
        """
        Compute the Ordinal Focal Loss

        :param outputs: Predicted logits from the model (batch_size, num_classes)
        :param targets: Ground truth labels (batch_size)
        :return: Loss value
        """
        # Convert targets to one-hot encoding
        targets_one_hot = torch.zeros(targets.size(0), self.num_classes).to(targets.device)
        targets_one_hot.scatter_(1, targets.unsqueeze(1), 1)

        # Apply softmax to outputs to get class probabilities
        probs = torch.softmax(outputs, dim=1)

        # Calculate the probability of the true class
        p_t = torch.sum(probs * targets_one_hot, dim=1)  # This is p_t for each instance

        # Compute the focal loss
        loss = -self.alpha * (1 - p_t) ** self.gamma * torch.log(p_t + 1e-8)  # Add epsilon to avoid log(0)

        # Return the average loss
        return torch.mean(loss)

In [37]:
from sklearn.metrics import cohen_kappa_score
# Initialize the criterion and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Function to compute the Kappa score
def compute_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true.cpu(), y_pred.cpu())

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    y_true = []
    y_pred = []
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Collect true and predicted labels for Kappa score
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(outputs.argmax(dim=1).cpu().numpy())
    
    # Calculate Kappa score
    kappa_score = compute_kappa(torch.tensor(y_true), torch.tensor(y_pred))
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}, Kappa: {kappa_score}")


Epoch 1/100, Loss: 1.5258033365011214, Kappa: 0.027160395525093195
Epoch 2/100, Loss: 1.14848440438509, Kappa: 0.1941581189042062
Epoch 3/100, Loss: 1.0254976019263267, Kappa: 0.29289946055146165
Epoch 4/100, Loss: 0.988839827477932, Kappa: 0.3383639594251141
Epoch 5/100, Loss: 0.965928153693676, Kappa: 0.3631989771546358
Epoch 6/100, Loss: 0.9539458021521569, Kappa: 0.3677904087658541
Epoch 7/100, Loss: 0.9409917533397675, Kappa: 0.3671246515932114
Epoch 8/100, Loss: 0.9327839985489845, Kappa: 0.3751479492595736
Epoch 9/100, Loss: 0.9210372045636177, Kappa: 0.3685858232131014
Epoch 10/100, Loss: 0.922743383049965, Kappa: 0.3722848662886773
Epoch 11/100, Loss: 0.9097453355789185, Kappa: 0.3813580749062848
Epoch 12/100, Loss: 0.9032061487436295, Kappa: 0.37044592857844605
Epoch 13/100, Loss: 0.8965445622801781, Kappa: 0.3797972038405859
Epoch 14/100, Loss: 0.8881877571344375, Kappa: 0.4032125634822731
Epoch 15/100, Loss: 0.8835199296474456, Kappa: 0.40018818898663533
Epoch 16/100, Loss:

In [38]:
# Evaluate the model
model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(inputs)
        
        # Collect true and predicted labels for Kappa score
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(outputs.argmax(dim=1).cpu().numpy())

# Calculate Kappa score on the test set
test_kappa = compute_kappa(torch.tensor(y_true), torch.tensor(y_pred))
print(f"Test Kappa Score: {test_kappa}")

Test Kappa Score: 0.38975208678525664


## So sánh Cross Entropy Loss và Focal Loss

In [50]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import cohen_kappa_score, accuracy_score
from torch.utils.data import DataLoader

# Assuming OrdinalFocalLoss and model are defined as before
# CrossEntropy Loss
cross_entropy_criterion = nn.CrossEntropyLoss().to(device)

# Focal Loss
focal_loss_criterion = OrdinalFocalLoss().to(device)

# Initialize your model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = OrdinalNN().to(device)

# Helper function for Kappa Score and Accuracy
def compute_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true.cpu(), y_pred.cpu())
    kappa = cohen_kappa_score(y_true.cpu(), y_pred.cpu())
    return accuracy, kappa

# Training function for comparison
def train_model(criterion, model, num_epochs=100, train_loader=None, test_loader=None):

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        y_true = []
        y_pred = []
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Zero gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            
            # Calculate loss
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            # Collect true and predicted labels for Kappa score
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.argmax(dim=1).cpu().numpy())
        
        # Calculate Kappa score
        kappa_score = compute_kappa(torch.tensor(y_true), torch.tensor(y_pred))
        # print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}, Kappa: {kappa_score}")

    # Evaluate on test set
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.argmax(dim=1).cpu().numpy())

    accuracy, kappa = compute_metrics(torch.tensor(y_true), torch.tensor(y_pred))
    print(f"Test Accuracy: {accuracy:.4f}, Test Kappa: {kappa:.4f}")

# Run training and evaluation for both loss functions
print("Training with CrossEntropyLoss:")
train_model(cross_entropy_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)

print("\nTraining with Focal Loss:")
train_model(focal_loss_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)


Training with CrossEntropyLoss:
Test Accuracy: 0.6125, Test Kappa: 0.3739

Training with Focal Loss:
Test Accuracy: 0.5969, Test Kappa: 0.3695


In [51]:
class OrdinalModelWithDropout(nn.Module):
    def __init__(self, input_size, num_classes, dropout_rate=0.5):
        super(OrdinalModelWithDropout, self).__init__()
        
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        
        # Adding dropout layers between fully connected layers
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout after the first layer
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)  # Apply dropout after the second layer
        x = self.fc3(x)
        return x

In [52]:
# Initialize your model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = OrdinalModelWithDropout(input_size=X.shape[1], num_classes=6).to(device)

# Run training and evaluation for both loss functions
print("Training with CrossEntropyLoss:")
train_model(cross_entropy_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)

print("\nTraining with Focal Loss:")
train_model(focal_loss_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)

Training with CrossEntropyLoss:
Test Accuracy: 0.6188, Test Kappa: 0.3709

Training with Focal Loss:
Test Accuracy: 0.6438, Test Kappa: 0.4167
