# Neural Network - 3 tier approach

embeddings -> category classification -> brand classification

### Imports and configuration

In [60]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using GPU")
else:
    device = torch.device("cpu")
    print("Using CPU")

# device = torch.device("cpu")  # Debugging purposes - easier to debug with CPU

Using GPU


In [61]:
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle

from sklearn.metrics import accuracy_score, precision_score, recall_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

### Load data from pkl file

In [62]:
class SimpleDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [63]:
folder_to_save_products_embeddings = "Tier_approach_logs_performacne"
train_embeddings = os.path.join(folder_to_save_products_embeddings, 'train_set_embeddings.pkl')
test_embeddings = os.path.join(folder_to_save_products_embeddings, 'test_set_embeddings.pkl')

In [64]:
BATCH_SIZE = 16

# CHANGE COLUMN NAMES AS NEEDED TO LOAD DATA:

# Load embeddings and labels from the pickle file
with open(train_embeddings, 'rb') as f:
    data = pickle.load(f)
    loaded_embeddings = data['embeddings']
    loaded_master_labels = data['master_labels']
    loaded_sub_labels = data['sub_labels']
    loaded_brand_labels = data['brand_labels']

train_data = loaded_embeddings

if isinstance(train_data, torch.Tensor):
    train_data = train_data.detach().cpu().numpy()  # Convert to NumPy array

if isinstance(loaded_master_labels, np.ndarray):
    loaded_master_labels = torch.from_numpy(loaded_master_labels)  # Convert to tensor

if isinstance(loaded_brand_labels, np.ndarray):
    loaded_brand_labels = torch.from_numpy(loaded_brand_labels)  # Convert to tensor

mapped_labels = False
if int(max(set(loaded_brand_labels)) + 1) != len(loaded_brand_labels.unique()):
    # Check if the mapping is off
    print(f'max label: {max(set(loaded_brand_labels))}, num of unique labels: {len(loaded_brand_labels.unique())} - remapping labels...')
    unique_labels = torch.unique(loaded_brand_labels)
    label_mapping = {old_label.item(): new_label for new_label, old_label in enumerate(unique_labels)}
    loaded_brand_labels = loaded_brand_labels.clone().apply_(lambda x: label_mapping[x])
    mapped_labels = True

train_num_master_cat = len(loaded_master_labels.unique())
train_num_brands = len(loaded_brand_labels.unique())
train_y_master_cat = loaded_master_labels.cpu()
train_y_brand = loaded_brand_labels.cpu()

# Print to verify the loaded data
print("Embeddings shape:", loaded_embeddings.shape)  # Should show the shape of the embeddings
print("number of Unique master categories:", train_num_master_cat)
print("number of Unique brands:", train_num_brands)
print('max category: ', max(loaded_master_labels))
print('min category: ', min(loaded_master_labels))
print('max brand class: ', max(loaded_brand_labels))
print('min brand class: ', min(loaded_brand_labels))
print("train_y_master_cat.shape:", train_y_master_cat.shape)
print("train_y_brand.shape:", train_y_brand.shape)

master_cat_dataset_train = SimpleDataset(train_data, train_y_master_cat)
master_cat_dataloader_train = DataLoader(master_cat_dataset_train, batch_size=BATCH_SIZE, shuffle=True)


Embeddings shape: torch.Size([87672, 1768])
number of Unique master categories: 10
number of Unique brands: 3892
max category:  tensor(9)
min category:  tensor(0)
max brand class:  tensor(3891)
min brand class:  tensor(0)
train_y_master_cat.shape: torch.Size([87672])
train_y_brand.shape: torch.Size([87672])


  self.labels = torch.tensor(labels, dtype=torch.float32)


In [65]:
# Load embeddings and labels from the pickle file

with open(test_embeddings, 'rb') as f:
    data = pickle.load(f)
    loaded_embeddings = data['embeddings']
    loaded_master_labels = data['master_labels']
    loaded_sub_labels = data['sub_labels']
    loaded_brand_labels = data['brand_labels']

test_data = loaded_embeddings

if isinstance(test_data, torch.Tensor):
    test_data = test_data.detach().cpu().numpy()  # Convert to NumPy array

if isinstance(loaded_brand_labels, np.ndarray):
    loaded_brand_labels = torch.from_numpy(loaded_brand_labels)  # Convert to tensor

if mapped_labels:
    # Apply the same label mapping to the test set if needed
    loaded_brand_labels = loaded_brand_labels.clone().apply_(lambda x: label_mapping.get(x, -1))
    
test_y_master_cat = loaded_master_labels.cpu()
test_y_brand = loaded_brand_labels.cpu()

# Print to verify the loaded data
print("Embeddings shape:", loaded_embeddings.shape)  # Should show the shape of the embeddings
print("number of Unique master categories:", len(loaded_master_labels.unique()))
print("number of Unique brands:", len(loaded_brand_labels.unique()))
print('max brand: ', max(loaded_brand_labels))
print('min brand: ', min(loaded_brand_labels))
print("test_y_master_cat.shape:", test_y_master_cat.shape)
print("test_y_brand.shape:", test_y_brand.shape)

master_cat_dataset_test = SimpleDataset(test_data, test_y_master_cat)
master_cat_dataloader_test = DataLoader(master_cat_dataset_test, batch_size=BATCH_SIZE, shuffle=False)


Embeddings shape: torch.Size([21919, 1768])
number of Unique master categories: 10
number of Unique brands: 3136
max brand:  tensor(3891)
min brand:  tensor(1)
test_y_master_cat.shape: torch.Size([21919])
test_y_brand.shape: torch.Size([21919])


  self.labels = torch.tensor(labels, dtype=torch.float32)


In [66]:
print("Train data (embeddings) shape:", train_data.shape)
print("Test data (embeddigs) shape:", test_data.shape)
print('Train label (category) shape: ', train_y_master_cat.shape)
print('Test label (category) shape: ', test_y_master_cat.shape)
print('Train label (brand) shape: ', train_y_brand.shape)
print('Test label (brand) shape: ', test_y_brand.shape)

Train data (embeddings) shape: (87672, 1768)
Test data (embeddigs) shape: (21919, 1768)
Train label (category) shape:  torch.Size([87672])
Test label (category) shape:  torch.Size([21919])
Train label (brand) shape:  torch.Size([87672])
Test label (brand) shape:  torch.Size([21919])


# Neural Network

In [67]:
weights_file_name = "model.pth"

class classifierNN(nn.Module):
    def __init__(self, input_size, output_size, layer1=64, layer2=32, dropout_rate=0.2, lr=0.001):
        super(classifierNN, self).__init__()
        self.fc1 = nn.Linear(input_size, layer1)  # First hidden layer
        self.dropout1 = nn.Dropout(dropout_rate)   # Dropout layer after first hidden layer
        self.fc2 = nn.Linear(layer1, layer2)        # Second hidden layer
        self.dropout2 = nn.Dropout(dropout_rate)   # Dropout layer after second hidden layer
        self.fc3 = nn.Linear(layer2, output_size)   # Output layer
        self.relu = nn.ReLU()                       # Activation function
        self.optimizer = optim.Adam(self.parameters(), lr=lr)  
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout1(x)  # Apply dropout after the first layer
        x = self.relu(self.fc2(x))
        x = self.dropout2(x)  # Apply dropout after the second layer
        x = self.fc3(x)
        return x

    def train_model(self, train_dataloader, val_dataloader, optimizer=None, num_epochs=100, k=10, save_directory=None, log_file=None):
        if save_directory:
            os.makedirs(save_directory, exist_ok=True)  # Create directory if it doesn't exist

        if optimizer is None:
            optimizer = self.optimizer
        self.train()  # Set the model to training mode
        for epoch in range(num_epochs):
            for features, labels in train_dataloader:
                optimizer.zero_grad()  # Clear gradients
                features = features.to(device)
                labels = labels.to(device)

                # Forward pass
                logits = self(features)
                labels = labels.long()
                loss = self.criterion(logits, labels)  # Reshape if needed

                # Backward pass
                loss.backward()
                optimizer.step()  # Update weights            

            if epoch % k == 0 or epoch == num_epochs - 1:
                # Evaluate on validation set
                _, val_loss, val_accuracy, strict_val_accuracy = self.evaluate(val_dataloader)
                self.train()
                
                log_message = (f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, "
                           f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}, "
                           f"99% Accuracy: {strict_val_accuracy:.4f}")
                print(log_message)

                # If a log file is specified, append to it
                if save_directory:
                    if log_file:
                        log_path = os.path.join(save_directory, log_file)

                        if log_path.endswith(".txt"):  # Ensure it's a text file
                            with open(log_path, "a") as f:  # Open in append mode
                                f.write(log_message + "\n")
                        else:
                            print("Warning: Provided file is not a .txt file, skipping logging.")
                    file_name = os.path.join(save_directory, weights_file_name)
                    # Save the model
                    torch.save(self, file_name)

        return val_loss
    
    def evaluate(self, dataloader, device=device):
        self.eval()  # Set the model to evaluation mode
        total_loss = 0
        correct_predictions = 0
        strict_correct_predictions = 0
        total_samples = 0
        all_logits = []

        with torch.no_grad():  # No need to track gradients
            for features, labels in dataloader:
                features = features.to(device)
                labels = labels.to(device)

                # Forward pass
                logits = self(features)  # Get the raw logits
                all_logits.append(logits.cpu())
                labels = labels.long()

                # Compute loss
                loss = self.criterion(logits, labels)
                total_loss += loss.item()

                # Calculate accuracy
                probabilities = torch.softmax(logits, dim=1)
                predictions = torch.argmax(probabilities, axis=1)
                correct_predictions += (predictions == labels).sum().item()
                
                strict_predictions = (probabilities.max(dim=1).values >= 0.99) & (predictions == labels)
                strict_correct_predictions += strict_predictions.sum().item()

                total_samples += labels.size(0)

        all_logits = torch.cat(all_logits, dim=0)
        avg_loss = total_loss / len(dataloader)
        accuracy = correct_predictions / total_samples
        strict_accuracy = strict_correct_predictions / total_samples
        return all_logits, avg_loss, accuracy, strict_accuracy

### Hyperparameter Tuning

In [68]:
def random_search(train_loader, val_loader, output_nodes, n_trials=10, epochs=10, k=5):
    best_model = None
    best_loss = float('inf')
    best_params = {}

    for i in range(n_trials):
        
        # CHANGE SEARCH PARAMETERS AS NEEDED:
    
        layer1 = np.random.randint(64, 1024)
        layer2 = np.random.randint(32, 1024)
        dropout_rate = np.random.uniform(0.05, 0.4)  
        learning_rate = 10**np.random.uniform(-6, -2)

        # layer1 = np.random.randint(8, 128)  # Number of neurons in first layer
        # layer2 = np.random.randint(8, 64)   # Number of neurons in second layer
        # dropout_rate = np.random.uniform(0.1, 0.5)  # Dropout rate
        # learning_rate = 10**np.random.uniform(-5, 0)  # Learning rate

        print(f'--- Params (iter {i+1}) ---')
        print(f'layer1: {layer1}, layer2: {layer2}, dropout: {dropout_rate}, learning_rate: {learning_rate}')
    
        # Initialize model, loss function, and optimizer
        embedding_size = train_data.shape[1]
        model = classifierNN(embedding_size, output_nodes, layer1=layer1, layer2=layer2, dropout_rate=dropout_rate)
        model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Train model
        val_loss = model.train_model(train_dataloader=train_loader, val_dataloader=val_loader, optimizer=optimizer, num_epochs=epochs, k=k)

        # Update best model if current one is better
        if val_loss < best_loss:
            best_loss = val_loss
            best_model = model
            best_params = {'layer1': layer1, 'layer2': layer2, 'dropout_rate': dropout_rate, 'learning_rate': learning_rate}

    return best_model, best_params, best_loss


### Evaluation

In [69]:
def get_threshold_metrics(logits, y_true, threshold=None):
    probs = torch.softmax(torch.tensor(logits), dim=1).numpy()
    y_pred = np.argmax(logits, axis=1)

    unique_images_test = len(y_true)  # Unique images in the test set
    unique_brands_test = len(np.unique(y_true))  # Unique brands in the test set

    if threshold:
        predicted_probabilities = probs[np.arange(len(probs)), y_pred]
        y_pred_threshold = np.where(predicted_probabilities >= threshold, y_pred, -1)

        accuracy = accuracy_score(y_true, y_pred_threshold)
        precision = precision_score(y_true, y_pred_threshold, average='macro', zero_division=0)
        recall = recall_score(y_true, y_pred_threshold, average='macro', zero_division=0)
        f1 = (2 * precision * recall) / (precision + recall) if (precision+recall>0) else 0.0
        
        valid_indices = predicted_probabilities >= threshold  # Get indices where predictions meet the threshold
        y_pred_adj =  y_pred_threshold[valid_indices]
        adj_accuracy = accuracy_score(y_true[valid_indices], y_pred_adj) if np.any(valid_indices) else 0.0
        unique_images_pred = len(y_pred_adj)
        unique_brands_pred = len(np.unique(y_pred_adj))

    else:
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='macro')
        recall = recall_score(y_true, y_pred, average='macro')
        f1 = (2 * precision * recall) / (precision + recall)
        adj_accuracy = accuracy
        unique_images_pred = len(y_pred)
        unique_brands_pred = len(np.unique(y_pred))
    
    coverage_images = unique_images_pred / unique_images_test if unique_images_test > 0 else 0.0
    coverage_brands = unique_brands_pred / unique_brands_test if unique_brands_test > 0 else 0.0

    return accuracy, precision, recall, f1, adj_accuracy, coverage_images, coverage_brands


def evaluate_and_save_model_stats(logits, y_true, metrics_title, file_path=None, print_metrics=False):
    # Dictionary to store metrics for each threshold
    metrics_dict = {
        "Threshold": [],
        "Accuracy": [],
        "Precision": [],
        "Recall": [],
        "F1 Score": [],
        "Adjusted Accuracy": [],
        "Image Coverage": [],
        "Brand Coverage": []
    }

    # Evaluate metrics at default, 99%, and 95% thresholds
    for threshold, label in [(None, "Default"), (0.95, "95%"), (0.96, "96%"), (0.97, "97%"), (0.98, "98%"), (0.99, "99%")]:
        accuracy, precision, recall, f1, adj_accuracy, cov_img, cov_brand = get_threshold_metrics(logits, y_true, threshold)
        
        accuracy = round(accuracy, 3)
        precision = round(precision, 3)
        recall = round(recall, 3)
        f1 = round(f1, 3)
        adj_accuracy = round(adj_accuracy, 3)
        cov_img = round(cov_img, 3)
        cov_brand = round(cov_brand, 3)

        if print_metrics:
            # Print metrics to console
            print(f'\n--- {metrics_title} {label} Metrics ---')
            print(f'Accuracy: {accuracy}')
            print(f'Precision: {precision}')
            print(f'Recall: {recall}')
            print(f'F1 Score: {f1}')
            print(f'Adjusted Accuracy: {adj_accuracy}')
            print(f'Image Coverage: {cov_img}')
            print(f'Brand Coverage: {cov_brand}')
            
            # Store metrics in dictionary
            metrics_dict["Threshold"].append(label)
            metrics_dict["Accuracy"].append(accuracy)
            metrics_dict["Precision"].append(precision)
            metrics_dict["Recall"].append(recall)
            metrics_dict["F1 Score"].append(f1)
            metrics_dict["Adjusted Accuracy"].append(adj_accuracy)
            metrics_dict["Image Coverage"].append(cov_img)
            metrics_dict["Brand Coverage"].append(cov_brand)

    if file_path:
        metrics_df = pd.DataFrame(metrics_dict)

        definitions = {
        "Metric": [
            "Softmax Threshold",
            "Accuracy",
            "Adjusted Accuracy",
            "Coverage_images",
            "Coverage_brands"
        ],
        "Definition": [
            "No threshold: all images are used. 99% threshold: predictions meeting at least 99% softmax probability.",
            "Number of predictions meeting the threshold and are correct / test set size.",
            "Number of predictions meeting the threshold and are correct / Number of predictions meeting the threshold.",
            "Unique images meeting the threshold / unique images in test set.",
            "Unique brands meeting the threshold / unique brands in test set."
        ],
        "Example": [
            "100 images with 85 unique brands. Model classified 90 images correctly with no threshold. 80 images (55 unique brands) meet the 99% threshold, out of these 80 images 75 were classified correctly.",
            "No threshold accuracy: 0.90, 99% threshold accuracy: 0.75",
            "99% adjusted accuracy: 75/80 = 0.93",
            "99% Coverage_Images: 0.80",
            "99% Coverage_brands: 55/85 = 0.647"
        ]
        }
        definitions_df = pd.DataFrame(definitions)


        with pd.ExcelWriter(file_path) as writer:
            metrics_df.to_excel(writer, sheet_name="Metrics", index=False)
            definitions_df.to_excel(writer, sheet_name="Definitions", index=False)

        print(f"Metrics and definitions saved to {file_path}")

### Neural Network Training

#### Category classifier

In [70]:
# Hyperparam tuning
best_model, best_params, best_loss = random_search(master_cat_dataloader_train, master_cat_dataloader_test, train_num_master_cat, n_trials=6, epochs=6, k=1)

print(f'Best Loss: {best_loss}')
print(f'Best Hyperparameters: {best_params}')


# Initialize model
logfilepath = "logs.txt"
current_time = datetime.now().strftime("%Y-%m-%d_%I-%M_%p")
dataset = 'BigBasket'
dataset_title = dataset + "_" if dataset else ''
save_directory = f"{dataset_title}Category_train_logs_{current_time}"

embedding_size = train_data.shape[1]
model_paths = []

# Can change params or use random search if performance isn't good:
model_cat = classifierNN(
    embedding_size, 
    train_num_master_cat, 
    layer1=best_params['layer1'], 
    layer2=best_params['layer2'], 
    dropout_rate=best_params['dropout_rate'], 
    lr=best_params['learning_rate']
    )

model_cat.to(device)
num_weights = sum(p.numel() for p in model_cat.parameters() if p.requires_grad)
print(f"Number of learnable weights: {num_weights}")


# Train the model

# For enabling training logs:
model_cat.train_model(master_cat_dataloader_train, master_cat_dataloader_test, num_epochs=50, k=1, save_directory=save_directory, log_file=logfilepath)
model_path = os.path.join(save_directory, weights_file_name)
model_paths.append(model_path)

# For disabling traning logs:
# model_cat.train_model(master_cat_dataloader_train, master_cat_dataloader_test, num_epochs=50, k=1)

logits_cat, loss_cat, accuracy_cat, strict_accuracy_cat = model_cat.evaluate(master_cat_dataloader_test)
logits_cat = logits_cat.cpu().numpy()


# Evaluate model
file_name = f"{dataset_title}category"
file_name = file_name + '_' if file_name else ''
cat_file_path = f'{save_directory}\\{file_name}metrics_output.xlsx'
evaluate_and_save_model_stats(logits_cat, test_y_master_cat, metrics_title=file_name, file_path=cat_file_path, print_metrics=True)

--- Params (iter 1) ---
layer1: 382, layer2: 980, dropout: 0.23288839590977467, learning_rate: 0.0001142669379078059
Epoch [1/6], Loss: 0.0002, Val Loss: 0.1104, Val Accuracy: 0.9678, 99% Accuracy: 0.8266
Epoch [2/6], Loss: 0.4685, Val Loss: 0.0977, Val Accuracy: 0.9713, 99% Accuracy: 0.8517
Epoch [3/6], Loss: 0.9461, Val Loss: 0.0890, Val Accuracy: 0.9749, 99% Accuracy: 0.8453
Epoch [4/6], Loss: 0.0060, Val Loss: 0.0800, Val Accuracy: 0.9773, 99% Accuracy: 0.8724
Epoch [5/6], Loss: 0.0000, Val Loss: 0.0775, Val Accuracy: 0.9792, 99% Accuracy: 0.8931
Epoch [6/6], Loss: 0.0892, Val Loss: 0.0762, Val Accuracy: 0.9788, 99% Accuracy: 0.8739
--- Params (iter 2) ---
layer1: 987, layer2: 661, dropout: 0.32306500230368534, learning_rate: 7.644243621410023e-05
Epoch [1/6], Loss: 0.0050, Val Loss: 0.1064, Val Accuracy: 0.9704, 99% Accuracy: 0.8226
Epoch [2/6], Loss: 0.0032, Val Loss: 0.0895, Val Accuracy: 0.9734, 99% Accuracy: 0.8326
Epoch [3/6], Loss: 0.0009, Val Loss: 0.0888, Val Accuracy: 0.9

#### Brand classifiers

In [71]:
for category_id in range(train_num_master_cat):

    train_mask = (train_y_master_cat == category_id)
    test_mask = (test_y_master_cat == category_id)

    # Select only the relevant data based on the masks
    category_train_data = train_data[train_mask]
    category_train_y_brand = train_y_brand[train_mask]
    category_test_data = test_data[test_mask]
    category_test_y_brand = test_y_brand[test_mask]

    brand_dataset_train = SimpleDataset(category_train_data, category_train_y_brand)
    brand_dataloader_train = DataLoader(brand_dataset_train, batch_size=BATCH_SIZE, shuffle=True)
    brand_dataset_test = SimpleDataset(category_test_data, category_test_y_brand)
    brand_dataloader_test = DataLoader(brand_dataset_test, batch_size=BATCH_SIZE, shuffle=False)

    # Perform hyperparameter tuning specifically for this category
    best_model, best_params, best_loss = random_search(brand_dataloader_train, brand_dataloader_test, train_num_brands, n_trials=6, epochs=6, k=1)
    print(f"Category: {category_id} | Best Loss: {best_loss}")
    print(f"Category: {category_id} | Best Hyperparameters: {best_params}")
    
    # Define a title and directory for saving logs for this category
    train_log_title = f"{dataset_title}Category_{category_id}"
    save_directory = f"{train_log_title}_train_logs_{current_time}"

    # Initialize the model for this category with the best hyperparameters
    embedding_size = train_data.shape[1]  # assuming all categories share the same input feature size
    model_brand = classifierNN(
        embedding_size, 
        train_num_brands, 
        layer1=best_params['layer1'], 
        layer2=best_params['layer2'], 
        dropout_rate=best_params['dropout_rate'], 
        lr=best_params['learning_rate']
    )
    
    model_brand.to(device)
    num_weights = sum(p.numel() for p in model_brand.parameters() if p.requires_grad)
    print(f"Category: {category_id} | Number of learnable weights: {num_weights}")

    # Train the model for this category and enable logging
    model_brand.train_model(
        brand_dataloader_train, 
        brand_dataloader_test, 
        num_epochs=50,
        k=1, 
        save_directory=save_directory, 
        log_file=logfilepath
    )
    model_path = os.path.join(save_directory, weights_file_name)
    model_paths.append(model_path)

    # Evaluate the model for this category
    logits_brand, loss_brand, accuracy_brand, strict_accuracy_brand = model_brand.evaluate(brand_dataloader_test)
    logits_brand = logits_brand.cpu().numpy()

    file_name = f"{train_log_title}_brand"
    file_name = file_name + '_' if file_name else ''
    file_path = f'{save_directory}\\{file_name}metrics_output.xlsx'
    evaluate_and_save_model_stats(logits_brand, category_test_y_brand, metrics_title=file_name, file_path=file_path, print_metrics=False)

--- Params (iter 1) ---
layer1: 127, layer2: 865, dropout: 0.07366600739227883, learning_rate: 6.514348921701892e-06


  self.labels = torch.tensor(labels, dtype=torch.float32)


Epoch [1/6], Loss: 8.2408, Val Loss: 8.1287, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 8.2303, Val Loss: 8.1132, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 8.1408, Val Loss: 8.0974, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 8.0961, Val Loss: 8.0816, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
Epoch [5/6], Loss: 8.0640, Val Loss: 8.0658, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
Epoch [6/6], Loss: 8.0040, Val Loss: 8.0499, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
--- Params (iter 2) ---
layer1: 523, layer2: 366, dropout: 0.19982606102245887, learning_rate: 0.0007230589052896631
Epoch [1/6], Loss: 7.1351, Val Loss: 5.0889, Val Accuracy: 0.6000, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 2.3054, Val Loss: 2.5878, Val Accuracy: 0.6000, 99% Accuracy: 0.2000
Epoch [3/6], Loss: 3.6109, Val Loss: 2.1144, Val Accuracy: 0.6000, 99% Accuracy: 0.6000
Epoch [4/6], Loss: 1.4411, Val Loss: 0.4566, Val Accuracy: 0.8000, 99% Accuracy: 0.4000
Epo

  self.labels = torch.tensor(labels, dtype=torch.float32)


Epoch [1/6], Loss: 1.9271, Val Loss: 2.1707, Val Accuracy: 0.5294, 99% Accuracy: 0.0980
Epoch [2/6], Loss: 0.4547, Val Loss: 0.9309, Val Accuracy: 0.7647, 99% Accuracy: 0.1373
Epoch [3/6], Loss: 0.2248, Val Loss: 0.5847, Val Accuracy: 0.8235, 99% Accuracy: 0.1961
Epoch [4/6], Loss: 0.3165, Val Loss: 0.3174, Val Accuracy: 0.9020, 99% Accuracy: 0.2157
Epoch [5/6], Loss: 0.2661, Val Loss: 0.2544, Val Accuracy: 0.9412, 99% Accuracy: 0.4118
Epoch [6/6], Loss: 0.4019, Val Loss: 0.1832, Val Accuracy: 0.9608, 99% Accuracy: 0.3725
--- Params (iter 2) ---
layer1: 227, layer2: 661, dropout: 0.22292099696495343, learning_rate: 0.003821527302871806
Epoch [1/6], Loss: 2.1631, Val Loss: 2.2601, Val Accuracy: 0.4706, 99% Accuracy: 0.0980
Epoch [2/6], Loss: 2.8092, Val Loss: 1.3939, Val Accuracy: 0.6863, 99% Accuracy: 0.2157
Epoch [3/6], Loss: 2.5752, Val Loss: 0.8687, Val Accuracy: 0.7843, 99% Accuracy: 0.1176
Epoch [4/6], Loss: 1.1583, Val Loss: 0.5225, Val Accuracy: 0.8039, 99% Accuracy: 0.3137
Epoc

  self.labels = torch.tensor(labels, dtype=torch.float32)


--- Params (iter 1) ---
layer1: 486, layer2: 742, dropout: 0.2664383111032962, learning_rate: 1.1550927896490326e-06
Epoch [1/6], Loss: 7.9400, Val Loss: 7.8260, Val Accuracy: 0.0299, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 6.5088, Val Loss: 6.6482, Val Accuracy: 0.0268, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 5.9299, Val Loss: 6.1447, Val Accuracy: 0.0685, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 5.4530, Val Loss: 5.8909, Val Accuracy: 0.0826, 99% Accuracy: 0.0000
Epoch [5/6], Loss: 5.5223, Val Loss: 5.6957, Val Accuracy: 0.0975, 99% Accuracy: 0.0000
Epoch [6/6], Loss: 6.4774, Val Loss: 5.5230, Val Accuracy: 0.1151, 99% Accuracy: 0.0000
--- Params (iter 2) ---
layer1: 845, layer2: 50, dropout: 0.3332904656345296, learning_rate: 0.0011188591059212813
Epoch [1/6], Loss: 5.8453, Val Loss: 3.9528, Val Accuracy: 0.2492, 99% Accuracy: 0.0274
Epoch [2/6], Loss: 3.2111, Val Loss: 3.5517, Val Accuracy: 0.2888, 99% Accuracy: 0.0503
Epoch [3/6], Loss: 4.1471, Val Loss: 3.2518, Val Accuracy: 0.340

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Metrics and definitions saved to BigBasket_Category_2_train_logs_2024-11-17_04-13_PM\BigBasket_Category_2_brand_metrics_output.xlsx


  self.labels = torch.tensor(labels, dtype=torch.float32)


--- Params (iter 1) ---
layer1: 157, layer2: 234, dropout: 0.27722250566132517, learning_rate: 0.005484473914146054
Epoch [1/6], Loss: 5.7328, Val Loss: 4.7768, Val Accuracy: 0.1196, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 4.1103, Val Loss: 4.6060, Val Accuracy: 0.1383, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 5.8277, Val Loss: 4.3502, Val Accuracy: 0.1294, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 5.0902, Val Loss: 4.1609, Val Accuracy: 0.1424, 99% Accuracy: 0.0000
Epoch [5/6], Loss: 4.5249, Val Loss: 4.1481, Val Accuracy: 0.1522, 99% Accuracy: 0.0000
Epoch [6/6], Loss: 4.6405, Val Loss: 4.1063, Val Accuracy: 0.1416, 99% Accuracy: 0.0081
--- Params (iter 2) ---
layer1: 198, layer2: 536, dropout: 0.3974272602014326, learning_rate: 0.00021762347429061399
Epoch [1/6], Loss: 4.1849, Val Loss: 4.2540, Val Accuracy: 0.2514, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 1.8467, Val Loss: 3.1019, Val Accuracy: 0.4125, 99% Accuracy: 0.0016
Epoch [3/6], Loss: 2.9213, Val Loss: 2.3821, Val Accuracy: 0.54

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Metrics and definitions saved to BigBasket_Category_3_train_logs_2024-11-17_04-13_PM\BigBasket_Category_3_brand_metrics_output.xlsx


  self.labels = torch.tensor(labels, dtype=torch.float32)


--- Params (iter 1) ---
layer1: 654, layer2: 615, dropout: 0.3525835626998758, learning_rate: 0.005673275297006618
Epoch [1/6], Loss: 5.7677, Val Loss: 5.2523, Val Accuracy: 0.0678, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 5.1249, Val Loss: 5.0825, Val Accuracy: 0.0636, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 5.1104, Val Loss: 5.0438, Val Accuracy: 0.0628, 99% Accuracy: 0.0005
Epoch [4/6], Loss: 5.9589, Val Loss: 5.1061, Val Accuracy: 0.0586, 99% Accuracy: 0.0026
Epoch [5/6], Loss: 5.9363, Val Loss: 5.0591, Val Accuracy: 0.0681, 99% Accuracy: 0.0076
Epoch [6/6], Loss: 5.5520, Val Loss: 5.2300, Val Accuracy: 0.0534, 99% Accuracy: 0.0060
--- Params (iter 2) ---
layer1: 458, layer2: 463, dropout: 0.2426296094680923, learning_rate: 1.812954285966533e-06
Epoch [1/6], Loss: 8.0766, Val Loss: 8.0105, Val Accuracy: 0.0408, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 7.1762, Val Loss: 7.2068, Val Accuracy: 0.0411, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 6.7594, Val Loss: 6.0771, Val Accuracy: 0.0542

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Metrics and definitions saved to BigBasket_Category_4_train_logs_2024-11-17_04-13_PM\BigBasket_Category_4_brand_metrics_output.xlsx
--- Params (iter 1) ---
layer1: 891, layer2: 1016, dropout: 0.23329885934628397, learning_rate: 0.0034692553985404364


  self.labels = torch.tensor(labels, dtype=torch.float32)


Epoch [1/6], Loss: 1.1846, Val Loss: 0.9253, Val Accuracy: 0.6951, 99% Accuracy: 0.1829
Epoch [2/6], Loss: 0.2018, Val Loss: 1.0564, Val Accuracy: 0.7561, 99% Accuracy: 0.3902
Epoch [3/6], Loss: 0.0675, Val Loss: 0.2660, Val Accuracy: 0.9268, 99% Accuracy: 0.6585
Epoch [4/6], Loss: 0.0298, Val Loss: 0.2477, Val Accuracy: 0.8902, 99% Accuracy: 0.6585
Epoch [5/6], Loss: 0.2798, Val Loss: 0.2424, Val Accuracy: 0.9268, 99% Accuracy: 0.7439
Epoch [6/6], Loss: 0.2987, Val Loss: 0.5129, Val Accuracy: 0.8537, 99% Accuracy: 0.7317
--- Params (iter 2) ---
layer1: 251, layer2: 315, dropout: 0.36475880009005257, learning_rate: 0.0001682306039186052
Epoch [1/6], Loss: 5.7157, Val Loss: 5.0503, Val Accuracy: 0.3171, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 2.7617, Val Loss: 2.0922, Val Accuracy: 0.4512, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 1.2195, Val Loss: 1.3106, Val Accuracy: 0.6585, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 1.6573, Val Loss: 1.0075, Val Accuracy: 0.6585, 99% Accuracy: 0.0244
Epo

  self.labels = torch.tensor(labels, dtype=torch.float32)


Epoch [2/6], Loss: 5.7243, Val Loss: 5.2822, Val Accuracy: 0.3968, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 4.8188, Val Loss: 3.1368, Val Accuracy: 0.4762, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 3.2977, Val Loss: 2.6258, Val Accuracy: 0.5238, 99% Accuracy: 0.0000
Epoch [5/6], Loss: 1.9578, Val Loss: 2.3242, Val Accuracy: 0.6508, 99% Accuracy: 0.0000
Epoch [6/6], Loss: 1.4443, Val Loss: 2.0757, Val Accuracy: 0.6190, 99% Accuracy: 0.0000
--- Params (iter 2) ---
layer1: 516, layer2: 357, dropout: 0.37681470417585927, learning_rate: 0.00010723080351940511
Epoch [1/6], Loss: 7.6293, Val Loss: 7.6090, Val Accuracy: 0.3492, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 6.1498, Val Loss: 6.1823, Val Accuracy: 0.3175, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 3.6905, Val Loss: 3.6524, Val Accuracy: 0.3175, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 1.3866, Val Loss: 2.5618, Val Accuracy: 0.5714, 99% Accuracy: 0.0000
Epoch [5/6], Loss: 1.4415, Val Loss: 2.1084, Val Accuracy: 0.6984, 99% Accuracy: 0.0317
Ep

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Metrics and definitions saved to BigBasket_Category_6_train_logs_2024-11-17_04-13_PM\BigBasket_Category_6_brand_metrics_output.xlsx


  self.labels = torch.tensor(labels, dtype=torch.float32)


--- Params (iter 1) ---
layer1: 434, layer2: 959, dropout: 0.20306412095157544, learning_rate: 1.7943633593822913e-05
Epoch [1/6], Loss: 4.3049, Val Loss: 4.4163, Val Accuracy: 0.2183, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 2.2489, Val Loss: 3.5182, Val Accuracy: 0.3525, 99% Accuracy: 0.0008
Epoch [3/6], Loss: 3.5327, Val Loss: 2.9332, Val Accuracy: 0.4285, 99% Accuracy: 0.0097
Epoch [4/6], Loss: 2.6569, Val Loss: 2.4930, Val Accuracy: 0.5093, 99% Accuracy: 0.0234
Epoch [5/6], Loss: 1.2721, Val Loss: 2.1341, Val Accuracy: 0.5861, 99% Accuracy: 0.0307
Epoch [6/6], Loss: 1.8994, Val Loss: 1.8397, Val Accuracy: 0.6443, 99% Accuracy: 0.0453
--- Params (iter 2) ---
layer1: 203, layer2: 609, dropout: 0.12247097487591131, learning_rate: 6.149787821216244e-05
Epoch [1/6], Loss: 4.5042, Val Loss: 3.3468, Val Accuracy: 0.3751, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 2.9509, Val Loss: 2.1652, Val Accuracy: 0.5788, 99% Accuracy: 0.0251
Epoch [3/6], Loss: 2.5089, Val Loss: 1.5036, Val Accuracy: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  self.labels = torch.tensor(labels, dtype=torch.float32)


Metrics and definitions saved to BigBasket_Category_7_train_logs_2024-11-17_04-13_PM\BigBasket_Category_7_brand_metrics_output.xlsx
--- Params (iter 1) ---
layer1: 397, layer2: 771, dropout: 0.1960500392527187, learning_rate: 0.005200128013493214
Epoch [1/6], Loss: 2.6725, Val Loss: 8.3136, Val Accuracy: 0.1667, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 4.1375, Val Loss: 2.5202, Val Accuracy: 0.5000, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 2.1642, Val Loss: 1.3975, Val Accuracy: 0.5000, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 0.0002, Val Loss: 1.1075, Val Accuracy: 0.6667, 99% Accuracy: 0.0000
Epoch [5/6], Loss: 0.1354, Val Loss: 1.2250, Val Accuracy: 0.6667, 99% Accuracy: 0.0000
Epoch [6/6], Loss: 0.2868, Val Loss: 0.0923, Val Accuracy: 1.0000, 99% Accuracy: 0.5000
--- Params (iter 2) ---
layer1: 440, layer2: 472, dropout: 0.3246498186785127, learning_rate: 6.932214627268042e-06
Epoch [1/6], Loss: 8.4282, Val Loss: 8.1702, Val Accuracy: 0.0000, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 8.

  self.labels = torch.tensor(labels, dtype=torch.float32)


--- Params (iter 1) ---
layer1: 858, layer2: 1022, dropout: 0.07661810378450411, learning_rate: 5.217677937738591e-06
Epoch [1/6], Loss: 7.2149, Val Loss: 5.3925, Val Accuracy: 0.1421, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 4.8003, Val Loss: 4.3168, Val Accuracy: 0.2601, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 1.8524, Val Loss: 3.5641, Val Accuracy: 0.3875, 99% Accuracy: 0.0000
Epoch [4/6], Loss: 4.9455, Val Loss: 3.0276, Val Accuracy: 0.4610, 99% Accuracy: 0.0003
Epoch [5/6], Loss: 2.1352, Val Loss: 2.6314, Val Accuracy: 0.5234, 99% Accuracy: 0.0013
Epoch [6/6], Loss: 2.0500, Val Loss: 2.3301, Val Accuracy: 0.5637, 99% Accuracy: 0.0038
--- Params (iter 2) ---
layer1: 923, layer2: 151, dropout: 0.2844269219611914, learning_rate: 1.5207937641454495e-06
Epoch [1/6], Loss: 8.2997, Val Loss: 8.1634, Val Accuracy: 0.0028, 99% Accuracy: 0.0000
Epoch [2/6], Loss: 7.7859, Val Loss: 7.9024, Val Accuracy: 0.0301, 99% Accuracy: 0.0000
Epoch [3/6], Loss: 7.4601, Val Loss: 7.4604, Val Accuracy: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Metrics and definitions saved to BigBasket_Category_9_train_logs_2024-11-17_04-13_PM\BigBasket_Category_9_brand_metrics_output.xlsx


In [72]:
models = [torch.load(model_path).eval() for model_path in model_paths]  # Load each model and set to eval mode

all_logits = []

# Evaluate each batch in the test dataloader
with torch.no_grad():  # Disable gradient calculation for evaluation
    for i in range(0, len(test_data)):
        
        # Predict the category first (using the category model, assumed to be the first model in list)
        emb = torch.tensor(test_data[i]).to(device)
        category_logits = models[0](emb)
        predicted_category = torch.argmax(category_logits)

        # Forward pass only through the specialized model for the predicted category
        logit = models[predicted_category+1](emb)
        all_logits.append(logit.cpu().numpy())

all_logits = np.array(all_logits)

save_directory = f"{dataset_title}final_eval_{current_time}"
file_name = f"{dataset_title}3_tier_prediction"
file_path = f'{save_directory}\\{file_name}_metrics_output.xlsx'

if not os.path.exists(save_directory):
    os.makedirs(save_directory)

evaluate_and_save_model_stats(all_logits, test_y_brand, metrics_title=file_name, file_path=file_path, print_metrics=True)


  models = [torch.load(model_path).eval() for model_path in model_paths]  # Load each model and set to eval mode
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- BigBasket_3_tier_prediction Default Metrics ---
Accuracy: 0.947
Precision: 0.954
Recall: 0.958
F1 Score: 0.956
Adjusted Accuracy: 0.947
Image Coverage: 1.0
Brand Coverage: 0.989

--- BigBasket_3_tier_prediction 95% Metrics ---
Accuracy: 0.915
Precision: 0.968
Recall: 0.942
F1 Score: 0.955
Adjusted Accuracy: 0.975
Image Coverage: 0.939
Brand Coverage: 0.984

--- BigBasket_3_tier_prediction 96% Metrics ---
Accuracy: 0.913
Precision: 0.968
Recall: 0.94
F1 Score: 0.954
Adjusted Accuracy: 0.976
Image Coverage: 0.935
Brand Coverage: 0.984

--- BigBasket_3_tier_prediction 97% Metrics ---
Accuracy: 0.908
Precision: 0.968
Recall: 0.937
F1 Score: 0.953
Adjusted Accuracy: 0.977
Image Coverage: 0.93
Brand Coverage: 0.983

--- BigBasket_3_tier_prediction 98% Metrics ---
Accuracy: 0.903
Precision: 0.968
Recall: 0.934
F1 Score: 0.951
Adjusted Accuracy: 0.978
Image Coverage: 0.922
Brand Coverage: 0.982

--- BigBasket_3_tier_prediction 99% Metrics ---
Accuracy: 0.893
Precision: 0.969
Recall: 0.928