In [1]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import torch.nn.functional as F
import sklearn
import os
import imgaug.augmenters as iaa
from tqdm.notebook import tqdm
from training_utils import (
    Triangles,
    build_batches,
    get_net_optimiser_scheduler_criterion,
)
import training_utils
from torch.utils.data import Dataset, DataLoader
from importlib import reload

import sys
sys.path.append('../Augmentation/')
sys.path.append('../Simulator/')
import simulation
import augmentations

Define the name of the run and create the output folders

In [3]:
# Device family whose measurements are analysed. The string selects the input
# files under ../Data/processed_data/ and is embedded in every run name.
device_type = "finfets"

# Name of this run; it doubles as the sub-folder name. The trailing slash is
# kept so that the resulting path strings are unchanged.
NAME_OF_RUN = "2021XXXX_only_simulated_data_" + device_type + "/"

# Results are written to data/<run>/ and trained weights to
# data/saved_networks/<run>/.
path = "data/" + NAME_OF_RUN
path_networks = "data/saved_networks/" + NAME_OF_RUN

# makedirs creates missing parent folders and, with exist_ok=True, is a no-op
# when the folder already exists, so no separate existence check is needed.
os.makedirs(path, exist_ok=True)
os.makedirs(path_networks, exist_ok=True)

Depending on the available resources, we can use either a GPU or the CPU

In [4]:
# Index of the GPU to use when the machine has that many GPUs.
PREFERRED_GPU_INDEX = 5

if torch.cuda.is_available():
    # Requesting an index that does not exist only fails later, when the first
    # tensor is moved to the device, so fall back to GPU 0 in that case.
    if PREFERRED_GPU_INDEX < torch.cuda.device_count():
        gpu_index = PREFERRED_GPU_INDEX
    else:
        gpu_index = 0
    device = torch.device("cuda", gpu_index)
else:
    device = torch.device("cpu")
print(device)

cpu


Load the real (experimental) data and resize every image to a uniform 100 × 100 pixels

In [6]:
# Images, labels, measurement names and device names of the experimental data.
# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary objects;
# only load files from a trusted source.
X = np.load("../Data/processed_data/" + device_type + "_imgs.npy", allow_pickle=True)
y = np.load("../Data/processed_data/" + device_type + "_labels.npy")
names = np.load("../Data/processed_data/" + device_type + "_names.npy")
device_names = np.load("../Data/processed_data/" + device_type + "_device_names.npy")

# Every sample holds two images (index 0 and 1); both are resized to
# 100 x 100 pixels and the pair is then normalised together.
resizer = iaa.Resize([100, 100])

X = np.array(
    [
        augmentations.normalise(
            [resizer.augment_image(image=sample[channel]) for channel in (0, 1)]
        )
        for sample in X
    ]
)

# Training only with simulated data

Define training hyperparameters

In [7]:
# Number of passes over the training set for each trained network.
n_epochs = 100
# Passed to the DataLoader as batch_size in the training cells.
n_episodes = 128  # None #only make a single batch
# Number of times the complete simulate / augment / train / evaluate cycle is repeated.
n_repetitions = 10

# Number of images requested from simulation.simulate per repetition.
n_imgs_per_batch = 25000
# Number of augmentation passes made over each simulated batch.
n_augmentations = 2  # 10

# The loss and learning rate are printed once every this many epochs.
print_every_n_epoch = 33

# Devices that are held out, one at a time, as test folds by the cross-device
# training cells further down. The commented-out line would use every device
# name found in the data instead.
# test_devices=np.unique(device_names)
test_devices = [
    "Tuor6A_chiplet_5_device_C",
    "Tuor2E_chiplet_10_device_J",
    "Tuor6A_chiplet_6_device_E",
    "Tuor6A_chiplet_7_device_A",
]

In [8]:
# Accumulator for the results of the simulated-data-only runs; it is passed to
# and returned by training_utils.record_results after every repetition.
results_only_sim = training_utils.get_results_dict()

In [9]:
# Train on simulated data only. Every repetition simulates a fresh batch,
# augments it, trains a newly created network on it, saves the weights and
# evaluates the network on the complete experimental data set (X, y).
for rep in range(n_repetitions):

    # Simulate a new batch of images and corresponding labels
    print("simulating a new batch with", n_imgs_per_batch, "samples")
    X_train_sim, y_train_sim = simulation.simulate(n_imgs_per_batch)
    X_train_sim = np.array(X_train_sim)
    y_train_sim = np.array(y_train_sim, dtype=int)

    # Augment the simulated batch n_augmentations times; the labels are
    # repeated once per augmentation pass so they stay aligned with the images
    X_train_new = []
    y_train_new = []
    for _ in tqdm(range(n_augmentations)):
        X_train_new.append(
            augmentations.augment_batch_mp(
                X_train_sim, shear_and_stretch=False, n_workers=20
            )
        )
        y_train_new.append(y_train_sim)

    # Merge the augmentation-pass axis into the sample axis, keeping the last
    # three axes of every image untouched
    X_train_sim = np.array(X_train_new)
    X_train_sim = X_train_sim.reshape((-1,) + X_train_sim.shape[-3:])
    y_train_sim = np.array(y_train_new).reshape(-1)

    # Prepare the training set for the model
    X_train = X_train_sim
    y_train = y_train_sim
    dataset = Triangles(imgs=X_train, labels=y_train)
    dataloader = DataLoader(dataset, batch_size=n_episodes, shuffle=True, num_workers=0)

    # Initialize the model, optimizer, learning rate scheduler and loss function
    net, optimizer, scheduler, criterion = get_net_optimiser_scheduler_criterion(device)

    # Train the model for a number of epochs
    loss_history = []
    lr_history = []
    for epoch in tqdm(range(n_epochs)):
        for i_batch, sample_batched in enumerate(dataloader):
            X_train_minibatch = sample_batched["image"].to(device).float()
            y_train_minibatch = sample_batched["label"].to(device).long()

            optimizer.zero_grad()
            outputs = net(X_train_minibatch)
            loss = criterion(outputs, y_train_minibatch)
            loss.backward()
            optimizer.step()

        # Record the loss and learning rate at each epoch.
        # NOTE(review): `loss` is the loss of the last mini-batch of the epoch,
        # not the epoch average; the scheduler is stepped on that value too.
        loss_history.append(loss.item())
        lr_history.append(optimizer.param_groups[0]["lr"])
        scheduler.step(loss.item())

        # Print the loss and learning rate every few epochs
        if epoch % print_every_n_epoch == print_every_n_epoch - 1:
            print("[%d] loss: %.7f" % (epoch + 1, loss.item()))
            print("learning rate now:", optimizer.param_groups[0]["lr"])

    # Save the trained model
    torch.save(
        net.state_dict(),
        os.path.join(path_networks, "only_simulator_rep_" + str(rep) + ".pth"),
    )

    # Evaluate the model on the full experimental dataset without tracking gradients
    net.eval()
    with torch.no_grad():
        outputs_full = net(torch.FloatTensor(X).to(device))
        predicted_full = torch.max(outputs_full, 1).indices.cpu()
        # Softmax over the class axis (dim=1, stated explicitly); the
        # probability assigned to class 1 is used as the score
        scores_full = torch.softmax(outputs_full, dim=1)[:, 1].cpu().numpy()

    # Record the results of the model's predictions and report them
    results_only_sim = training_utils.record_results(
        results_only_sim,
        predicted_full,
        scores_full,
        y,
        device_names=device_names,
        triangle_names=names,
    )
    training_utils.report_results(results_only_sim)

    # Save the results after every repetition; the context manager makes sure
    # the file is flushed and closed
    with open(os.path.join(path, "results_only_sim.pkl"), "wb") as results_file:
        pickle.dump(results_only_sim, results_file)

simulating a new batch with 25000 samples


  0%|          | 0/25000 [00:00<?, ?it/s]

  return 1 / (1 + np.exp((x - mu) * beta))
  denominator = tc**2 * (2 + gamma_d / gamma_s) + 0.25 * gamma_d**2 + epsilon**2


  0%|          | 0/2 [00:00<?, ?it/s]



  0%|          | 0/100 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Training with only experimental data

In [None]:
# Name of the run that trains on experimental data only.
NAME_OF_RUN = "2021XXXX_crossdevice_only_real_data_" + device_type

# Results are written to data/<run> and network weights to
# data/saved_networks/<run>.
path = "data/" + NAME_OF_RUN
path_networks = "data/saved_networks/" + NAME_OF_RUN

# makedirs also creates the parent folders ("data/", "data/saved_networks/"),
# so this cell does not rely on an earlier cell having created them.
os.makedirs(path, exist_ok=True)
os.makedirs(path_networks, exist_ok=True)

In [None]:
# Upper bound on the number of augmented real training samples used per fold;
# the shuffled training set is truncated to this length.
n_total_samples = 50000

# Divisor used when computing how often each real sample is repeated; the
# repeated set is then augmented chunksize + 1 times.
chunksize = 10

# The loss and learning rate are printed once every this many epochs.
print_every_n_epoch = 33

In [None]:
# Initialize an empty dictionary to store the results
results_only_real_data = training_utils.get_results_dict()

# Cross-device evaluation on experimental data only, repeated n_repetitions
# times. In every repetition each device in test_devices is held out once: a
# new network is trained on the augmented data of all other devices and tested
# on the held-out device.
for rep in range(n_repetitions):
    print("This is rep", rep)

    # Per-fold predictions, scores, labels, names and device names of this repetition
    predicted = []
    scores = []
    y_test_this_rep = []
    _names = []
    _device_names = []

    # For each device in the test set, create a fold
    for test_device_name in test_devices:
        print("testing", test_device_name)

        # Boolean masks selecting the held-out device and everything else.
        # One device shows up under two names (two cooldowns).
        if test_device_name == "Tuor6A_chiplet_5_device_C":
            test_index = np.logical_or(
                device_names == "Tuor6A_chiplet_5_device_C_cooldown_1",
                device_names == "Tuor6A_chiplet_5_device_C_cooldown_2",
            )
        else:
            test_index = device_names == test_device_name
        train_index = np.logical_not(test_index)

        # Split the data into training and testing sets based on the masks
        _names.append(names[test_index])
        _device_names.append(device_names[test_index])
        X_train_real, X_test_real = X[train_index], X[test_index]
        y_train_real, y_test_real = y[train_index], y[test_index]
        y_train_real = np.array(y_train_real, dtype=int)

        # Number of copies of every real sample needed to reach roughly
        # n_total_samples after augmentation. At least one copy is kept: a
        # value of 0 would leave np.repeat with an empty training set.
        n_augmentations_real = max(
            1, n_total_samples // (len(X_train_real) * chunksize)
        )

        # Repeat the training data to match the number of augmentations
        X_train_real = np.repeat(X_train_real, n_augmentations_real, axis=0)
        y_train_real = np.repeat(y_train_real, n_augmentations_real, axis=0)

        # Augment the repeated real data chunksize + 1 times; the labels are
        # appended once per pass so they stay aligned with the images
        X_train_real_new = []
        y_train_real_new = []
        for _ in tqdm(range(chunksize + 1)):
            X_train_real_new.append(
                augmentations.augment_batch_mp(X_train_real, n_workers=20)
            )
            y_train_real_new.append(y_train_real)

        # Merge the augmentation-pass axis into the sample axis
        X_train_real = np.array(X_train_real_new)
        X_train_real = X_train_real.reshape((-1,) + X_train_real.shape[-3:])
        y_train_real = np.array(y_train_real_new).reshape(-1)

        # Randomly shuffle the training data and take the first n_total_samples entries
        idx = np.random.permutation(len(X_train_real))
        X_train = X_train_real[idx][:n_total_samples]
        y_train = y_train_real[idx][:n_total_samples]

        # Create a dataset and dataloader for the training data
        dataset = Triangles(imgs=X_train, labels=y_train)
        dataloader = DataLoader(
            dataset, batch_size=n_episodes, shuffle=True, num_workers=0
        )

        # Calculate class weights to handle class imbalance. `classes` and `y`
        # must be passed by keyword: they are keyword-only in current
        # scikit-learn releases.
        class_weights = sklearn.utils.class_weight.compute_class_weight(
            "balanced", classes=np.array([0, 1]), y=y_train
        )
        class_weights = torch.FloatTensor(class_weights).to(device)

        # Create a new model, optimizer, scheduler, and loss function for this fold
        net, optimizer, scheduler, criterion = get_net_optimiser_scheduler_criterion(
            device, class_weights=class_weights
        )

        # Loss and learning rate history of this fold
        loss_history = []
        lr_history = []

        # Train the model for n_epochs
        for epoch in tqdm(range(n_epochs)):
            for i_batch, sample_batched in enumerate(dataloader):
                X_train_minibatch = sample_batched["image"].to(device).float()
                y_train_minibatch = sample_batched["label"].to(device).long()

                # Forward pass, calculate loss, backward pass and optimize
                optimizer.zero_grad()
                outputs = net(X_train_minibatch)
                loss = criterion(outputs, y_train_minibatch)
                loss.backward()
                optimizer.step()

            # Record the loss and learning rate for this epoch.
            # NOTE(review): `loss` is the loss of the last mini-batch of the
            # epoch, not the epoch average; the scheduler is stepped on it too.
            loss_history.append(loss.item())
            lr_history.append(optimizer.param_groups[0]["lr"])

            scheduler.step(loss.item())

            # Print loss every print_every_n_epoch epochs
            if epoch % print_every_n_epoch == print_every_n_epoch - 1:
                print("[%d] loss: %.7f" % (epoch + 1, loss.item()))
                print("learning rate now:", optimizer.param_groups[0]["lr"])

        # Evaluate the model on the held-out device without tracking gradients
        net.eval()
        with torch.no_grad():
            outputs = net(torch.FloatTensor(X_test_real).to(device))
            _predicted = torch.max(outputs, 1).indices.cpu().numpy()
            # Softmax over the class axis (dim=1, stated explicitly)
            _scores = torch.softmax(outputs, dim=1).cpu().numpy()

        # Store the predictions, class-1 scores, and true labels of this fold
        predicted.append(_predicted)
        scores.append(_scores[:, 1])
        y_test_this_rep.append(y_test_real)

        # print(sklearn.metrics.confusion_matrix(y_test_real, _predicted, labels=[0, 1]))
        net.train()

    # Combine the predictions, scores, and labels from all folds
    _names = np.hstack(_names)
    _device_names = np.hstack(_device_names)
    predicted = np.hstack(predicted)
    scores = np.hstack(scores)
    y_test_this_rep = np.hstack(y_test_this_rep)

    # Record the results of this repetition in the results dictionary
    results_only_real_data = training_utils.record_results(
        results_only_real_data,
        predicted,
        scores,
        y_test_this_rep,
        _device_names,
        _names,
    )

    # Print the results for this repetition
    training_utils.report_results(results_only_real_data)

    # Save the results after every repetition; the context manager makes sure
    # the file is flushed and closed
    with open(os.path.join(path, "results_only_real_data.pkl"), "wb") as results_file:
        pickle.dump(results_only_real_data, results_file)

# Training with mixed data

In [None]:
# Name of the run that trains on simulated and experimental data together.
NAME_OF_RUN = "2021XXXX_mixed_data_" + device_type

# Results are written to data/<run> and network weights to
# data/saved_networks/<run>.
path = "data/" + NAME_OF_RUN
path_networks = "data/saved_networks/" + NAME_OF_RUN

# makedirs also creates the parent folders ("data/", "data/saved_networks/"),
# so this cell does not rely on an earlier cell having created them.
os.makedirs(path, exist_ok=True)
os.makedirs(path_networks, exist_ok=True)

In [None]:
# Number of passes over the training set for each trained network.
n_epochs = 100
# Passed to the DataLoader as batch_size in the training cell.
n_episodes = 128  # None #only make a single batch
# Number of times the complete experiment is repeated.
n_repetitions = 10

# NOTE(review): img_size is not referenced in the visible part of the notebook.
img_size = (100, 100)
# Number of images requested from simulation.simulate per repetition.
n_imgs_per_batch = 12500
# Number of augmentation passes made over each simulated batch.
n_augmentations = 2  # 10
# Divisor used when computing how often each real sample is repeated; the
# repeated set is then augmented chunksize + 1 times.
chunksize = 10

# The loss and learning rate are printed once every this many epochs.
print_every_n_epoch = 33

In [None]:
# Initialize an empty dictionary to store the results
results_mixed_data = training_utils.get_results_dict()

# Train on a mix of simulated and experimental data, n_repetitions times.
# The loop starts at the number of entries already stored under "AUC"
# (presumably one per recorded repetition - verify against training_utils).
for rep in range(len(results_mixed_data["AUC"]), n_repetitions):
    print("this is rep", rep)

    # Simulating a batch of images for training
    print("simulating a new batch with", n_imgs_per_batch, "samples")
    X_train_sim, y_train_sim = simulation.simulate(n_imgs_per_batch)
    X_train_sim = np.array(X_train_sim)
    y_train_sim = np.array(y_train_sim, dtype=int)

    # Augment the simulated batch n_augmentations times; the labels are
    # repeated once per pass so they stay aligned with the images
    X_train_new = []
    y_train_new = []
    for _ in tqdm(range(n_augmentations)):
        X_train_new.append(
            augmentations.augment_batch_mp(
                X_train_sim, shear_and_stretch=False, n_workers=20
            )
        )
        y_train_new.append(y_train_sim)

    # Merge the augmentation-pass axis into the sample axis
    X_train_sim = np.array(X_train_new)
    X_train_sim = X_train_sim.reshape((-1,) + X_train_sim.shape[-3:])
    y_train_sim = np.array(y_train_new).reshape(-1)

    # Per-fold predictions, scores, labels, names and device names of this repetition
    predicted = []
    scores = []
    y_test_this_rep = []
    _names = []
    _device_names = []

    # Hold out every test device once; the simulated data of this repetition
    # is shared by all folds
    for test_device_name in test_devices:
        print("testing", test_device_name)

        # There is one device that shows up under two names
        if test_device_name == "Tuor6A_chiplet_5_device_C":
            test_index = np.logical_or(
                device_names == "Tuor6A_chiplet_5_device_C_cooldown_1",
                device_names == "Tuor6A_chiplet_5_device_C_cooldown_2",
            )
        else:
            test_index = device_names == test_device_name
        train_index = np.logical_not(test_index)

        # Split the data into training and testing sets based on the masks
        _names.append(names[test_index])
        _device_names.append(device_names[test_index])
        X_train_real, X_test_real = X[train_index], X[test_index]
        y_train_real, y_test_real = y[train_index], y[test_index]
        y_train_real = np.array(y_train_real, dtype=int)

        # Number of copies of every real sample needed to roughly match the
        # amount of simulated data after augmentation. At least one copy is
        # kept: a value of 0 would leave np.repeat with an empty real set.
        n_augmentations_real = max(
            1, len(X_train_sim) // (len(X_train_real) * chunksize)
        )

        # Repeat the training data to match the number of augmentations
        X_train_real = np.repeat(X_train_real, n_augmentations_real, axis=0)
        y_train_real = np.repeat(y_train_real, n_augmentations_real, axis=0)

        # Augment the repeated real data chunksize + 1 times
        X_train_real_new = []
        y_train_real_new = []
        for _ in tqdm(range(chunksize + 1)):
            X_train_real_new.append(
                augmentations.augment_batch_mp(X_train_real, n_workers=20)
            )
            y_train_real_new.append(y_train_real)

        # Merge the augmentation-pass axis into the sample axis
        X_train_real = np.array(X_train_real_new)
        X_train_real = X_train_real.reshape((-1,) + X_train_real.shape[-3:])
        y_train_real = np.array(y_train_real_new).reshape(-1)

        # Randomly shuffle the real training data and keep at most as many
        # samples as there are simulated ones
        idx = np.random.permutation(len(X_train_real))
        X_train_real = X_train_real[idx][: len(X_train_sim)]
        y_train_real = y_train_real[idx][: len(X_train_sim)]

        print("len sim data", len(X_train_sim), ", len real data", len(X_train_real))

        # Combine the simulated and real training data
        X_train = np.vstack([X_train_sim, X_train_real])
        y_train = np.hstack([y_train_sim, y_train_real])

        print("len total data", len(X_train))

        # Create a PyTorch DataLoader for the combined training data
        dataset = Triangles(imgs=X_train, labels=y_train)
        dataloader = DataLoader(
            dataset, batch_size=n_episodes, shuffle=True, num_workers=0
        )

        # Compute class weights to handle class imbalance. `classes` and `y`
        # must be passed by keyword: they are keyword-only in current
        # scikit-learn releases.
        class_weights = sklearn.utils.class_weight.compute_class_weight(
            "balanced", classes=np.array([0, 1]), y=y_train
        )
        class_weights = torch.FloatTensor(class_weights).to(device)

        # Get the network, optimizer, learning rate scheduler, and loss function
        net, optimizer, scheduler, criterion = get_net_optimiser_scheduler_criterion(
            device, class_weights=class_weights
        )

        # Loss and learning rate history of this fold
        loss_history = []
        lr_history = []

        # Train the model for n_epochs
        for epoch in tqdm(range(n_epochs)):
            for i_batch, sample_batched in enumerate(dataloader):
                X_train_minibatch = sample_batched["image"].to(device).float()
                y_train_minibatch = sample_batched["label"].to(device).long()

                # Forward pass, calculate loss, backward pass and optimize
                optimizer.zero_grad()
                outputs = net(X_train_minibatch)
                loss = criterion(outputs, y_train_minibatch)
                loss.backward()
                optimizer.step()

            # Record the loss and learning rate for this epoch.
            # NOTE(review): `loss` is the loss of the last mini-batch of the
            # epoch, not the epoch average; the scheduler is stepped on it too.
            loss_history.append(loss.item())
            lr_history.append(optimizer.param_groups[0]["lr"])

            scheduler.step(loss.item())

            # Print loss every print_every_n_epoch epochs
            if epoch % print_every_n_epoch == print_every_n_epoch - 1:
                print("[%d] loss: %.7f" % (epoch + 1, loss.item()))
                print("learning rate now:", optimizer.param_groups[0]["lr"])

        # Evaluate the model on the held-out device without tracking gradients
        net.eval()
        with torch.no_grad():
            outputs = net(torch.FloatTensor(X_test_real).to(device))
            _predicted = torch.max(outputs, 1).indices.cpu().numpy()
            # Softmax over the class axis (dim=1, stated explicitly)
            _scores = torch.softmax(outputs, dim=1).cpu().numpy()

        # Store the predictions, class-1 scores, and true labels of this fold
        predicted.append(_predicted)
        scores.append(_scores[:, 1])
        y_test_this_rep.append(y_test_real)

        # Print the confusion matrix for this fold
        print(sklearn.metrics.confusion_matrix(y_test_real, _predicted, labels=[0, 1]))

        # Switch the model back to training mode
        net.train()

    # Combine the predictions, scores, and labels from all folds
    _names = np.hstack(_names)
    _device_names = np.hstack(_device_names)
    predicted = np.hstack(predicted)
    scores = np.hstack(scores)
    y_test_this_rep = np.hstack(y_test_this_rep)

    # Record the results of this repetition of the training and testing process
    results_mixed_data = training_utils.record_results(
        results_mixed_data, predicted, scores, y_test_this_rep, _device_names, _names
    )

    # Print results
    training_utils.report_results(results_mixed_data)

    # Save the results after every repetition; the context manager makes sure
    # the file is flushed and closed
    with open(os.path.join(path, "results_mixed_data.pkl"), "wb") as results_file:
        pickle.dump(results_mixed_data, results_file)