In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision

from tqdm import tqdm
from copy import deepcopy
from timeit import default_timer
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, log_loss
from sklearn.metrics import RocCurveDisplay, roc_curve, auc


import torch.nn as nn
import torch.nn.functional as F

from torchsummary import summary
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.utils.data import random_split

import gc
# from numba import cuda

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
simulations = np.load('/content/drive/MyDrive/simulations1.npy', allow_pickle=True)
dataset = np.load('/content/drive/MyDrive/dataset.npy', allow_pickle=True)

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# N = simulations.shape[0]
N = 100
X = torch.from_numpy(simulations[:N].reshape(N, 1, 513, 513)).float().to(device)
y = torch.from_numpy(dataset[:N].reshape(N, 1, 513, 513)).float().to(device)

In [5]:
del simulations
del dataset

In [10]:
def train(model, criterion, optimizer, X, y, n_epochs, n_stop=10):
    bs = 50
    # total_size = dataset.shape[0]
    # train_dataset, val_dataset, test_dataset = random_split(dataset, 
                                            # [int(total_size * 0.7), int(total_size * 0.2), int(total_size * 0.1)])
    X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)
    train_data = TensorDataset(X_train, y_train)
    val_data = TensorDataset(X_val, y_val)
    test_data = TensorDataset(X_test, y_test)

    train_loader = DataLoader(train_data, batch_size=bs)
    val_loader = DataLoader(val_data, batch_size=bs)
    test_loader = DataLoader(test_data, batch_size=bs)

    train_loss = list()
    val_loss = list()
    min_val_loss = np.inf
    f = 0
    t0 = default_timer()
    for epoch in range(n_epochs):
        t1 = default_timer()
        model.train()
        train_batch_loss = list()
        for batch in train_loader:
            optimizer.zero_grad()
            _X, _y = batch
            _X, _y = _X.to(device), _y.to(device)
            _y_pred = model(_X).view(-1, 513, 513)
            # print(_y_pred.size(), _y.size())
            loss = criterion(_y_pred, _y)
            loss.backward()
            optimizer.step()
            train_batch_loss.append(loss.item())

        model.eval()
        val_batch_loss = list()
        for batch in val_loader:
            _X, _y = batch
            _X, _y = _X.to(device), _y.to(device)
            _y_pred = model(_X).view(-1, 513, 513)
            loss = criterion(_y_pred, _y)
            val_batch_loss.append(loss.item())

        t2 = default_timer()

        if epoch % 10 == 0:
            print(f'Epoch: {epoch} ({round(t2-t1, 3)}s, {round(t2-t0, 3)}s), \tTrain loss: {np.mean(train_batch_loss).round(3)}, \tValidation loss: {np.mean(val_batch_loss).round(3)}')
        train_loss.append(np.mean(train_batch_loss))
        val_loss.append(np.mean(val_batch_loss))

        # (optional) early stopping:

        if round(val_loss[-1], 3) >= min_val_loss:
            f = f + 1
        else:
            # print(f'epoch: {epoch}, val loss did decrease, saving model')
            f = 0
            best_model = deepcopy(model)
            min_val_loss = round(val_loss[-1], 3)
        if f >= n_stop:
            print(f'epoch: {epoch}, val loss did not decrease for {f} epoch(s)')
            break

    return best_model, train_loss, val_loss

In [7]:
!nvidia-smi

Fri May  5 13:03:04 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P0    26W /  70W |    803MiB / 15360MiB |      9%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [8]:
# Define the CNN architecture
class CNN(torch.nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3, padding=1)
    self.pool1 = torch.nn.MaxPool2d(kernel_size=2)
    self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.pool2 = torch.nn.MaxPool2d(kernel_size=2)
    self.fc1 = torch.nn.Linear(64 * 128 * 128, 128)
    self.fc2 = torch.nn.Linear(128, 513 * 513)

  def forward(self, x):
    x = self.conv1(x)
    x = torch.relu(x)
    x = self.pool1(x)
    x = self.conv2(x)
    x = torch.relu(x)
    x = self.pool2(x)
    x = x.view(-1, 64 * 128 * 128)
    x = self.fc1(x)
    x = torch.relu(x)
    x = self.fc2(x)
    return x

In [9]:
# Instantiate the CNN
model = CNN().to(device)
model.cuda()

# Define the loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model, train_loss, val_loss = train(model, criterion, optimizer, X, y, n_epochs=10000, n_stop=10)

torch.Size([50, 513, 513]) torch.Size([50, 1, 513, 513])


  return F.mse_loss(input, target, reduction=self.reduction)


torch.Size([30, 513, 513]) torch.Size([30, 1, 513, 513])


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0 (7.779s, 7.779s), 	Train loss: 27309.019, 	Validation loss: 22605.18
torch.Size([50, 513, 513]) torch.Size([50, 1, 513, 513])


OutOfMemoryError: ignored

In [None]:
plt.plot(train_loss, label='train loss')
plt.plot(val_loss, label='validation loss')
plt.legend()
plt.ylabel('CE Loss')
plt.xlabel('# of epoch')
plt.show()