In [1]:
import numpy as np
import pickle
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F

import sklearn

import os

import imgaug.augmenters as iaa


from utils import augmentations

from tqdm.notebook import tqdm

from utils.training_utils import Triangles, build_batches, get_net_optimiser_scheduler_criterion
from utils import training_utils

from torch.utils.data import Dataset, DataLoader

from importlib import reload

ModuleNotFoundError: No module named 'imgaug'

In [2]:
device_type='nanowire_low_res'

NAME_OF_RUN='20230830_lenet_all_real_data_'+device_type

path='../data/psb_detection_data/'
    
if not os.path.exists(path):
    os.mkdir(path)

path=path+NAME_OF_RUN
    
if not os.path.exists(path):
    os.mkdir(path)
   
path_networks='data/saved_networks/'+NAME_OF_RUN
if not os.path.exists(path_networks):
    os.mkdir(path_networks)

In [3]:
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
print(device)

cuda:2


In [4]:
X=np.load('../data/psb_detection_data/'+device_type+'_imgs.npy', allow_pickle=True)
y=np.load('../data/psb_detection_data/'+device_type+'_labels.npy')

resizer=iaa.Resize([100,100])

new_X=[]
for element in X:
    el0=resizer.augment_image(image=element[1])
    el1=resizer.augment_image(image=element[0])
    im=augmentations.normalise([el0,el1])
    new_X.append(im)
X=np.array(new_X)

In [8]:
n_epochs=100
n_episodes=128 #None #only make a single batch
n_repetitions=10

img_size=(100,100)

n_total_samples=50000

chunksize=10

print_every_n_epoch=33

In [7]:
results_only_real_data=training_utils.get_results_dict()

0

In [11]:
for rep in range(0,n_repetitions):
    print('This is rep', rep)

    predicted=[]
    scores=[]

    y_test_this_rep=[]
    _names=[]
    _device_names=[]

    fold=0
    
    
    X_train_real = X
    y_train_real = y
    y_train_real=np.array(y_train_real, dtype=int)

    n_augmentations_real=n_total_samples//(len(X_train_real)*chunksize)

    X_train_real=np.repeat(X_train_real,n_augmentations_real,axis=0)
    y_train_real=np.repeat(y_train_real,n_augmentations_real,axis=0)

    print('augmenting the real data this many times: ', n_augmentations_real)
    print('# data in real training data',len(X_train_real) )
    X_train_real_new=[]
    y_train_real_new=[]
    for n_aug in tqdm(range(chunksize+1)):
        _X_train_real=augmentations.augment_batch_mp(X_train_real, n_workers=20)
        X_train_real_new.append(_X_train_real)
        y_train_real_new.append(y_train_real)
    X_train_real=np.array(X_train_real_new)
    y_train_real=np.array(y_train_real_new)
    X_train_real=X_train_real.reshape((-1,X_train_real.shape[-3],X_train_real.shape[-2],X_train_real.shape[-1]))
    y_train_real=y_train_real.reshape(-1)

    idx=np.random.permutation(len(X_train_real))
    X_train=X_train_real[idx]
    y_train=y_train_real[idx]

    X_train=X_train[:n_total_samples]
    y_train=y_train[:n_total_samples]

    print('len total data', len(X_train))

    dataset = Triangles(imgs=X_train,  labels=y_train)

    dataloader = DataLoader(dataset, batch_size=n_episodes,
                        shuffle=True, num_workers=0)

    class_weights=sklearn.utils.class_weight.compute_class_weight('balanced', [0,1] , y_train)
    class_weights = torch.FloatTensor(class_weights).to(device)


    net,optimizer,scheduler, criterion =get_net_optimiser_scheduler_criterion(
        device,
        class_weights=class_weights, 
        model_type ='lenet')

    loss_history=[]
    lr_history=[]
    for epoch in tqdm(range(n_epochs)):

        for i_batch, sample_batched in enumerate(dataloader):
            X_train_minibatch=sample_batched['image'].to(device).float()
            y_train_minibatch=sample_batched['label'].to(device).long()

            optimizer.zero_grad()
            outputs = net(X_train_minibatch)
            loss = criterion(outputs, y_train_minibatch)
            loss.backward()
            optimizer.step()

            #running_loss += loss.item()
        loss_history.append(loss.item())
        lr_history.append(optimizer.param_groups[0]['lr'])

        scheduler.step(loss.item())
        if epoch % print_every_n_epoch==print_every_n_epoch-1:
            print('[%d] loss: %.7f' %
                  (epoch + 1, loss.item()))
            print('learning rate now:', optimizer.param_groups[0]['lr'])

    torch.save(net.state_dict(), path_networks+'/all_real_data_rep_'+str(rep)+'.pth')

This is rep 5
augmenting the real data this many times:  7
# data in real training data 4382


  0%|          | 0/11 [00:00<?, ?it/s]

len total data 48202




  0%|          | 0/100 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[33] loss: 0.0010598
learning rate now: 1.0000000000000002e-06
[66] loss: 0.0115308
learning rate now: 1.0000000000000004e-08
[99] loss: 0.0040562
learning rate now: 1.0000000000000004e-08
This is rep 6
augmenting the real data this many times:  7
# data in real training data 4382


  0%|          | 0/11 [00:00<?, ?it/s]

len total data 48202


  0%|          | 0/100 [00:00<?, ?it/s]

[33] loss: 0.0073340
learning rate now: 0.0001
[66] loss: 0.0035647
learning rate now: 1.0000000000000004e-08
[99] loss: 0.0389209
learning rate now: 1.0000000000000004e-08
This is rep 7
augmenting the real data this many times:  7
# data in real training data 4382


  0%|          | 0/11 [00:00<?, ?it/s]

len total data 48202




  0%|          | 0/100 [00:00<?, ?it/s]

[33] loss: 0.0145077
learning rate now: 0.0001
[66] loss: 0.0132519
learning rate now: 1.0000000000000002e-07
[99] loss: 0.0008533
learning rate now: 1.0000000000000004e-08
This is rep 8
augmenting the real data this many times:  7
# data in real training data 4382


  0%|          | 0/11 [00:00<?, ?it/s]

len total data 48202


  0%|          | 0/100 [00:00<?, ?it/s]

[33] loss: 0.0010352
learning rate now: 1e-05
[66] loss: 0.0079772
learning rate now: 1.0000000000000004e-08
[99] loss: 0.0051205
learning rate now: 1.0000000000000004e-08
This is rep 9
augmenting the real data this many times:  7
# data in real training data 4382


  0%|          | 0/11 [00:00<?, ?it/s]

len total data 48202




  0%|          | 0/100 [00:00<?, ?it/s]

[33] loss: 0.0006797
learning rate now: 1e-05
[66] loss: 0.0005676
learning rate now: 1.0000000000000004e-08
[99] loss: 0.0010039
learning rate now: 1.0000000000000004e-08
