In [12]:
use_gpu = True
use_random_split = False
use_dataparallel = True


In [19]:
# All files are here
import os
import numpy as np
from PIL import Image
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from tqdm import tqdm
import sys

sys.path.append(os.path.join(os.getcwd(), 'utils'))
if use_gpu:
    from gpu_tools import select_gpu
    from gpu_tools import query_gpu
    os.environ["CUDA_INVISIBLE_DEVICES"] = ','.join([ str(obj) for obj in select_gpu(query_gpu())])

import time
import datetime

torch.manual_seed(42)


/bin/sh: nvidia-smi: command not found


<torch._C.Generator at 0x13c92bcd0>

In [20]:
# We write a function to convert the images folder to .dat type
def images_to_dat(images_folder, output_file):
    with open(output_file, "wb") as dat_file:
        for filename in sorted([f for f in os.listdir(images_folder) if f != "output.csv" and f.endswith(".jpeg")], key = lambda x: int(x.split('_')[0])):
        # for filename in os.listdir(images_folder):
            if filename.endswith(".jpeg"):
            # Read the image file
                img_path = os.path.join(images_folder, filename)
                img = cv2.imread(img_path)
                if img is None:
                    print(f"Failed to load {filename}")
                    continue 
                img = img > 32
                img = img.astype(np.uint8) * 255
                img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                img_array = np.array(img)

                # Flatten the image and convert to bytes, for easier storage
                image_bytes = img_array.flatten().tobytes()

                # Write the image data and store that in .dat file
                dat_file.write(image_bytes)

In [21]:
# Use the function
images_to_dat("training_data", "training_data.dat")
images_to_dat("validation_data", "validation_data.dat")

In [22]:
# We then create an array of images
# This is a breakthrough, at least for me, since I cannot do anything to store a large amount like that
# to a list, until I find np.memmap
training_data = []
training_data.append(np.memmap("training_data.dat", dtype = np.uint8, mode = 'r').reshape(
                        (-1, 64, 60))) # since it is 60 x 60 images
training_data = np.concatenate(training_data)
print(training_data.shape)

(287102, 64, 60)


In [23]:
# Similarly, for validation_data
validation_data = []
validation_data.append(np.memmap("validation_data.dat", dtype = np.uint8, mode = 'r').reshape(
    (-1, 64, 60)))
validation_data = np.concatenate(validation_data)
print(validation_data.shape)

(123045, 64, 60)


In [26]:
# For output

def extract_outputs(images_folder):
    outputs = []

    for filename in sorted([f for f in os.listdir(images_folder) if f != "output.csv" and f.endswith(".jpeg")], key = lambda x: int(x.split('_')[0])):
        if filename.endswith('.jpeg'):  # Adjust based on file extension
            # Split the filename to extract the output value
            parts = filename.split('_')
            if len(parts) > 4:  # Ensure there are enough parts
                output_value = parts[-1].split('.')[0]  # Get the last part before the extension
                outputs.append(int(output_value))  # Convert to integer and store
            
    return outputs

def save_outputs_to_csv(images_folder, output_file):
    outputs = extract_outputs(images_folder)
    
    # Create the DataFrame
    df = pd.DataFrame({'output': outputs})
    
    # Save DataFrame as a Feather file
    df.to_csv(output_file)

In [27]:
save_outputs_to_csv("training_data", "training_outputs.csv")
save_outputs_to_csv("validation_data", "validation_outputs.csv")

In [28]:
training_outputs = pd.read_csv("training_outputs.csv")
training_outputs = training_outputs.drop(training_outputs.columns[0], axis = 1)
print(f"Training Label Shape: {training_outputs.shape}")
validation_outputs = pd.read_csv("validation_outputs.csv")
validation_outputs = validation_outputs.drop(validation_outputs.columns[0], axis = 1)
print(f"Validation Label Shape: {validation_outputs.shape}")

Training Label Shape: (287102, 1)
Validation Label Shape: (123045, 1)


In [29]:
# Change both to numpy array
training_outputs = training_outputs.to_numpy()
validation_outputs = validation_outputs.to_numpy()

In [74]:
# Try Squeeze Shape
training_outputs = training_outputs.squeeze()
validation_outputs = validation_outputs.squeeze()
print(f"Squeeze shape of training outputs: {training_outputs.shape}")
print(f"Squeeze shape of validation outputs: {validation_outputs.shape}")

Squeeze shape of training outputs: (287102,)
Squeeze shape of validation outputs: (123045,)


In [75]:
# Build Dataset
class MyDataset(Dataset):
    def __init__(self, img, label):
        self.img = torch.Tensor(img.copy())
        self.label = torch.Tensor(label)
        self.len = len(img)
    
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        return self.img[idx], self.label[idx]

In [76]:
# Convert the numpy array of training data and validation data
# to Tensor
training_dataset = MyDataset(training_data, training_outputs)
validation_dataset = MyDataset(validation_data, validation_outputs)

In [77]:
# We need DataLoader to do batch training
train_dataloader = DataLoader(training_dataset, batch_size = 128, shuffle = True, pin_memory = True)
val_dataloader = DataLoader(validation_dataset, batch_size = 256, shuffle = False, pin_memory = True)

In [78]:
# Now, we come to models
# Initialization of weights
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.)
    elif isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)

In [79]:
torch.cuda.is_available()

False

In [80]:
from models import baseline

use_gpu = torch.cuda.is_available()
device = 'cuda' if use_gpu else 'cpu'
print(f"Device: {device}")
export_onnx = True
net = baseline.Net().to(device)
net.apply(init_weights)

if export_onnx:
    import torch.onnx
    x = torch.randn([1,1,64,60]).to(device)
    torch.onnx.export(net,               # model being run
                      x,                         # model input (or a tuple for multiple inputs)
                      "../cnn_baseline.onnx",   # where to save the model (can be a file or file-like object)
                      export_params=False,        # store the trained parameter weights inside the model file
                      opset_version=10,          # the ONNX version to export the model to
                      do_constant_folding=False,  # whether to execute constant folding for optimization
                      input_names = ['input_images'],   # the model's input names
                      output_names = ['output_prob'], # the model's output names
                      dynamic_axes={'input_images' : {0 : 'batch_size'},    # variable length axes
                                     'output_prob' : {0 : 'batch_size'}})

Device: cpu


In [81]:
count = 0
for name, parameters in net.named_parameters():
    print(name, ':', parameters.size())
    count += parameters.numel()
print('total_parameters : {}'.format(count))

layer1.0.weight : torch.Size([64, 1, 5, 3])
layer1.0.bias : torch.Size([64])
layer1.1.weight : torch.Size([64])
layer1.1.bias : torch.Size([64])
layer2.0.weight : torch.Size([128, 64, 5, 3])
layer2.0.bias : torch.Size([128])
layer2.1.weight : torch.Size([128])
layer2.1.bias : torch.Size([128])
layer3.0.weight : torch.Size([256, 128, 5, 3])
layer3.0.bias : torch.Size([256])
layer3.1.weight : torch.Size([256])
layer3.1.bias : torch.Size([256])
fc1.1.weight : torch.Size([2, 46080])
fc1.1.bias : torch.Size([2])
total_parameters : 708866


In [82]:
from thop import profile as thop_profile

flops, params = thop_profile(net, inputs=(next(iter(train_dataloader))[0].to(device),))
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
FLOPs = 36.21961728G
Params = 0.708866M


In [84]:
from torch.profiler import profile, record_function, ProfilerActivity

inputs = next(iter(train_dataloader))[0].to(device)

with profile(activities=[
        ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
    with record_function("model_inference"):
        net(inputs)

prof.export_chrome_trace("../trace.json")
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

STAGE:2024-11-16 01:11:46 11951:563953 ActivityProfilerController.cpp:314] Completed Stage: Warm Up


---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference        11.53%      54.579ms       100.00%     473.470ms     473.470ms             1  
                    aten::reshape         0.00%       8.000us         0.01%      28.000us      14.000us             2  
                       aten::view         0.00%      20.000us         0.00%      20.000us      10.000us             2  
                     aten::conv2d         0.00%      18.000us        50.23%     237.847ms      79.282ms             3  
                aten::convolution         0.02%      72.000us        50.23%     237.829ms      79.276ms             3  
               aten::_convolution       

STAGE:2024-11-16 01:11:46 11951:563953 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-11-16 01:11:46 11951:563953 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [85]:
# Training stage
def train_loop(dataloader, net, loss_fn, optimizer):
    
    running_loss = 0.0
    current = 0
    net.train()
    
    with tqdm(dataloader) as t:
        for batch, (X, y) in enumerate(t):
            X = X.to(device)
            y = y.to(device)
            y_pred = net(X)
            loss = loss_fn(y_pred, y.long())
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss = (len(X) * loss.item() + running_loss * current) / (len(X) + current)
            current += len(X)
            t.set_postfix({'running_loss':running_loss})
    
    return running_loss

In [86]:
def val_loop(dataloader, net, loss_fn):

    running_loss = 0.0
    current = 0
    net.eval()
    
    with torch.no_grad():
        with tqdm(dataloader) as t:
            for batch, (X, y) in enumerate(t):
                X = X.to(device)
                y = y.to(device)
                y_pred = net(X)
                loss = loss_fn(y_pred, y.long())

                running_loss += loss.item()
                running_loss = (len(X) * running_loss + loss.item() * current) / (len(X) + current)
                current += len(X)
            
    return running_loss

In [87]:
if use_gpu and use_dataparallel and 'DataParallel' not in str(type(net)):
    net = net.to(device)
    net = nn.DataParallel(net)

In [88]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)

start_epoch = 0
min_val_loss = 1e9
last_min_ind = -1
early_stopping_epoch = 5

from torch.utils.tensorboard import SummaryWriter
tb = SummaryWriter()

In [89]:
start_time = datetime.datetime.now().strftime('%Y%m%d_%H:%M:%S')
# os.mkdir('../pt'+os.sep+start_time)
epochs = 100
for t in range(start_epoch, epochs):
    print(f"Epoch {t}\n-------------------------------")
    time.sleep(0.2)
    train_loss = train_loop(train_dataloader, net, loss_fn, optimizer)
    val_loss = val_loop(val_dataloader, net, loss_fn)
    tb.add_histogram("train_loss", train_loss, t)
    torch.save(net, '../pt'+os.sep+start_time+os.sep+'baseline_epoch_{}_train_{:5f}_val_{:5f}.pt'.format(t, train_loss, val_loss)) 
    if val_loss < min_val_loss:
        last_min_ind = t
        min_val_loss = val_loss
    elif t - last_min_ind >= early_stopping_epoch:
        break

print('Done!')
print('Best epoch: {}, val_loss: {}'.format(last_min_ind, min_val_loss))

Epoch 0
-------------------------------


  1%|          | 20/2243 [00:20<37:18,  1.01s/it, running_loss=1.14]


KeyboardInterrupt: 