In [None]:
import pickle
import pandas as pd
import numpy as np
import time
import math
import os
from PIL import Image
from sklearn.model_selection import train_test_split

import datetime
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim


# Root directory of the assignment data.
a1dir = "../../../scratch/lt2326-h21/a1"
# File names found in the `images` sub-directory of the data root.
imgs = os.listdir(os.path.join(a1dir, 'images'))

# Mini-batch size.
batch_size = 4

In [None]:
def get_meta(direc):
    """Load the annotation table for the images that exist on disk.

    Reads ``<direc>/train.jsonl`` (one JSON record per line), keeps only the
    rows whose ``file_name`` is listed in ``<direc>/images`` and drops the
    ``ignore``, ``image_id``, ``height`` and ``width`` columns.

    Args:
        direc: Path of the dataset root directory.

    Returns:
        pandas.DataFrame holding the remaining columns of the matching rows.
    """
    records = pd.read_json(direc + "/train.jsonl", lines=True)
    available = os.listdir(direc + '/images')

    on_disk = records['file_name'].isin(available)
    unused_columns = ['ignore', 'image_id', 'height', 'width']
    return records[on_disk].drop(columns=unused_columns)

In [None]:
# Annotation table for the images found under `a1dir`.
df = get_meta(a1dir)

In [None]:
# this literally doesn't even take a second so no worries
def get_bboxes(df, k=None):
    """Collect the bounding boxes of the Chinese characters of every image.

    Args:
        df: DataFrame with a ``file_name`` column and an ``annotations``
            column. Every annotation entry is iterated as a sequence of dicts
            that carry ``is_chinese`` and ``adjusted_bbox`` (``x, y, w, h``).
        k: If given, only ``k`` rows sampled with ``random_state=42`` are used.

    Returns:
        dict mapping file name to an integer array of shape ``(n, 4)`` whose
        rows are ``[xmin, ymin, xmax, ymax]``. Images without any Chinese
        character get an empty ``(0, 4)`` array, so the result can always be
        indexed as a 2-D array.
    """
    if k is not None:
        df = df.sample(k, random_state=42)

    all_bboxes = {}
    for _, row in df.iterrows():
        img_bboxes = []  # bboxes per image
        for sign in row['annotations']:
            for d in sign:
                if d['is_chinese']:
                    x, y, w, h = d['adjusted_bbox']
                    # Round outwards so the integer box contains the float box.
                    img_bboxes.append([
                        int(math.floor(x)),
                        int(math.floor(y)),
                        int(math.ceil(x + w)),
                        int(math.ceil(y + h)),
                    ])

        # reshape(-1, 4) keeps the array 2-D even when no box was collected.
        all_bboxes[row['file_name']] = np.array(img_bboxes, dtype=int).reshape(-1, 4)

    return all_bboxes
    

In [None]:
# Extract the bounding boxes for the whole table and report how long it took.
tic = time.perf_counter()
all_bboxes = get_bboxes(df)
toc = time.perf_counter()
print(f"Finished in {toc - tic:0.4f} seconds")

In [None]:
def get_labels(bboxes=None, size=2048):
    """Rasterise bounding boxes into one binary mask per image.

    Args:
        bboxes: dict mapping file name to an array whose rows are
            ``[xmin, ymin, xmax, ymax]``. When omitted, the module-level
            ``all_bboxes`` is used; it is looked up at call time so a
            re-computed ``all_bboxes`` is picked up without redefining this
            function.
        size: Side length of the square mask (default 2048).

    Returns:
        dict mapping file name to an int array of shape ``(size, size, 1)``.
        Entry ``[i, j, 0]`` is 1 when ``xmin <= i <= xmax`` and
        ``ymin <= j <= ymax`` holds for at least one box, otherwise 0.
    """
    if bboxes is None:
        bboxes = all_bboxes

    print(f'Time now: {datetime.datetime.now().strftime("%H:%M:%S")}')

    tic = time.perf_counter()
    labelling = {}
    for count, (fn, bbox) in enumerate(bboxes.items(), start=1):
        tac = time.perf_counter()

        mask = np.zeros((size, size, 1), dtype=int)
        # reshape(-1, 4) also accepts the empty array of an image without boxes.
        for xmin, ymin, xmax, ymax in np.asarray(bbox).reshape(-1, 4):
            # Inclusive bounds, clipped to the mask so negative or oversized
            # coordinates cannot wrap around through negative slice indices.
            first = slice(max(0, math.ceil(xmin)), max(0, min(size, math.floor(xmax) + 1)))
            second = slice(max(0, math.ceil(ymin)), max(0, min(size, math.floor(ymax) + 1)))
            # NOTE(review): the first mask axis is matched against x and the
            # second against y, as in the original point-in-box test. Arrays
            # built from PIL images are usually indexed [row=y, col=x], so
            # confirm that this orientation is intended.
            mask[first, second] = 1

        labelling[fn] = mask
        tuc = time.perf_counter()
        print(f"Finished image {fn} in {tuc - tac:0.4f} seconds, {count}/{len(bboxes)}")

    toc = time.perf_counter()
    final_time = str(datetime.timedelta(seconds=round(toc - tic)))
    # Guard against an empty input instead of dividing by zero.
    s_per_img = (toc - tic) / len(bboxes) if bboxes else 0.0

    print(f"Finished in {final_time}\nAvg. time per img: {s_per_img} seconds.")
    print(f'Finished at: {datetime.datetime.now().strftime("%H:%M:%S")}')

    return labelling


In [None]:

#labelling = get_labels()

In [None]:
#pickle.dump(labelling, open("labelling_alsotrash.p", "wb"))

## Start running here if you have labels

In [None]:
# NOTE: unpickling can execute arbitrary code, so only load label files that
# were produced by this notebook. The `with` block closes the file afterwards.
with open("labelling_small.p", "rb") as label_file:
    labelling = pickle.load(label_file)

In [None]:
# Keys of `labelling`; used below as the names of the image files to load.
file_names = labelling.keys()

In [None]:
def get_imgs(filenames, datadir, imgs):
    """Load the requested images as RGB arrays.

    Args:
        filenames: Iterable of image file names to load.
        datadir: Directory that contains the image files.
        imgs: Collection of file names known to exist; names that are not in
            it are skipped.

    Returns:
        dict mapping file name to the numpy array of the RGB-converted image,
        in the iteration order of ``filenames``.
    """
    available = set(imgs)  # constant-time membership instead of a list scan per file
    loaded = {}
    for name in filenames:
        if name not in available:
            continue
        # The context manager releases the file handle as soon as the pixels are copied.
        with Image.open("{}/{}".format(datadir, name)) as img:
            loaded[name] = np.array(img.convert('RGB'))
    return loaded

In [None]:
# Load every labelled image that is present in the image directory listing.
data = get_imgs(file_names, a1dir+'/images', imgs)

In [None]:
# Look the masks up by the file names that were actually loaded: `get_imgs`
# skips names that are missing on disk, so taking `labelling.values()` directly
# could leave the masks out of step with the image arrays.
label_arrs = [labelling[fn] for fn in data]

In [None]:
# Image arrays in the insertion order of `data`.
img_arrs = list(data.values()) 

## PART 2

In [None]:

# Prefer the third GPU; fall back to the default GPU or the CPU so the notebook
# still runs on machines that have fewer (or no) CUDA devices.
if torch.cuda.device_count() > 2:
    device = torch.device('cuda:2')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# training hyperparameters
batch_size = 4
lr = 0.01
epochs = 3

In [None]:
# 80 % of the samples go to training; the held-out 20 % is then halved into
# the test and validation sets. Both splits use a fixed seed.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    img_arrs, label_arrs, train_size=0.8, random_state=42
)
X_test, X_val, y_test, y_val = train_test_split(
    X_holdout, y_holdout, train_size=0.5, random_state=42
)

In [None]:
# Pair every image with its mask and wrap each split in a shuffling DataLoader.
loader_options = dict(batch_size=batch_size, shuffle=True)

train_pairs = list(zip(X_train, y_train))
dataload_train = DataLoader(train_pairs, **loader_options)

test_pairs = list(zip(X_test, y_test))
dataload_test = DataLoader(test_pairs, **loader_options)

val_pairs = list(zip(X_val, y_val))
dataload_val = DataLoader(val_pairs, **loader_options)


In [None]:
class PrintLayer(nn.Module):
    """Debugging layer that prints the shape of its input and returns it unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        shape = x.shape
        print(shape)
        return x


class Lenet(nn.Module):
    """LeNet-style network: three conv/tanh/average-pool stages and a two-layer head.

    The shape comments and the size of the first linear layer are worked out
    for inputs of shape ``[B, 3, 2048, 2048]``.

    Args:
        outsize: Number of sigmoid outputs produced per sample.
    """

    def __init__(self, outsize):
        super().__init__()

        conv_stages = [
            nn.Conv2d(3, 6, kernel_size=5),    # [B, 3, 2048, 2048] -> [B, 6, 2044, 2044]
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=3),       # -> [B, 6, 681, 681]

            nn.Conv2d(6, 16, kernel_size=5),   # -> [B, 16, 677, 677]
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),       # -> [B, 16, 338, 338]

            nn.Conv2d(16, 12, kernel_size=5),  # -> [B, 12, 334, 334]
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=3),       # -> [B, 12, 111, 111]
        ]
        self.cnn = nn.Sequential(*conv_stages)

        flat_features = 12 * 111 * 111  # channels * height * width after the last pool
        head = [
            nn.Flatten(),
            nn.Linear(flat_features, 1000),
            nn.ReLU(),
            nn.Linear(1000, outsize),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Return the sigmoid outputs of shape ``[B, outsize]`` for the batch ``x``."""
        return self.fc(self.cnn(x))

In [None]:


def train(model, dataloader, model_num, device=torch.device('cuda:2'), lr=0.01, e=5):
    """Train ``model`` with Adam and binary cross-entropy.

    The model emits a short vector per sample; it is stretched by
    nearest-neighbour interpolation to as many values as one label holds and
    then compared element-wise with that label.

    Args:
        model: Module mapping a ``[B, C, H, W]`` float batch to ``[B, outsize]``
            values in ``[0, 1]`` (required by ``BCELoss``).
        dataloader: Iterable of ``(X, y)`` batches. ``X`` is permuted with
            ``(0, 3, 1, 2)``, i.e. its last axis is moved to position 1;
            ``y`` holds the 0/1 targets.
        model_num: Identifier used in the final log message.
        device: Device the model and the batches are moved to.
        lr: Learning rate for Adam.
        e: Number of epochs.
    """
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()
    model.train()

    print('Training')
    for epoch in range(e):
        total_loss = 0
        for i, (X, y) in enumerate(dataloader):
            # Conv2d wants the channel axis right after the batch axis.
            X = X.permute(0, 3, 1, 2).float()
            X, y = (X.to(device), y.to(device))

            optimizer.zero_grad()
            pred = model(X)

            # Stretch the predictions to the number of values in one label.
            # Deriving the length from `y` (instead of a fixed 2048*2048)
            # lets the function work with labels of any size.
            upsampled = nn.functional.interpolate(pred.unsqueeze(1), size=y[0].numel())

            loss = criterion(upsampled.reshape(y.shape), y.float())

            total_loss += loss.item()
            # Running mean of the batch losses, overwritten in place.
            print(total_loss/(i+1), end='\r')
            loss.backward()
            optimizer.step()
        print()
    print(f'Saving model{model_num}')
    # NOTE: the actual save below is disabled, so only the message is printed.
    # save model
    #torch.save(model, f"model{model_num}.pt")



In [None]:
lenet = Lenet(250)
# `net` is never defined in this notebook; train the LeNet instance created above.
train(lenet, dataload_train, '1', e=10)

In [None]:


def test(m, dataloader, device=torch.device('cuda:2')):
    
    m = m.to(device)
    mse = nn.MSELoss()
    total_loss = 0
    batch_acc = 0
    
    upsample = nn.Upsample(size=2048*2048)
    
    m.eval()
    for i, data in enumerate(dataloader):
        #print(i)
        X, y = data
        X = X.permute(0,3,2,1).float()
        X, y = (X.to(device), y.to(device))
        
        with torch.no_grad():
            pred = m(X)            
            
        upsampled = upsample(pred.unsqueeze(1))
        
        loss = mse(upsampled.reshape(y.shape), y)
        total_loss += loss.item()
        
        batch_acc += torch.sum((torch.round(upsampled.reshape(y.shape)) == y))
        
        #print(batch_acc, batch_acc2)
        
    accuracy = int(batch_acc) / ((i+1) * batch_size * 3*2048*2048)

    print(f'Accuracy: {round(accuracy,2)}, Loss: {round(total_loss,2)}')
    



In [None]:
# `net` is never defined in this notebook; evaluate the LeNet instance `lenet`.
test(lenet, dataload_test)

In [None]:
class Alexnet(nn.Module):
    """AlexNet-style network: five bias-free convolutions and a three-layer head.

    The size of the first linear layer (``16*63*63``) is worked out for inputs
    of shape ``[B, 3, 2048, 2048]``.

    Args:
        outsize: Number of sigmoid outputs produced per sample.
    """

    def __init__(self, outsize=250):
        super().__init__()
        self.feature_extraction = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=11, stride=4, padding=2, bias=False),
            # Placeholder for the former shape-printing debug layer; keeping a
            # no-op here leaves the indices of the later layers unchanged.
            nn.Identity(),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),

            nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5, stride=1, padding=2, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),

            nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=24, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            # Second former debug layer, see above.
            nn.Identity(),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.5),
            # 2048 -> 511 -> 255 -> 127 -> 63 pixels per side, 16 channels
            nn.Linear(in_features=16*63*63, out_features=256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256, out_features=256),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=256, out_features=outsize),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the sigmoid outputs of shape ``[B, outsize]`` for the batch ``x``."""
        x = self.feature_extraction(x)
        x = self.classifier(x)
        return x

In [None]:
# Build the AlexNet-style model with its default output size and train it for two epochs.
alexnet = Alexnet()
train(alexnet, dataload_train, '1', e=2)