In [None]:
!pip install -U albumentations
!pip uninstall opencv-python-headless opencv-python
!pip install -I opencv-python-headless==4.5.1.48
!pip install -I opencv-python==4.5.1.48

# Evita un problema di dipendenza

In [None]:
# Collega la cartella del drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [2]:
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import cv2
import os
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import albumentations as A
from albumentations.pytorch import ToTensorV2
import time

In [8]:
## DATASET DAI FILE
class skyDataset(Dataset):
    def __init__(self, set_path):
        self.set_path = set_path
        self.img_paths = []
        self.last_index = [-1]
        for i in range(1,6):
            imgs = os.listdir(set_path+str(i))
            imgs.sort()
            imgs = [set_path+str(i)+"/"+p for p in imgs]
            self.img_paths += imgs
            cur_last_idx = self.last_index[-1] + len(imgs)
            self.last_index.append(cur_last_idx)
        self.img_paths = [i for i in self.img_paths if i.endswith(".JPG")]

        self.transform = A.Compose([A.CenterCrop(width=416, height=416), ToTensorV2()])

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        '''
        image = torch.load(self.img_paths[idx])
        label = int(self.img_paths[idx].split("/")[-2]) -1
        '''
        image = cv2.imread(self.img_paths[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)

        image = self.transform(image=image)["image"]

        label = int(self.img_paths[idx].split("/")[-2]) - 1  # -1 per passare da 1-6 a 0-5

        ## NUOVA LABEL BINARIA
        #label = 1 if label >= 4 else 0
        
        return image, label

train_folder_original = "/content/data/skyWatcher/reduced/train/"
test_folder_original = "/content/data/skyWatcher/reduced/test/"

''' SALVATAGGIO TENSORI
train_dataset = skyDataset(train_tensors)
test_dataset = skyDataset(test_tensors)

save_path = 'red_tensor/test/'
for i, (x,y) in enumerate(test_dataset):
    path = save_path+str(y+1)+"/"+str(i)+".pt"
    torch.save(x, path)
    print(i, y)'''
from torchvision import transforms, datasets

transform = transforms.Compose([transforms.CenterCrop(416), transforms.ToTensor()])

train_dataset = datasets.ImageFolder(root=train_folder_original, transform=transform)
test_dataset = datasets.ImageFolder(root=test_folder_original, transform=transform)


dl_train = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
dl_test = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=2)




In [None]:
## DATASET HDF5

import h5py

train_hdf5 = "/content/drive/MyDrive/skyWatcher/train.hdf5"


class skyDataset(Dataset):
    def __init__(self, hdf5_path):

        super().__init__()
        self.file = h5py.File(hdf5_path, 'r')
        self.len = self.file['x_train'].shape[0]


    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        t1 = time.time()
        image = self.file['x_train'][idx]
        image = image.astype('float32')

        label = self.file['y_train'][idx] -1
        print(time.time()-t1)
        
        return image, label


train_dataset = skyDataset(train_hdf5)
test_dataset = skyDataset(train_hdf5)

dl_train = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=1)
dl_test = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=1)

In [24]:
## DATASET NUMPY

class skyDataset(Dataset):
    def __init__(self, dataset_dim, data_path, label_path):
        super().__init__()
        self.data = np.memmap(data_path, dtype='float32', mode='r', shape=(dataset_dim, 3, 416, 416))
        self.label = np.memmap(label_path, dtype='uint8', mode='r', shape=(dataset_dim,))
        self.size = dataset_dim

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        #t1 = time.time()
        image = self.data[idx]
        image = torch.from_numpy(image)

        label = self.label[idx]
        #print(time.time() - t1)

        return image, label

train_data = '/content/drive/MyDrive/skyWatcher/np/trainF.npm'
train_label = '/content/drive/MyDrive/skyWatcher/np/trainF_label.npm'
test_data = '/content/drive/MyDrive/skyWatcher/np/testF.npm'
test_label = '/content/drive/MyDrive/skyWatcher/np/testF_label.npm'


train_dataset = skyDataset(3488, train_data, train_label)
test_dataset = skyDataset(1163, test_data, test_label)

dl_train = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=1)
dl_test = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=1)




In [26]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 1, 4, 4)  # 1 out channel, 4x4 con stride 4
        self.conv2 = nn.Conv2d(1, 1, 4, 2)  # idem ma con stride 2
        self.pooling = nn.MaxPool2d(3, 2)  # 3x3 con stride 3
        self.relu = nn.ReLU()

        self.linear = nn.Linear(625, 5)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.relu(self.conv2(x))
        x = self.pooling(x)

        x = torch.flatten(x, 1)
        x = self.linear(x)
        return torch.sigmoid(x)


def eval_cf(mlp, data_loader):
    correct = 0
    total = 0

    confusion_matrix = torch.zeros(5, 5)
    with torch.no_grad():
        for x, y in data_loader:
            y_pred = mlp(x)

            y_pred = torch.argmax(y_pred, dim=1)
            acc = (y_pred == y).sum()

            correct += acc
            total += y_pred.size(0)

            for t, p in zip(y.view(-1), y_pred.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1
    return correct/total, confusion_matrix       

In [31]:
model = CNN()
epoch_number = 5
loss_fun = nn.CrossEntropyLoss()
#loss_fun = nn.BCELoss()
opt = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

import time

for i in range(epoch_number):

    train_acc, train_cm = eval_cf(model, dl_train)
    test_acc, test_cm = eval_cf(model, dl_test)
    print(f"Epoca {i}\nTrain acc.: {train_acc}, Test acc.: {test_acc}")
    print("\nConfusion matrix train")
    print(train_cm)
    print(train_cm.diag()/train_cm.sum(1))
    print("\nConfusion matrix test")
    print(test_cm)
    print(test_cm.diag()/test_cm.sum(1))
    print("\n\n\n")
    
    for x, y in dl_train:
        opt.zero_grad()
        o = model(x)

        loss = loss_fun(o, y)
        loss.backward()
        opt.step()




Epoca 0
Train acc.: 0.10407110303640366, Test acc.: 0.09888219833374023
Confusion matrix train
tensor([[   4.,  156., 1787.,    0.,    8.],
        [   5.,   44.,  441.,    0.,    3.],
        [   0.,    6.,  315.,    0.,    0.],
        [   0.,    4.,  337.,    0.,    0.],
        [   0.,    9.,  369.,    0.,    0.]])
tensor([0.0020, 0.0892, 0.9813, 0.0000, 0.0000])
Confusion matrix test
tensor([[  1.,  49., 600.,   0.,   2.],
        [  1.,  10., 153.,   0.,   1.],
        [  1.,   2., 104.,   0.,   0.],
        [  0.,   0., 114.,   0.,   0.],
        [  0.,   2., 123.,   0.,   0.]])
tensor([0.0015, 0.0606, 0.9720, 0.0000, 0.0000])








Epoca 1
Train acc.: 0.5604931116104126, Test acc.: 0.5606191158294678
Confusion matrix train
tensor([[1955.,    0.,    0.,    0.,    0.],
        [ 493.,    0.,    0.,    0.,    0.],
        [ 321.,    0.,    0.,    0.,    0.],
        [ 341.,    0.,    0.,    0.,    0.],
        [ 378.,    0.,    0.,    0.,    0.]])
tensor([1., 0., 0., 0., 0.])
Confusion matrix test
tensor([[652.,   0.,   0.,   0.,   0.],
        [165.,   0.,   0.,   0.,   0.],
        [107.,   0.,   0.,   0.,   0.],
        [114.,   0.,   0.,   0.,   0.],
        [125.,   0.,   0.,   0.,   0.]])
tensor([1., 0., 0., 0., 0.])








KeyboardInterrupt: ignored