In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import torch
torch.cuda.empty_cache()
import numpy as np
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import datetime
import matplotlib.pyplot as plt
import os
from torchsummary import summary
import random
import seaborn as sns

In [None]:
# Clone the SLC_Data repository and make the checkout the working directory;
# the relative './lenses/...' paths used in the following cells are resolved from there.
# NOTE(review): re-running this cell in the same session repeats both the clone
# and the directory change — confirm it is only meant to run once per session.
!git clone https://github.com/ML4SCI-SLC/SLC_Data.git
%cd SLC_Data

Cloning into 'SLC_Data'...
remote: Enumerating objects: 9016, done.[K
remote: Counting objects:   0% (1/9016)[Kremote: Counting objects:   1% (91/9016)[Kremote: Counting objects:   2% (181/9016)[Kremote: Counting objects:   3% (271/9016)[Kremote: Counting objects:   4% (361/9016)[Kremote: Counting objects:   5% (451/9016)[Kremote: Counting objects:   6% (541/9016)[Kremote: Counting objects:   7% (632/9016)[Kremote: Counting objects:   8% (722/9016)[Kremote: Counting objects:   9% (812/9016)[Kremote: Counting objects:  10% (902/9016)[Kremote: Counting objects:  11% (992/9016)[Kremote: Counting objects:  12% (1082/9016)[Kremote: Counting objects:  13% (1173/9016)[Kremote: Counting objects:  14% (1263/9016)[Kremote: Counting objects:  15% (1353/9016)[Kremote: Counting objects:  16% (1443/9016)[Kremote: Counting objects:  17% (1533/9016)[Kremote: Counting objects:  18% (1623/9016)[Kremote: Counting objects:  19% (1714/9016)[Kremote: Counting objects

In [None]:
path_nosub = './lenses/no_sub'
# os.listdir returns entries in arbitrary order, so sort first and then shuffle
# with a private fixed-seed RNG: slices such as files_nosub[:500] are then the
# same on every run, and the global `random` state is left untouched.
files_nosub = sorted(
    os.path.join(path_nosub, f) for f in os.listdir(path_nosub) if f.endswith(".jpg")
)
random.Random(0).shuffle(files_nosub)

In [None]:
path_sub = './lenses/sub'
# Sort before shuffling with a fixed-seed private RNG so the selection taken by
# files_anom[:500] is reproducible regardless of the order os.listdir reports.
files_anom = sorted(
    os.path.join(path_sub, f) for f in os.listdir(path_sub) if f.endswith(".jpg")
)
random.Random(0).shuffle(files_anom)

In [None]:
# One label per discovered file: images from ./lenses/no_sub get 1, images from
# ./lenses/sub get 0. Sizing the lists from the file lists (instead of a fixed
# 4500) keeps labels and images aligned whatever the folders contain.
no_sub = [1] * len(files_nosub)
sub = [0] * len(files_anom)

In [None]:
# Identity normalisation: pixels stay in the [0, 1] range produced by ToTensor.
# The autoencoder ends in a Sigmoid and is trained with BCELoss, both of which
# only make sense for values in [0, 1]; mean = std = 0.5 would map the inputs
# (which are also the reconstruction targets) to [-1, 1].
mean = [0.0, 0.0, 0.0]
std = [1.0, 1.0, 1.0]
transform = transforms.Compose([
    transforms.ToTensor(),
    # Replicate the channel axis three times to match the model's 3 input channels.
    # NOTE(review): assumes the JPEGs decode to a single channel — confirm.
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    transforms.Normalize(mean, std),
])

In [None]:
class AnomDataset(Dataset):
    """Dataset of image files, optionally paired with per-image labels.

    Args:
        images_path: sequence of image file paths, one per sample.
        y: optional sequence of labels aligned with ``images_path``.
        transform: optional callable applied to each opened PIL image.
    """

    def __init__(self, images_path, y=None, transform= None):
        self.images_path = images_path
        self.transform = transform
        self.y = y

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.images_path)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) image alone when no labels were
            supplied, otherwise the list ``[image, label]``.
        """
        image = Image.open(self.images_path[idx])
        # Check this instance's transform. The bare name `transform` resolves to
        # whatever global happens to exist (or raises NameError) and would then
        # call self.transform even when it is None.
        if self.transform is not None:
            image = self.transform(image)
        # Explicit None/length check instead of truthiness so array or tensor
        # labels work too; an empty label sequence still means "no labels".
        if self.y is not None and len(self.y) > 0:
            return [image, self.y[idx]]
        return image

In [None]:
# Unlabelled datasets over the first 500 shuffled files of each class.
train_dataset = AnomDataset(files_nosub[:500], transform=transform)
anom_dataset = AnomDataset(files_anom[:500], transform=transform)

# Matching loaders: batches of 16, two worker processes, default (sequential) order.
train = DataLoader(train_dataset, batch_size=16, num_workers=2)
anom = DataLoader(anom_dataset, batch_size=16, num_workers=2)

In [None]:
class AutoEncoder(nn.Module):
    """Convolutional autoencoder built from unpadded 3x3 (transposed) convolutions.

    The encoder stacks four Conv2d+ReLU pairs (3 -> 32 -> 64 -> 64 -> 4 channels);
    the decoder mirrors it with ConvTranspose2d layers and ends in a Sigmoid, so
    the output has the same shape as the input with values in (0, 1).
    """

    def __init__(self):
        super().__init__()

        # Each conv is followed by a ReLU, which keeps the Sequential indices
        # (and therefore the state_dict keys) at 0, 2, 4, 6.
        enc_layers = []
        for c_in, c_out in [(3, 32), (32, 64), (64, 64), (64, 4)]:
            enc_layers.append(nn.Conv2d(c_in, c_out, kernel_size=3))
            enc_layers.append(nn.ReLU())
        self.enc = nn.Sequential(*enc_layers)

        dec_layers = []
        for c_in, c_out in [(4, 64), (64, 64), (64, 32)]:
            dec_layers.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=3))
            dec_layers.append(nn.ReLU())
        # The final layer maps back to 3 channels and squashes with a Sigmoid.
        dec_layers.append(nn.ConvTranspose2d(32, 3, kernel_size=3))
        dec_layers.append(nn.Sigmoid())
        self.dec = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.dec(self.enc(x))

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = AutoEncoder()
model = model.to(device)
# torchsummary builds its probe input on "cuda" by default; pass the selected
# device so the summary also works on CPU.
summary(model, (3, 150, 150), device=device)

RuntimeError: ignored

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Every later cell works on `model_big`. Default to the freshly built autoencoder
# so the notebook runs top to bottom on a clean kernel.
model_big = model

# Optional (Colab + mounted Drive): continue from a previously saved model
# instead. torch.load unpickles arbitrary objects — only use a file you trust.
# model_big = torch.load('/content/drive/My Drive/ML4SCI Datasets/anomaly_model_200')
# model_big = model_big.to(device)

In [None]:
# Pass the notebook's device explicitly instead of relying on torchsummary's
# built-in "cuda" default, so the probe input lands where the model lives.
summary(model_big, (3, 150, 150), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 148, 148]             896
              ReLU-2         [-1, 32, 148, 148]               0
            Conv2d-3         [-1, 64, 146, 146]          18,496
              ReLU-4         [-1, 64, 146, 146]               0
            Conv2d-5         [-1, 64, 144, 144]          36,928
              ReLU-6         [-1, 64, 144, 144]               0
            Conv2d-7          [-1, 4, 142, 142]           2,308
              ReLU-8          [-1, 4, 142, 142]               0
   ConvTranspose2d-9         [-1, 64, 144, 144]           2,368
             ReLU-10         [-1, 64, 144, 144]               0
  ConvTranspose2d-11         [-1, 64, 146, 146]          36,928
             ReLU-12         [-1, 64, 146, 146]               0
  ConvTranspose2d-13         [-1, 32, 148, 148]          18,464
             ReLU-14         [-1, 32, 1

In [None]:
# Adam over the parameters of model_big, learning rate 1e-3.
optimizer = torch.optim.Adam(params=model_big.parameters(), lr=0.001)
# Binary cross-entropy between reconstruction and input; BCELoss expects both
# the prediction and the target to lie in [0, 1].
criterion = nn.BCELoss()

In [None]:
def fit(model, epochs, dataloader, optim=None, loss_fn=None):
    """Train ``model`` as an autoencoder and return the mean loss of every epoch.

    Args:
        model: module whose output is compared against its own input.
        epochs: number of passes over ``dataloader``.
        dataloader: iterable of batches; each batch is either an image tensor
            or an ``(images, labels)`` pair (the labels are ignored).
        optim: optimizer to step. Defaults to the notebook-level ``optimizer``,
            which must have been built over this model's parameters.
        loss_fn: reconstruction loss. Defaults to the notebook-level ``criterion``.

    Returns:
        list[float]: the average batch loss of each epoch.

    Raises:
        ValueError: if ``dataloader`` yields no batches in an epoch.
    """
    optim = optimizer if optim is None else optim
    loss_fn = criterion if loss_fn is None else loss_fn

    # Send batches to wherever the model's parameters live instead of relying
    # on a global device name.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    model.train()
    ret = []
    for epoch in range(epochs):
        losses = []
        for data in dataloader:
            # Labelled loaders yield [images, labels]; unlabelled ones a bare tensor.
            img = data[0] if isinstance(data, (list, tuple)) else data
            if model_device is not None:
                img = img.to(model_device)
            optim.zero_grad()
            out = model(img)
            loss = loss_fn(out, img)
            loss.backward()
            optim.step()
            losses.append(loss.item())

        if not losses:
            # An empty loader would otherwise surface as a ZeroDivisionError below.
            raise ValueError('dataloader yielded no batches')
        epoch_loss = sum(losses)/len(losses)
        print(f'Epoch: {epoch+1}/{epochs} -- Train Loss: {epoch_loss}')
        ret.append(epoch_loss)
    return ret

In [None]:
# Build the training loader here: `dataloader` is only created in a later cell
# (and mixes both classes), so using it makes this cell fail on a fresh kernel.
# The autoencoder is fitted on './lenses/no_sub' images only; labels are attached
# because fit() unpacks each batch as (images, labels).
fit_dataset = AnomDataset(images_path= files_nosub[:500], transform= transform, y= no_sub[:500])
fit_loader = DataLoader(fit_dataset, batch_size= 16, num_workers= 2, shuffle= True)

start = datetime.datetime.now()
loss2 = fit(model_big, 200, fit_loader)
print(datetime.datetime.now()-start)

0:00:00.000027


In [None]:
# Labelled datasets over every file of each class (no_sub / sub hold the labels).
test_dataset_normal = AnomDataset(files_nosub, y=no_sub, transform=transform)
test_dataset_anom = AnomDataset(files_anom, y=sub, transform=transform)

In [None]:
def _reconstruction_losses(loader):
    """Return the reconstruction loss of every batch in ``loader`` as plain floats."""
    batch_losses = []
    # Inference only: without no_grad every stored loss tensor keeps its autograd
    # graph (and the activations behind it) alive, so memory grows with each batch.
    with torch.no_grad():
        for data in loader:
            # Unlabelled loaders yield a bare tensor; labelled ones [images, labels].
            imgs = data[0] if isinstance(data, (list, tuple)) else data
            imgs = imgs.to(device)
            out = model_big(imgs)
            # .item() stores a Python float rather than a device tensor.
            batch_losses.append(criterion(out, imgs).item())
    return batch_losses


loss_normal = _reconstruction_losses(train)
loss_anom = _reconstruction_losses(anom)

RuntimeError: ignored

In [None]:
# Merge both labelled test sets and serve them in shuffled batches of 32
# using five worker processes.
dataset = torch.utils.data.ConcatDataset((test_dataset_normal, test_dataset_anom))
dataloader = DataLoader(dataset, shuffle=True, batch_size=32, num_workers=5)

In [None]:
def find_preds(out):
    """Placeholder for turning model outputs into predictions.

    Not implemented yet: ``out`` is ignored and the result is always ``None``.
    """

def accuracy(out, labels):
    """Return the fraction of entries of ``out`` equal to ``labels`` as a 0-d tensor."""
    n_correct = (out == labels).sum().item()
    return torch.tensor(n_correct / len(out))

In [None]:
# Per-batch results are collected so they survive the loop instead of being
# overwritten on every iteration.
batch_losses = []
batch_accs = []

# Evaluation only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for data in dataloader:
        imgs, labels = data
        imgs = imgs.to(device)
        labels = labels.to(device)
        out = model_big(imgs)
        loss = criterion(out, imgs)
        batch_losses.append(loss.item())
        preds = find_preds(out)
        # Score the predicted labels, not the raw reconstructions (whose shape
        # does not match `labels`). find_preds may still be a stub returning
        # None, in which case accuracy is skipped for the batch.
        if preds is not None:
            acc = accuracy(preds, labels)
            batch_accs.append(acc.item())