#Etapa 1: Importando bibliotecas


In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
from numpy.random import choice as npc
import numpy as np
import time
import random
import shutil
import torchvision.datasets as dset
from PIL import Image
import pickle
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt
import seaborn as sns
import time
import torch.nn as nn
import torch.nn.functional as F

#Etapa 2: Carregando o Dataset


Nesta etapa carregamos o dataset Omniglot. Os conjuntos `images_background` (treino) e `images_evaluation` (teste) do [repositório oficial](https://github.com/brendenlake/omniglot/tree/master/python) são lidos do Google Drive e copiados para o ambiente do Colab em duas pastas separadas, com uma subpasta por caractere.

In [7]:
from google.colab import drive
drive.mount('/content/drive')



# Original dataset locations on the mounted Google Drive
train_path_background = '/content/drive/MyDrive/Faculdade/2025.1/images_background/images_background'
test_path_evaluation = "/content/drive/MyDrive/Faculdade/2025.1/images_evaluation/images_evaluation"

# Destination folders (local to the Colab VM) for the restructured copies
train_reestruturado = '/content/images_background_reestruturado'
test_reestruturado = '/content/images_evaluation_reestruturado'

def reestruturar_omniglot(origem, destino):
    """Flatten a two-level directory tree into one folder per leaf directory.

    Every directory ``origem/<idioma>/<caractere>`` is copied to
    ``destino/<idioma>_<caractere>``. Entries that are not directories are
    ignored at both levels, and a destination folder that already exists is
    left untouched (it is neither overwritten nor merged).

    Args:
        origem: Root of the source tree.
        destino: Root of the flattened copy; created if it does not exist.
    """
    if not os.path.exists(destino):
        os.makedirs(destino)

    for idioma in os.listdir(origem):
        pasta_idioma = os.path.join(origem, idioma)
        if not os.path.isdir(pasta_idioma):
            continue

        for caractere in os.listdir(pasta_idioma):
            pasta_caractere = os.path.join(pasta_idioma, caractere)
            if not os.path.isdir(pasta_caractere):
                continue

            novo_nome = f"{idioma}_{caractere}"
            pasta_destino = os.path.join(destino, novo_nome)
            if os.path.exists(pasta_destino):
                # Already copied on a previous run; skip instead of overwriting.
                continue
            shutil.copytree(pasta_caractere, pasta_destino)

# Flatten both Omniglot splits into their restructured destination folders.
for _origem, _destino in (
    (train_path_background, train_reestruturado),
    (test_path_evaluation, test_reestruturado),
):
    reestruturar_omniglot(_origem, _destino)

# Point ImageFolder at the restructured roots (train first, then test).
train_dataset, test_dataset = (
    dset.ImageFolder(root=_raiz)
    for _raiz in (train_reestruturado, test_reestruturado)
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#Etapa 3: Pré-processando o dataset de treinamento

In [8]:
class OmniglotTrain(Dataset):
    """Random stream of image pairs for training a same/different classifier.

    Odd indices yield a pair from the same class (label 1.0); even indices
    yield a pair from two different classes (label 0.0). The index only
    selects the pair type: the images themselves are drawn at random on
    every access, so the same index does not return the same pair twice.

    Args:
        dataset: Object exposing ``imgs``, a sequence of ``(path, class_index)``
            entries (e.g. a torchvision ``ImageFolder``).
        transform: Optional callable applied to each grayscale PIL image.
    """

    def __init__(self, dataset, transform=None):
        super(OmniglotTrain, self).__init__()
        # NOTE(review): this seeds NumPy only. Pairs are drawn with the stdlib
        # `random` module, so this call does not make the sampling reproducible.
        np.random.seed(0)
        self.dataset = dataset
        self.transform = transform
        self.img1 = None  # unused here; kept so existing attribute access still works

        # class_index -> entries of that class. Built once so a same-class
        # partner can be drawn directly instead of by rejection sampling over
        # the whole image list.
        self._by_class = {}
        for entry in dataset.imgs:
            self._by_class.setdefault(entry[1], []).append(entry)

    def __len__(self):
        # Nominal length only: pairs are generated on the fly.
        return 21000000

    def _sample_pair(self, index):
        """Draw ``(entry1, entry2, label)`` for ``index`` without loading images.

        Raises:
            ValueError: If a different-class pair is requested but the dataset
                holds fewer than two classes.
        """
        anchor = random.choice(self.dataset.imgs)

        if index % 2 == 1:
            # Same class: uniform over the anchor's class (the anchor itself
            # may be drawn again, as with the original rejection sampling).
            return anchor, random.choice(self._by_class[anchor[1]]), 1.0

        if len(self._by_class) < 2:
            raise ValueError("need at least two classes to draw a different-class pair")
        # Different class: rejection sampling is cheap here because almost
        # every draw already belongs to another class.
        while True:
            other = random.choice(self.dataset.imgs)
            if other[1] != anchor[1]:
                return anchor, other, 0.0

    def __getitem__(self, index):
        """Return ``(image1, image2, label)`` with ``label`` a float32 tensor of shape (1,)."""
        entry1, entry2, label = self._sample_pair(index)

        image1 = Image.open(entry1[0]).convert('L')
        image2 = Image.open(entry2[0]).convert('L')

        if self.transform:
            image1 = self.transform(image1)
            image2 = self.transform(image2)
        return image1, image2, torch.from_numpy(np.array([label], dtype=np.float32))

#Etapa 4: Pré-processando o dataset de teste

In [9]:
class OmniglotTest(Dataset):
    """Image pairs arranged as ``times`` consecutive groups of ``way`` pairs.

    Within each group, the first index (``index % way == 0``) draws a new
    anchor image and pairs it with an image of the same class; every other
    index pairs the current anchor with an image of a different class.

    Args:
        dataset: Object exposing ``imgs``, a sequence of ``(path, class_index)``
            entries (e.g. a torchvision ``ImageFolder``).
        transform: Optional callable applied to each grayscale PIL image.
        times: Number of groups.
        way: Number of pairs per group.
    """

    def __init__(self, dataset, transform=None, times=200, way=20):
        # NOTE(review): this seeds NumPy only. Pairs are drawn with the stdlib
        # `random` module, so this call does not make the sampling reproducible.
        np.random.seed(1)
        super(OmniglotTest, self).__init__()
        self.dataset = dataset
        self.transform = transform
        self.times = times
        self.way = way
        # Current anchor entry. Initialised here so that reading a non-anchor
        # index first no longer fails with AttributeError.
        self.img1 = None

        # class_index -> entries of that class, so the same-class partner is
        # drawn directly instead of by rejection sampling.
        self._by_class = {}
        for entry in dataset.imgs:
            self._by_class.setdefault(entry[1], []).append(entry)

    def __len__(self):
        return self.times * self.way

    def _pick_pair(self, index):
        """Draw ``(anchor_entry, other_entry)`` for ``index`` without loading images.

        Raises:
            ValueError: If a different-class pair is requested but the dataset
                holds fewer than two classes.
        """
        starts_group = index % self.way == 0
        if starts_group or self.img1 is None:
            self.img1 = random.choice(self.dataset.imgs)

        if starts_group:
            # Same-class pair (the anchor itself may be drawn again).
            return self.img1, random.choice(self._by_class[self.img1[1]])

        if len(self._by_class) < 2:
            raise ValueError("need at least two classes to draw a different-class pair")
        while True:
            other = random.choice(self.dataset.imgs)
            if other[1] != self.img1[1]:
                return self.img1, other

    def __getitem__(self, index):
        """Return ``(img1, img2)`` as grayscale images, transformed when a transform is set."""
        anchor, other = self._pick_pair(index)

        img1 = Image.open(anchor[0]).convert('L')
        img2 = Image.open(other[0]).convert('L')

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        return img1, img2

#Etapa 5: Modelo

In [10]:
class Siamese(nn.Module):
    """Twin convolutional network that scores whether two images match.

    Both inputs go through the same convolutional stack and a sigmoid-activated
    fully connected layer; the element-wise absolute difference of the two
    embeddings is then mapped to a single raw score (no sigmoid is applied to
    the output).
    """

    def __init__(self):
        super().__init__()
        # Feature-map sizes in the comments are for a 1x105x105 input.
        feature_layers = [
            nn.Conv2d(1, 64, 10),      # -> 64 x 96 x 96
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),           # -> 64 x 48 x 48
            nn.Conv2d(64, 128, 7),     # -> 128 x 42 x 42
            nn.ReLU(),
            nn.MaxPool2d(2),           # -> 128 x 21 x 21
            nn.Conv2d(128, 128, 4),    # -> 128 x 18 x 18
            nn.ReLU(),
            nn.MaxPool2d(2),           # -> 128 x 9 x 9
            nn.Conv2d(128, 256, 4),    # -> 256 x 6 x 6
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*feature_layers)
        # 9216 = 256 * 6 * 6 flattened features.
        self.liner = nn.Sequential(nn.Linear(9216, 4096), nn.Sigmoid())
        self.out = nn.Linear(4096, 1)

    def forward_one(self, x):
        """Embed one batch of images into 4096-dimensional vectors."""
        features = self.conv(x)
        flattened = features.view(features.size(0), -1)
        return self.liner(flattened)

    def forward(self, x1, x2):
        """Return one raw score per pair, computed from |embed(x1) - embed(x2)|."""
        emb1 = self.forward_one(x1)
        emb2 = self.forward_one(x2)
        return self.out((emb1 - emb2).abs())

#Etapa 6: Checagem da GPU

In [11]:
# Record whether PyTorch can see a CUDA device; later cells branch on this flag.
cuda = torch.cuda.is_available()
print(f"Cuda: {cuda}")

Cuda: True


#Etapa 7: Distorções afins (aumento de dados)

In [12]:
# Training-time augmentation: a random affine transform limited to rotations
# of up to 15 degrees either way, followed by conversion to a tensor.
_augmentation_steps = [
    transforms.RandomAffine(degrees=15),
    transforms.ToTensor(),
]
data_transforms = transforms.Compose(_augmentation_steps)

#Etapa 8: Carregando os dados e criando pares para treinamento e teste

In [13]:
# Evaluation layout: `times` groups of `way` pairs each.
way = 20
times = 400

# Pair datasets: augmented pairs for training, plain tensors for testing.
dataSet = OmniglotTrain(train_dataset, transform=data_transforms)
testSet = OmniglotTest(
    test_dataset,
    transform=transforms.ToTensor(),
    times=times,
    way=way,
)

# One test batch is exactly one group of `way` pairs, so order must be kept.
testLoader = DataLoader(testSet, batch_size=way, shuffle=False, num_workers=16)

# The training dataset already draws its pairs at random on every access.
dataLoader = DataLoader(dataSet, batch_size=128, shuffle=False, num_workers=16)



#Etapa 9: Função de Perda

In [14]:
# Binary cross-entropy on raw scores, averaged over the batch.
# `reduction='mean'` replaces the deprecated `size_average=True` argument and
# yields the same averaged loss without the deprecation warning.
loss_fn = torch.nn.BCEWithLogitsLoss(reduction='mean')
learning_rate = 0.0006

net = Siamese()
train_loss = []
net.train()
if cuda:
    net.cuda()



#Etapa 10: Otimizador

In [15]:
# Adam over all network parameters, starting from cleared gradients.
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)
optimizer.zero_grad()

#Etapa 11: Setando os parâmetros

In [16]:
# Schedule, counted in training batches.
max_iter = 90000                      # stop after this many batches
show_every = 10                       # report the averaged loss
save_every, test_every = 100, 100     # checkpoint / evaluate

# Running state and history filled in by the training loop.
loss_val = 0
train_loss, losses, batch_ids = [], [], []

#Etapa X: Avaliação One-Shot

In [20]:
def evaluate_oneshot(net, dataset, way=20, times=400, cuda=True):
    """Estimate N-way one-shot accuracy by nearest-embedding classification.

    Each trial samples ``way`` distinct classes, one support image from a
    randomly chosen target class, and one candidate image per class. The
    prediction is the candidate whose embedding (``net.forward_one``) is
    closest to the support embedding in Euclidean distance.

    NOTE(review): scoring uses the embedding distance only; the network's
    final ``out`` layer is not involved. Confirm this is the intended metric.

    Args:
        net: Module exposing ``forward_one``; its train/eval mode is restored
            before returning.
        dataset: Object exposing ``dataset.imgs`` (``(path, class_index)``
            entries) and ``transform`` (callable turning a PIL image into a tensor).
        way: Number of candidate classes per trial.
        times: Number of trials.
        cuda: Move image tensors to the GPU before the forward pass.

    Returns:
        Tuple ``(correct, times)``.

    Raises:
        ValueError: If ``way`` is below 1 or exceeds the number of classes.
    """
    print("Realizando avaliação One-Shot...")

    # Group image paths by class once, instead of rescanning the whole image
    # list for every class of every trial.
    paths_by_class = {}
    for entry in dataset.dataset.imgs:
        paths_by_class.setdefault(entry[1], []).append(entry[0])
    classes = list(paths_by_class)

    if way < 1 or way > len(classes):
        raise ValueError(
            "way must be between 1 and the number of classes (%d), got %r" % (len(classes), way)
        )

    def _load(path):
        # Grayscale -> transform -> add batch dimension -> optional GPU move.
        tensor = dataset.transform(Image.open(path).convert('L')).unsqueeze(0)
        return tensor.cuda() if cuda else tensor

    # Remember the caller's mode so training is not silently left in eval mode.
    was_training = net.training
    net.eval()
    correct = 0
    try:
        with torch.no_grad():
            for _ in range(times):
                selected_classes = random.sample(classes, way)
                # Random target position, so ties in distance cannot
                # systematically favour the correct class.
                target_idx = random.randrange(way)
                correct_class = selected_classes[target_idx]

                # Support and matching candidate are different images whenever
                # the class has at least two; the same file on both sides
                # would be a trivial zero-distance match.
                pool = paths_by_class[correct_class]
                if len(pool) >= 2:
                    support_path, match_path = random.sample(pool, 2)
                else:
                    support_path = match_path = pool[0]

                candidate_paths = [
                    match_path if cls == correct_class else random.choice(paths_by_class[cls])
                    for cls in selected_classes
                ]

                # Embed the support image and all candidates (one batched pass).
                emb_support = net.forward_one(_load(support_path))
                emb_candidates = net.forward_one(
                    torch.cat([_load(path) for path in candidate_paths])
                )

                distances = F.pairwise_distance(
                    emb_support.expand_as(emb_candidates), emb_candidates
                )
                if int(torch.argmin(distances)) == target_idx:
                    correct += 1
    finally:
        net.train(was_training)

    # Guard against times == 0, which would otherwise divide by zero.
    acc = correct / times * 100 if times else 0.0
    print(f"\nOne-Shot Accuracy on {way}-way {times}-trial: {correct}/{times} = {acc:.2f}%")
    return correct, times

#Etapa 12: Treinando e testando a cada 100 batches

In [None]:
# Checkpoints are written under ./model.
os.makedirs('model', exist_ok=True)

for batch_id, (img1, img2, label) in enumerate(dataLoader, 1):
    if batch_id > max_iter:
        break
    batch_start = time.time()

    # Tensors take part in autograd directly; the deprecated Variable wrapper
    # is not needed.
    if cuda:
        img1, img2, label = img1.cuda(), img2.cuda(), label.cuda()

    optimizer.zero_grad()
    output = net(img1, img2)  # call the module (not .forward) so hooks run
    loss = loss_fn(output, label)
    batch_loss = loss.item()
    loss_val += batch_loss
    loss.backward()
    optimizer.step()

    # Record the loss of this batch. Appending the running sum `loss_val`
    # would store partial sums, and zero right after each report.
    train_loss.append(batch_loss)

    if batch_id % show_every == 0 :
        # "Took" extrapolates the duration of the latest batch to `show_every` batches.
        print('[%d]\tloss:\t%.5f\tTook\t%.2f s'%(batch_id, loss_val/show_every, (time.time() - batch_start)*show_every))
        batch_ids.append(float(batch_id))
        losses.append(loss_val/show_every)
        loss_val = 0
    if batch_id % save_every == 0:
        # NOTE(review): the file name uses batch_id+1 although batch_id is
        # already 1-based; left unchanged in case later cells load these names.
        torch.save(net.state_dict(), 'model/model-batch-%d.pth'%(batch_id+1,))
    if batch_id % test_every == 0:
        evaluate_oneshot(net, testSet, way=way, times=times, cuda=cuda)
        # The evaluation switches the network to eval mode; resume training mode.
        net.train()

# Persist the per-batch loss history.
with open('train_loss', 'wb') as f:
    pickle.dump(train_loss, f)

[10]	loss:	0.32940	Took	2.44 s
[20]	loss:	0.30769	Took	2.61 s
[30]	loss:	0.29528	Took	2.70 s
[40]	loss:	0.27448	Took	2.70 s
[50]	loss:	0.25522	Took	2.62 s
[60]	loss:	0.26124	Took	2.63 s
[70]	loss:	0.27556	Took	2.63 s
[80]	loss:	0.29228	Took	2.55 s
[90]	loss:	0.27282	Took	2.76 s
[100]	loss:	0.24809	Took	2.59 s
Realizando avaliação One-Shot...

One-Shot Accuracy on 20-way 400-trial: 241/400 = 60.25%
[110]	loss:	0.26999	Took	2.66 s
[120]	loss:	0.29848	Took	2.55 s
[130]	loss:	0.26671	Took	2.66 s
[140]	loss:	0.28302	Took	2.67 s
[150]	loss:	0.23238	Took	2.65 s
[160]	loss:	0.24908	Took	2.66 s
[170]	loss:	0.26609	Took	2.69 s
[180]	loss:	0.22290	Took	2.72 s
[190]	loss:	0.21882	Took	2.74 s
[200]	loss:	0.23268	Took	2.68 s
Realizando avaliação One-Shot...

One-Shot Accuracy on 20-way 400-trial: 249/400 = 62.25%
[210]	loss:	0.22066	Took	2.77 s
[220]	loss:	0.22199	Took	2.67 s
[230]	loss:	0.24624	Took	2.72 s
[240]	loss:	0.23821	Took	2.78 s
[250]	loss:	0.25610	Took	2.69 s
[260]	loss:	0.21627	Took	2.70

#Etapa 14: Visualização dos resultados