In [2]:
import torch
import cv2
import open_clip
import numpy as np
from torchvision import transforms
from torchvision.datasets import ImageFolder

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the pretrained OpenCLIP ViT-g-14 checkpoint. `preprocess` is the image
# transform that open_clip builds for this checkpoint.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-g-14', pretrained='laion2b_s34b_b88k')
model.to('cuda')
# The open_clip README notes that the model is returned in train mode; this
# notebook only extracts features, so switch to eval mode explicitly.
model.eval()

In [4]:
class ImageFolderWithPaths(ImageFolder):
    """ImageFolder variant whose items also carry the path of the image file."""

    def __getitem__(self, index):
        """Return the parent's item tuple with the image path appended as its last element."""
        item = super().__getitem__(index)
        # `imgs` holds (path, class_index) pairs; keep only the path.
        image_path = self.imgs[index][0]
        return (*item, image_path)

In [7]:
# Images are only converted to tensors here.
# NOTE(review): the `preprocess` transform returned by open_clip is never used;
# confirm whether CLIP's own resize/normalisation was intended. Changing it would
# invalidate the cached feature files and any classifier trained on them.
transform = transforms.Compose([transforms.ToTensor()])

train_dataset = ImageFolderWithPaths(root="SimilarImages", transform=transform)
val_dataset = ImageFolderWithPaths(root="Final_dataset_small/Validation", transform=transform)

# Both loaders walk their dataset in order, 32 images at a time.
loader_kwargs = dict(batch_size=32, shuffle=False)
train_dataloader = torch.utils.data.DataLoader(train_dataset, **loader_kwargs)
val_dataloader = torch.utils.data.DataLoader(val_dataset, **loader_kwargs)

In [8]:
def get_features(dataset):
    """Encode every batch of a loader with the global CLIP ``model``.

    Parameters
    ----------
    dataset : iterable of batches
        Despite the name this is iterated like a DataLoader. Each batch must
        start with ``(images, labels)``; if a third element is present it is
        taken to be the sequence of file paths for that batch.

    Returns
    -------
    tuple
        ``(features, labels, paths)`` where ``features`` and ``labels`` are
        NumPy arrays concatenated over all batches and ``paths`` is a flat list
        (empty when the batches carry no paths).
    """
    # Run on whatever device the model already lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    all_features = []
    all_labels = []
    all_directories = []

    with torch.no_grad():
        for images, labels, *extra in dataset:
            features = model.encode_image(images.to(device))

            # Move each batch to host memory right away so the GPU does not
            # have to hold the features of the whole dataset.
            all_features.append(features.cpu())
            all_labels.append(labels)
            if extra:
                all_directories.extend(extra[0])

    return torch.cat(all_features).numpy(), torch.cat(all_labels).cpu().numpy(), all_directories

train_features, train_labels, all_directories = get_features(train_dataloader)

# Preview the first 10 labels and image paths
print(train_labels[:10])
print(all_directories[:10])

# One (feature vector, label, path) record per image
grouped = list(zip(train_features, train_labels, all_directories))

# Save features
# The records mix an array, an integer and a string, so the object dtype is
# requested explicitly: letting NumPy infer it from a ragged sequence is
# deprecated and is an error in newer NumPy releases.
np.save('features_clip.npy', np.array(grouped, dtype=object))

[0 0 0 0 0 0 0 0 0 0]
['SimilarImages\\Amarillo\\000018_00_1.jpg', 'SimilarImages\\Amarillo\\000075_00_1.jpg', 'SimilarImages\\Amarillo\\000101_00_1.jpg', 'SimilarImages\\Amarillo\\000102_00_1.jpg', 'SimilarImages\\Amarillo\\000174_00_1.jpg', 'SimilarImages\\Amarillo\\000175_00_1.jpg', 'SimilarImages\\Amarillo\\001720_00_1.jpg', 'SimilarImages\\Amarillo\\001829_00_1.jpg', 'SimilarImages\\Amarillo\\001831_00_1.jpg', 'SimilarImages\\Amarillo\\002212_00_1.jpg']


  arr = np.asanyarray(arr)


In [29]:
def get_features(dataset):
    """Encode every batch of a loader with the global CLIP ``model``.

    Parameters
    ----------
    dataset : iterable of batches
        Despite the name this is iterated like a DataLoader. Each batch must
        start with ``(images, labels)``; if a third element is present it is
        taken to be the sequence of file paths for that batch.

    Returns
    -------
    tuple
        ``(features, labels, paths)`` where ``features`` and ``labels`` are
        NumPy arrays concatenated over all batches and ``paths`` is a flat list
        (empty when the batches carry no paths).
    """
    # Run on whatever device the model already lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    all_features = []
    all_labels = []
    all_directories = []

    with torch.no_grad():
        for images, labels, *extra in dataset:
            features = model.encode_image(images.to(device))

            # Move each batch to host memory right away so the GPU does not
            # have to hold the features of the whole dataset.
            all_features.append(features.cpu())
            all_labels.append(labels)
            if extra:
                all_directories.extend(extra[0])

    return torch.cat(all_features).numpy(), torch.cat(all_labels).cpu().numpy(), all_directories

# NOTE(review): these names say "train" but hold the validation split here;
# they are kept unchanged because other cells may read them.
train_features, train_labels, all_directories = get_features(val_dataloader)

# Preview the first 10 labels and image paths
print(train_labels[:10])
print(all_directories[:10])

# One (feature vector, label, path) record per image
grouped = list(zip(train_features, train_labels, all_directories))

# Save features
# The records mix an array, an integer and a string, so the object dtype is
# requested explicitly: letting NumPy infer it from a ragged sequence is
# deprecated and is an error in newer NumPy releases.
np.save('features_clip_val.npy', np.array(grouped, dtype=object))

[0 0 0 0 0 0 0 0 0 0]
['Final_dataset_small/Validation\\Amarillo\\04DJVSJ7R468.jpg', 'Final_dataset_small/Validation\\Amarillo\\176968_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\180544_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\180546_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\182322_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\183479_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\186443_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\196319_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\196826_00_1.jpg', 'Final_dataset_small/Validation\\Amarillo\\197726_00_1.jpg']


  arr = np.asanyarray(arr)


In [12]:
from torch import nn
import torch
from torch.nn import functional as F
import numpy as np

class Clasificador(nn.Module):
    """Small fully connected classification head.

    Parameters
    ----------
    num_caracteristicas : int
        Size of each input feature vector (input width of the first layer).
    num_clases : int
        Number of output logits.
    """

    def __init__(self, num_caracteristicas=1024, num_clases=10):
        super().__init__()

        # Network layers (attribute names are part of the saved state_dict keys)
        self.fc1 = nn.Linear(num_caracteristicas, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc4 = nn.Linear(256, num_clases)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for a batch of feature vectors."""
        x = F.relu(self.fc1(x))
        # The functional dropout defaults to training=True, so it must be tied
        # to the module's mode or it keeps dropping units after `.eval()`.
        x = F.dropout(x, p=0.2, training=self.training)
        x = F.relu(self.fc2(x))

        # No activation on the last layer: the outputs are logits
        x = self.fc4(x)

        return x
    
classificador = Clasificador()
classificador.cuda()

# Load model
classificador.load_state_dict(torch.load('best_model.pth'))
# Switch to inference mode so layers that honour `module.training` behave deterministically
classificador.eval()

# Load features
# NOTE(review): allow_pickle=True unpickles the file contents; only load files you created.
grouped_val = np.load('features_clip_val.npy', allow_pickle=True)

# Evaluate model
from sklearn.metrics import accuracy_score

# Each record is (feature vector, label, path); the path is not needed here
features_val, labels_val, _ = zip(*grouped_val)

features_val = np.array(features_val)

labels_val = np.array(labels_val)

# No gradients are needed for evaluation
with torch.no_grad():
    preds_val = classificador(torch.from_numpy(features_val).float().cuda())

print(accuracy_score(labels_val, torch.argmax(preds_val, dim=1).cpu().numpy()))

0.9319727891156463


In [14]:
import torch
import cv2
import open_clip
import numpy as np
from torchvision import transforms
from torchvision.datasets import ImageFolder
from sklearn.metrics import accuracy_score

# Test images. NOTE(review): the names say "train" but this is the
# "test_final_images" folder; they are kept because the rest of the cell uses them.
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = ImageFolder(root="test_final_images", transform=transform)
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=False)

model, _, preprocess = open_clip.create_model_and_transforms('ViT-g-14', pretrained='laion2b_s34b_b88k')
model.to('cuda')
# The open_clip README notes that the model is returned in train mode; this
# cell only extracts features, so switch to eval mode explicitly.
model.eval()

def get_features(dataset):
    """Encode every batch of a loader with the global CLIP ``model``.

    Parameters
    ----------
    dataset : iterable of batches
        Despite the name this is iterated like a DataLoader. Each batch must
        start with ``(images, labels)``; if a third element is present it is
        taken to be the sequence of file paths for that batch.

    Returns
    -------
    tuple
        ``(features, labels, paths)`` where ``features`` and ``labels`` are
        NumPy arrays concatenated over all batches and ``paths`` is a flat list
        (empty when the batches carry no paths).
    """
    # Run on whatever device the model already lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    all_features = []
    all_labels = []
    all_directories = []

    with torch.no_grad():
        for images, labels, *extra in dataset:
            features = model.encode_image(images.to(device))

            # Move each batch to host memory right away so the GPU does not
            # have to hold the features of the whole dataset.
            all_features.append(features.cpu())
            all_labels.append(labels)
            # Paths are recorded only when the loader supplies them. Looking
            # them up in `imgs` by a class label (as was done before) returns
            # unrelated files, because a label is not a sample index.
            if extra:
                all_directories.extend(extra[0])

    return torch.cat(all_features).numpy(), torch.cat(all_labels).cpu().numpy(), all_directories

# Extract CLIP features for the images served by `train_dataloader`
train_features, train_labels, all_directories = get_features(train_dataloader)

# Switch to inference mode so layers that honour `module.training` behave
# deterministically, and skip autograd bookkeeping for the forward pass.
classificador.eval()
with torch.no_grad():
    preds_val = classificador(torch.from_numpy(train_features).float().cuda())

print(accuracy_score(train_labels, torch.argmax(preds_val, dim=1).cpu().numpy()))

0.7121212121212122


In [60]:
from torch import nn
import torch
from torch.nn import functional as F
import numpy as np

# Load the cached feature files for the training and validation splits.
# NOTE(review): allow_pickle=True unpickles the file contents; only load files you created.
grouped, grouped_val = (
    np.load(npy_file, allow_pickle=True)
    for npy_file in ('features_clip.npy', 'features_clip_val.npy')
)

class Clasificador(nn.Module):
    """Small fully connected classification head.

    Parameters
    ----------
    num_caracteristicas : int
        Size of each input feature vector (input width of the first layer).
    num_clases : int
        Number of output logits.
    """

    def __init__(self, num_caracteristicas=1024, num_clases=2):
        super().__init__()

        # Network layers (attribute names are part of the saved state_dict keys)
        self.fc1 = nn.Linear(num_caracteristicas, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc4 = nn.Linear(256, num_clases)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for a batch of feature vectors."""
        x = F.relu(self.fc1(x))
        # The functional dropout defaults to training=True, so it must be tied
        # to the module's mode or it keeps dropping units after `.eval()`.
        x = F.dropout(x, p=0.2, training=self.training)
        x = F.relu(self.fc2(x))

        # No activation on the last layer: the outputs are logits
        x = self.fc4(x)

        return x

# Build the classifier (1024 input features, 10 output classes) and show its layers
modelo = Clasificador(num_clases=10, num_caracteristicas=1024)
print(modelo)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.Adam(params=modelo.parameters(), lr=1e-5)

# Number of training epochs
n_epochs = 3_000

# One hot encoding
def one_hot_encode(arr, n_labels):
    """One-hot encode an array of integer class indices.

    Parameters
    ----------
    arr : array-like of int
        Class indices, any number of dimensions; every value must lie in
        ``[0, n_labels)``.
    n_labels : int
        Number of classes, i.e. the length of the appended last axis.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(*arr.shape, n_labels)``.
    """
    arr = np.asarray(arr)

    # One row per element. `arr.size` works for any rank, whereas
    # `np.multiply(*arr.shape)` only accepts exactly two dimensions.
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)

    # Set the column selected by each label to one
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.

    # Restore the original layout, with the class axis appended last
    one_hot = one_hot.reshape((*arr.shape, n_labels))

    return one_hot

# Batch generator
def batch_generator(features, labels, batch_size=32, shuffle=True):
    """Yield ``(features, labels)`` mini-batches that together cover the data once.

    With ``shuffle`` set, rows are drawn through a random permutation produced by
    NumPy's global RNG; otherwise consecutive slices are returned. The final
    batch may hold fewer than ``batch_size`` rows.
    """
    permutation = None
    if shuffle:
        # Random visiting order for this pass over the data
        permutation = np.arange(len(features))
        np.random.shuffle(permutation)

    lower = 0
    while lower < len(features):
        upper = min(lower + batch_size, len(features))

        # Fancy-index through the permutation, or take a plain slice
        selector = permutation[lower:upper] if shuffle else slice(lower, upper)

        yield features[selector], labels[selector]

        lower += batch_size


# Build the training / validation arrays once; they do not change between epochs.
# Each record in `grouped` / `grouped_val` is indexed as [0] = features, [1] = label.
train_feature_matrix = np.array([x[0] for x in grouped])
train_label_vector = np.array([x[1] for x in grouped])
val_feature_matrix = np.array([x[0] for x in grouped_val])
val_label_vector = np.array([x[1] for x in grouped_val])

# A checkpoint is written only when the full-validation accuracy beats both
# this threshold and every accuracy seen so far.
best_accuracy = 0.94

# Iterate over the epochs
index = 0  # running count of optimisation steps (batches), not epochs
for epoch in range(n_epochs):
    modelo.train()

    # Iterate over the training data
    for features, labels in batch_generator(train_feature_matrix, train_label_vector):
        # Reset the gradients
        optimizer.zero_grad()

        # Forward pass
        output = modelo(torch.tensor(features))

        # Compute the loss
        loss = criterion(output, torch.tensor(labels))

        # Back-propagate
        loss.backward()

        # Update the parameters
        optimizer.step()

        index += 1

    # Validate only when the cumulative batch count is a multiple of 10.
    # NOTE(review): this is tied to the number of batches, not epochs; confirm that is intended.
    if index % 10 != 0:
        continue

    # Switch to inference mode so layers that honour `module.training` behave deterministically
    modelo.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for features, labels in batch_generator(val_feature_matrix, val_label_vector, shuffle=False):
            # Forward pass
            output = modelo(torch.tensor(features))

            # Predicted class = arg-max logit
            predicted = torch.argmax(output, dim=1).cpu().numpy()

            # Accumulate the accuracy counters
            total += labels.shape[0]
            correct += (predicted == labels).sum()

    # Save best model: decided on the accuracy over the whole validation set.
    # Checking the running ratio inside the loop saved a checkpoint as soon as
    # the first few samples happened to be right.
    accuracy = correct / total
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(modelo.state_dict(), 'best_model.pth')

    print(f'Accuracy: {100 * correct / total}%')

Clasificador(
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=10, bias=True)
)
Accuracy: 34.69387755102041%
Accuracy: 66.66666666666667%
Accuracy: 71.42857142857143%
Accuracy: 82.99319727891157%
Accuracy: 87.07482993197279%
Accuracy: 87.75510204081633%
Accuracy: 88.43537414965986%
Accuracy: 90.47619047619048%
Accuracy: 92.51700680272108%
Accuracy: 91.83673469387755%
Accuracy: 90.47619047619048%
Accuracy: 91.15646258503402%
Accuracy: 90.47619047619048%
Accuracy: 94.5578231292517%
Accuracy: 93.19727891156462%
Accuracy: 92.51700680272108%
Accuracy: 94.5578231292517%
Accuracy: 92.51700680272108%
Accuracy: 92.51700680272108%
Accuracy: 93.19727891156462%
Accuracy: 93.19727891156462%
Accuracy: 93.87755102040816%
Accuracy: 92.51700680272108%
Accuracy: 92.51700680272108%
Accuracy: 93.87755102040816%
Accuracy: 91.83673469387755%
Accuracy: 92.51700680272108%
Accuracy: 91.8367

KeyboardInterrupt: 