**Redimensionamiento de imagenes**

Lo que haremos sera a partir del Dataset STL-10 el cual contiene imagenes de 96 x 96 pixeles reducir dichas imagenes a 66 x 96 piexeles para verificar si la presicion o validation accuracy se mantiene similar, aumenta o disminute. Esto con el proposito de verificar si rel redimensionamiento de imagenes afecta en el entrenamiento.

In [1]:

import torch
import numpy as np
import pandas as pd
import torch.nn as nn

from torchvision import models,datasets,transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

from torchsummary import summary
from torchvision.utils import make_grid
from torchvision.datasets import STL10
import itertools
import torch.optim as optim
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve

Dataset contiene 13000 imagenes clasificadas en 10 catergorias o etiquetas.

In [25]:
import numpy as np
from skimage import io
import albumentations as A
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"


def rgb_to_gray(img):
    return np.dot(img[...,:3], [0.2989, 0.5870, 0.1140])

# Dataset personalizado
class Dataset(torch.utils.data.Dataset):
    def __init__(self, X, y, trans=None, device="cpu", to_gray=True):
        self.X = X
        self.y = y
        self.trans = trans
        self.device = device
        self.to_gray = to_gray

    def __len__(self):
        return len(self.X)

    def __getitem__(self, ix):
        img = self.X[ix]

        # Convertir a escala de grises si es necesario
        if self.to_gray:
            img = rgb_to_gray(img)

        # Aplicar transformaciones adicionales
        if self.trans:
            img = self.trans(image=img)["image"]

        # Convertir a tensor y agregar dimensión de canal
        img_tensor = torch.from_numpy(img / 255.).float().unsqueeze(0)

        return img_tensor, torch.tensor(self.y[ix])

In [26]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# Cargar STL10
trainData = STL10('./data', split='train', download=True)
testData = STL10('./data', split='test', download=True)


Files already downloaded and verified
Files already downloaded and verified


In [27]:
train_images = np.array(trainData.data)
train_labels = trainData.labels
test_images = np.array(testData.data)
test_labels = testData.labels

In [32]:
import albumentations as A
device = "cuda" if torch.cuda.is_available() else "cpu"
trans = A.Compose([
    A.Resize(96, 96)
])

# Crear datasets
dataset = {
    'train': Dataset(train_images, train_labels, trans, device, to_gray=True),
    'test': Dataset(test_images, test_labels, trans, device, to_gray=True)
}

In [None]:
# #version git
# batch_size = 100
# trainLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True,num_workers=2,pin_memory =True)
# testLoader = DataLoader(testData, batch_size=batch_size, shuffle=True,num_workers =2,pin_memory =True)

In [33]:
# Crear dataloaders
dataloader = {
    'train': torch.utils.data.DataLoader(dataset['train'], batch_size=64, shuffle=True, pin_memory=True),
    'test': torch.utils.data.DataLoader(dataset['test'], batch_size=256, shuffle=False)
}

# Verificar el resultado
imgs, labels = next(iter(dataloader['train']))
print("shape después de transformaciones:", imgs.shape)

shape después de transformaciones: torch.Size([64, 1, 96, 96])


In [34]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

TEST

In [35]:
import torchvision

resnet = torchvision.models.resnet50()
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

real test

In [40]:
import torchvision

class ModelCustom(torch.nn.Module):
    def __init__(self, n_outputs=10, pretrained=True, freeze=False):
        super().__init__()
        # Descargar ResNet
        resnet = torchvision.models.resnet50(pretrained=pretrained)

        # Cambiar la primera capa convolucional para aceptar imágenes en escala de grises
        resnet.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

        # Mantener todas las capas excepto la última
        self.resnet = torch.nn.Sequential(*list(resnet.children())[:-1])
        if freeze:
            for param in self.resnet.parameters():
                param.requires_grad = False

        # Añadir una nueva capa lineal para la clasificación
        self.fc = torch.nn.Linear(2048, 10)

    def forward(self, x):
        x = self.resnet(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

    def unfreeze(self):
        for param in self.resnet.parameters():
            param.requires_grad = True

In [41]:
model_custom = ModelCustom()
outputs = model_custom(torch.randn(64, 1, 96, 96))
print(outputs.shape)

torch.Size([64, 10])


In [42]:
from tqdm import tqdm
import numpy as np

def fit(model, dataloader, epochs=5, lr=1e-2):
    model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    for epoch in range(1, epochs+1):
        model.train()
        train_loss, train_acc = [], []
        bar = tqdm(dataloader['train'])
        for batch in bar:
            X, y = batch
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = model(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            acc = (y == torch.argmax(y_hat, axis=1)).sum().item() / len(y)
            train_acc.append(acc)
            bar.set_description(f"loss {np.mean(train_loss):.5f} acc {np.mean(train_acc):.5f}")
        bar = tqdm(dataloader['test'])
        val_loss, val_acc = [], []
        model.eval()
        with torch.no_grad():
            for batch in bar:
                X, y = batch
                X, y = X.to(device), y.to(device)
                y_hat = model(X)
                loss = criterion(y_hat, y)
                val_loss.append(loss.item())
                acc = (y == torch.argmax(y_hat, axis=1)).sum().item() / len(y)
                val_acc.append(acc)
                bar.set_description(f"val_loss {np.mean(val_loss):.5f} val_acc {np.mean(val_acc):.5f}")
        print(f"Epoch {epoch}/{epochs} loss {np.mean(train_loss):.5f} val_loss {np.mean(val_loss):.5f} acc {np.mean(train_acc):.5f} val_acc {np.mean(val_acc):.5f}")

In [43]:
#transfer learning a resnet50
model_c = ModelCustom(pretrained=True, freeze=False)
fit(model_c, dataloader)
#Con pretrained = true y freeze = true:  acc 0.75198 val_acc 0.71619

loss 2.12153 acc 0.20787: 100%|██████████| 79/79 [00:19<00:00,  4.00it/s]
val_loss 2.83236 val_acc 0.13708: 100%|██████████| 32/32 [00:06<00:00,  5.32it/s]


Epoch 1/5 loss 2.12153 val_loss 2.83236 acc 0.20787 val_acc 0.13708


loss 1.84442 acc 0.30637: 100%|██████████| 79/79 [00:10<00:00,  7.49it/s]
val_loss 1.99941 val_acc 0.22266: 100%|██████████| 32/32 [00:05<00:00,  5.85it/s]


Epoch 2/5 loss 1.84442 val_loss 1.99941 acc 0.30637 val_acc 0.22266


loss 1.65915 acc 0.38370: 100%|██████████| 79/79 [00:10<00:00,  7.45it/s]
val_loss 2.17249 val_acc 0.22095: 100%|██████████| 32/32 [00:05<00:00,  5.60it/s]


Epoch 3/5 loss 1.65915 val_loss 2.17249 acc 0.38370 val_acc 0.22095


loss 1.49987 acc 0.44759: 100%|██████████| 79/79 [00:10<00:00,  7.41it/s]
val_loss 2.12722 val_acc 0.22473: 100%|██████████| 32/32 [00:05<00:00,  5.77it/s]


Epoch 4/5 loss 1.49987 val_loss 2.12722 acc 0.44759 val_acc 0.22473


loss 1.31049 acc 0.52888: 100%|██████████| 79/79 [00:10<00:00,  7.33it/s]
val_loss 2.29022 val_acc 0.24463: 100%|██████████| 32/32 [00:05<00:00,  5.80it/s]

Epoch 5/5 loss 1.31049 val_loss 2.29022 acc 0.52888 val_acc 0.24463





In [44]:
#transfer learning a resnet50
model_c = ModelCustom(pretrained=True, freeze=True)
fit(model_c, dataloader)
#Con pretrained = true y freeze = true:  acc 0.75198 val_acc 0.71619

loss 2.20990 acc 0.19482: 100%|██████████| 79/79 [00:03<00:00, 20.83it/s]
val_loss 2.28703 val_acc 0.17871: 100%|██████████| 32/32 [00:05<00:00,  5.42it/s]


Epoch 1/5 loss 2.20990 val_loss 2.28703 acc 0.19482 val_acc 0.17871


loss 2.05686 acc 0.25158: 100%|██████████| 79/79 [00:03<00:00, 21.16it/s]
val_loss 2.23305 val_acc 0.18201: 100%|██████████| 32/32 [00:05<00:00,  5.51it/s]


Epoch 2/5 loss 2.05686 val_loss 2.23305 acc 0.25158 val_acc 0.18201


loss 1.98576 acc 0.28402: 100%|██████████| 79/79 [00:03<00:00, 20.02it/s]
val_loss 2.14832 val_acc 0.22498: 100%|██████████| 32/32 [00:06<00:00,  5.18it/s]


Epoch 3/5 loss 1.98576 val_loss 2.14832 acc 0.28402 val_acc 0.22498


loss 1.94363 acc 0.28797: 100%|██████████| 79/79 [00:04<00:00, 19.62it/s]
val_loss 2.28583 val_acc 0.18225: 100%|██████████| 32/32 [00:06<00:00,  5.03it/s]


Epoch 4/5 loss 1.94363 val_loss 2.28583 acc 0.28797 val_acc 0.18225


loss 1.92117 acc 0.30637: 100%|██████████| 79/79 [00:04<00:00, 17.81it/s]
val_loss 2.28383 val_acc 0.20898: 100%|██████████| 32/32 [00:05<00:00,  5.62it/s]

Epoch 5/5 loss 1.92117 val_loss 2.28383 acc 0.30637 val_acc 0.20898





Transformando de 3 canales las imagenes a 1 canal se reduce considerablemente la presicion del entrenamiento. Esto puede ser que ocurra que al pasar de RGB a escala de grises,  se esta perdiendo información. Los tres canales en una imagen RGB capturan información sobre los colores rojo, verde y azul, mientras que una imagen en escala de grises combina esa información en un solo canal. Esta pérdida de información puede ser crucial para ciertas tareas y conjuntos de datos.



Tratando de mejorar la presicion del modelo obtenemos una reduccion en la misma

In [None]:
# # newmodel = torch.nn.Sequential(*(list(model.children())[:-1]))
# # summary(newmodel, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

(5000, 2048)
Length of Y_train 5000


(8000, 2048)
Length of Y_test 8000


  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)


5000

Accuracy:  0.8286


Confusion matrix

Accuracy:  0.097875
