In [44]:
import pandas as pd
import numpy as np
from pathlib import Path
import os
from torch.utils.data import Dataset
import cv2
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from pytorch_lightning import LightningModule, Trainer
import torchvision
import torchmetrics
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

In [45]:
def cdf(root_path, folder, label, on_ram):
    folder_files = Path (os.path.join(root_path,folder))
    file_list = []
    file_label = []
    for file_path in sorted(folder_files.glob('*.jpg')):
        
        if on_ram:
            file_list.append(cv2.imread(os.fspath(file_path)))
        else:
            file_list.append(os.fspath(file_path))
        file_label.append(label)
    
    data = {'image':file_list, 'label':file_label}
    
    return pd.DataFrame(data)


class Dataset_Flores(Dataset):
    def __init__(self, root_path = '',transform = None,test=False, on_ram = True ,shuffle = True ):
        self.on_ram = on_ram
        self.transform = transform
        
        
        if test:
            root_path = os.path.join(root_path,'test')
        else:
            root_path = os.path.join(root_path,'train')
        
        
        
        df_Rosas = cdf(root_path, 'Rosas',0,on_ram)
        df_Calas_rosa = cdf(root_path, 'Calas_rosa',1,on_ram)
        df_Cardenales_rojas = cdf(root_path, 'Cardenales_rojas',2,on_ram)
        df_Orejas_de_oso = cdf(root_path, 'Orejas_de_oso',3,on_ram)
        df_Otros = cdf(root_path, 'Otros',4,on_ram)
        
        self.Dataframe = pd.concat([df_Rosas, df_Calas_rosa,df_Cardenales_rojas,df_Orejas_de_oso,df_Otros]
                                       , ignore_index=True)
        
        if shuffle:
            self.Dataframe = self.Dataframe.sample(frac=1).reset_index(drop=True)
        
        
        
        
        
        pass
    
    def __len__(self):
        return len(self.Dataframe.index)
    
    def __getitem__(self, idx):
        
        row = self.Dataframe.iloc[[0]]
        
        numero = row.values[0][1]
        #print(numero)
        
        label = torch.tensor(int(numero)) 
        
        
        if self.on_ram:
            img = row.values[0][0]
        else:
            img = cv2.imread(row.values[0][0])
        #img = cv2.imread(img_path)
        
        if self.transform:
            img = self.transform(img)
        
        
        return (img,label)

In [4]:
#a = Dataset_Flores(root_path=os.path.join(os.getcwd(),"Dataset"),on_ram = False,test=True)

In [5]:
#a.__getitem__(0)

(array([[[105, 136, 115],
         [105, 136, 115],
         [102, 135, 114],
         ...,
         [139, 145, 158],
         [140, 146, 159],
         [139, 145, 158]],
 
        [[108, 139, 118],
         [107, 138, 117],
         [102, 135, 114],
         ...,
         [137, 143, 156],
         [138, 144, 157],
         [139, 145, 158]],
 
        [[106, 139, 118],
         [104, 137, 116],
         [100, 133, 112],
         ...,
         [138, 144, 157],
         [140, 146, 159],
         [140, 146, 159]],
 
        ...,
 
        [[  7,   7,   7],
         [  9,   9,   9],
         [  6,   6,   6],
         ...,
         [154, 170, 182],
         [ 85, 101, 113],
         [ 74,  90, 102]],
 
        [[  9,   9,   9],
         [ 11,  11,  11],
         [  9,   9,   9],
         ...,
         [174, 190, 202],
         [109, 125, 137],
         [ 87, 103, 115]],
 
        [[ 11,  11,  11],
         [ 14,  14,  14],
         [ 12,  12,  12],
         ...,
         [172, 188, 200],
  

In [46]:
transform = transforms.Compose([
    torchvision.transforms.ToPILImage(mode=None),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


#Dataset_Flores(root_path=os.path.join(os.getcwd(),"acumulados"),on_ram = False,test=True, transform=transform)


train_ds = Dataset_Flores(root_path=os.path.join(os.getcwd(),"Dataset"),on_ram = False,test=False, transform=transform)
val_ds = Dataset_Flores(root_path=os.path.join(os.getcwd(),"Dataset"),on_ram = False,test=True, transform=transform)
train_loader = DataLoader(train_ds, batch_size=64, num_workers=1)
val_loader = DataLoader(val_ds, batch_size=64, num_workers=1)
print(f'train len {len(train_ds)}')
print(f'val len {len(val_ds)}')

train len 692
val len 171


In [47]:
class Test(LightningModule):
    def __init__(self):
        super().__init__()
        self.model = torchvision.models.alexnet(pretrained=True)
        print(self.model)
        self.model.classifier[4] = torch.nn.Linear(4096,1024)
        self.model.classifier[6] = torch.nn.Linear(1024,5)
        print(self.model)
        self.val_acc = torchmetrics.Accuracy(num_classes=10)

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_nb):
        x, y = batch
        out = self(x)
        loss = F.cross_entropy(out, y)
        print(out.size())
        self.val_acc(out, y.long())
        self.log('val_acc', self.val_acc, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_loss", loss, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

In [40]:
#train_ds[0]

(tensor([[[ 0.5707,  0.6392,  0.7419,  ...,  0.3823,  0.2967, -0.1143],
          [-0.6109, -0.7822, -0.1486,  ..., -0.0972, -0.6623, -1.0048],
          [-1.5528, -1.0904, -0.6452,  ..., -0.7137, -0.9877, -1.3644],
          ...,
          [-1.3302, -1.6042, -1.7583,  ..., -0.3369, -0.8164, -0.9705],
          [-1.2103, -1.6213, -1.4672,  ..., -0.4054, -0.5767, -0.9192],
          [-1.5014, -1.6384, -1.5699,  ..., -0.8164, -0.7479, -0.8164]],
 
         [[ 1.1856,  1.2906,  1.4482,  ...,  0.8179,  0.7304,  0.2752],
          [-0.1975, -0.3550,  0.5553,  ...,  0.2577, -0.3725, -0.7402],
          [-1.3704, -0.7752, -0.0924,  ..., -0.4601, -0.7402, -1.1253],
          ...,
          [-1.1604, -1.4405, -1.5980,  ...,  1.4832,  1.0105,  0.8354],
          [-1.0553, -1.4755, -1.3179,  ...,  1.4132,  1.2206,  0.8704],
          [-1.3704, -1.4755, -1.3880,  ...,  0.9755,  1.0455,  0.9755]],
 
         [[ 1.1411,  1.1759,  1.2631,  ...,  1.3502,  1.2631,  0.8099],
          [-0.0267, -0.2184,

In [48]:
# Init our model
model = Test()
logger = TensorBoardLogger('./log')
checkpoint_callback = ModelCheckpoint(dirpath='./log/checkpoints',
                                      save_top_k=1,
                                      verbose=True,
                                      monitor='val_loss',
                                      mode='min')

# Initialize a trainer
trainer = Trainer(
    accelerator='gpu',
    max_epochs=5,
    logger=logger,
    callbacks=[checkpoint_callback]
)

# Train the model
trainer.fit(model, train_loader, val_loader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

Missing logger folder: ./log\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type     | Params
-------------------------------------
0 | model   | AlexNet  | 44.4 M
1 | val_acc | Accuracy | 0     
-------------------------------------
44.4 M    Trainable params
0         Non-trainable params
44.4 M    Total params
177.692   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

RuntimeError: DataLoader worker (pid(s) 21488) exited unexpectedly