In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
import os
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [3]:
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        
        # --- Encoder ---
        # Entrada: (B, 3, 256, 256)
        self.encoder = nn.Sequential(
            # 1ª Camada: (B, 3, 256, 256) -> (B, 16, 128, 128)
            nn.Conv2d(3, 16, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 2ª Camada: (B, 16, 128, 128) -> (B, 32, 64, 64)
            nn.Conv2d(16, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 3ª Camada: (B, 32, 64, 64) -> (B, 64, 32, 32)
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 4ª Camada: (B, 64, 32, 32) -> (B, 128, 16, 16)
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 5ª Camada: (B, 128, 16, 16) -> (B, 128, 8, 8)
            nn.Conv2d(128, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True)
            # Fim do Encoder. Saída é o espaço latente (128, 8, 8)
        )
        
        # --- Decoder ---
        # Entrada: (B, 128, 8, 8)
        self.decoder = nn.Sequential(
            # 1ª Camada: (B, 128, 8, 8) -> (B, 128, 16, 16)
            nn.ConvTranspose2d(128, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 2ª Camada: (B, 128, 16, 16) -> (B, 64, 32, 32)
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 3ª Camada: (B, 64, 32, 32) -> (B, 32, 64, 64)
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 4ª Camada: (B, 32, 64, 64) -> (B, 16, 128, 128)
            nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            
            # 5ª Camada: (B, 16, 128, 128) -> (B, 3, 256, 256)
            nn.ConvTranspose2d(16, 3, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid() # Saída da imagem normalizada entre [0, 1]
        )

    def forward(self, x):
        encoded = self.encoder(x)
        latent_vector = encoded 
        decoded = self.decoder(latent_vector)
        return decoded, latent_vector

In [15]:
data_root = './places365_data'

dataset = datasets.Places365(
    root=data_root,
    split='train-standart',              # Split: 'train-standard', 'train-challenge', or 'val'
    small=True,
    download=True,
    transform=transforms.ToTensor()
)

BATCH_SIZE = 8
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)


ValueError: Unknown value 'train-standart' for argument split. Valid values are {'train-standard', 'train-challenge', 'val', 'test'}.

In [13]:
EPOCHS = 10
LEARNING_RATE = 1e-3

model = AutoEncoder().to(device)
criterion = nn.MSELoss() 
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

print(f"\nModel initialized and sent to {device}.")
print(f"Total parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
print("-" * 30)

# --- 4B. The Training Function ---
def train_model(model, dataloader, criterion, optimizer, num_epochs):
    model.train()
    training_losses = []
    
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch_idx, (data, _) in enumerate(dataloader):
            data = data.to(device)
            reconstructed, _ = model(data)
            loss = criterion(reconstructed, data)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item() * data.size(0)

            if batch_idx % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx}/{len(dataloader)}], Loss: {loss.item():.6f}')

        avg_epoch_loss = epoch_loss / len(dataset)
        training_losses.append(avg_epoch_loss)
        print(f"--- Epoch {epoch+1} finished. Average Loss: {avg_epoch_loss:.6f} ---")
        
    return training_losses

training_losses = train_model(model, dataloader, criterion, optimizer, EPOCHS)

print("\nTraining complete.")



Model initialized and sent to cuda.
Total parameters: 870,499
------------------------------
Epoch [1/10], Batch [0/4563], Loss: 0.071672
Epoch [1/10], Batch [10/4563], Loss: 0.087771
Epoch [1/10], Batch [20/4563], Loss: 0.065756
Epoch [1/10], Batch [30/4563], Loss: 0.044905
Epoch [1/10], Batch [40/4563], Loss: 0.041097
Epoch [1/10], Batch [50/4563], Loss: 0.029194
Epoch [1/10], Batch [60/4563], Loss: 0.043347
Epoch [1/10], Batch [70/4563], Loss: 0.031558
Epoch [1/10], Batch [80/4563], Loss: 0.026314
Epoch [1/10], Batch [90/4563], Loss: 0.025267
Epoch [1/10], Batch [100/4563], Loss: 0.023428
Epoch [1/10], Batch [110/4563], Loss: 0.025687
Epoch [1/10], Batch [120/4563], Loss: 0.023460
Epoch [1/10], Batch [130/4563], Loss: 0.015108
Epoch [1/10], Batch [140/4563], Loss: 0.018682
Epoch [1/10], Batch [150/4563], Loss: 0.023365
Epoch [1/10], Batch [160/4563], Loss: 0.018064
Epoch [1/10], Batch [170/4563], Loss: 0.020414
Epoch [1/10], Batch [180/4563], Loss: 0.024171
Epoch [1/10], Batch [190

In [14]:
torch.save(model, f"models/model-v1")