# 03 - CNN do Zero

Criar e treinar uma CNN do zero para classificação binária de imagens (Coffee vs Mountains).

## Setup

In [None]:
import sys
sys.path.append('../src')

from pathlib import Path
import time
import numpy as np
import matplotlib.pyplot as plt
import torch
import json

from data.dataset import create_dataloaders
from models.cnn_scratch import SimpleCNN, CNNTrainer
from utils.visualization import plot_training_history, plot_confusion_matrix
from utils.metrics import calculate_metrics, Timer

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Device: {DEVICE}")

In [None]:
# Output locations, all resolved relative to the directory above the notebook.
ROOT_DIR = Path('../')
RESULTS_DIR = ROOT_DIR.joinpath('results', 'cnn_scratch')
MODELS_DIR = ROOT_DIR.joinpath('models')

# Create both output directories up front so later saves cannot fail on a
# missing folder; existing directories are left untouched.
for _out_dir in (RESULTS_DIR, MODELS_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Hyperparameters
BATCH_SIZE = 32
IMG_SIZE = (224, 224)
EPOCHS = 30
LR = 0.001

## Carregar Dataset

In [None]:
# Build the loaders and their underlying datasets from the notebook-level
# configuration. Both returned objects are indexed by split name below
# ('train', 'val', 'test').
loader_config = {
    'root_dir': str(ROOT_DIR),
    'batch_size': BATCH_SIZE,
    'img_size': IMG_SIZE,
    'augment': True,
    'num_workers': 2,
}
dataloaders, datasets = create_dataloaders(**loader_config)

# Report the size of each split and the class names of the training set.
n_train, n_val, n_test = (len(datasets[split]) for split in ('train', 'val', 'test'))
print(f"Train: {n_train} | Val: {n_val} | Test: {n_test}")
print(f"Classes: {datasets['train'].classes}")

## Criar Modelo

In [None]:
# Instantiate the two-class network and move it to the selected device.
model = SimpleCNN(num_classes=2, dropout=0.5).to(DEVICE)

n_params = model.get_num_parameters()
print(f"Modelo criado: {n_params:,} parâmetros")

## Treinar

In [None]:
# Train the model for EPOCHS epochs and record how long the whole run takes.
trainer = CNNTrainer(model=model, device=DEVICE, learning_rate=LR)

print(f"Treinando por {EPOCHS} épocas...\n")

# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments (NTP, DST, manual
# changes) the way a time.time() difference can.
start = time.perf_counter()
history = trainer.train(
    train_loader=dataloaders['train'],
    val_loader=dataloaders['val'],
    epochs=EPOCHS
)
train_time = time.perf_counter() - start  # elapsed seconds

print(f"\nTempo de treino: {train_time/60:.1f} minutos")

## Visualizar Treino

In [None]:
# Plot the training curves and keep a copy of the figure on disk.
# NOTE(review): only the train_* series are requested here; confirm whether
# plot_training_history also draws the matching val_* curves on its own.
fig = plot_training_history(history, metrics=['train_loss', 'train_acc'])
history_plot_path = RESULTS_DIR / 'training_history.png'
plt.savefig(history_plot_path, dpi=150, bbox_inches='tight')
plt.show()

# Summarise the last recorded epoch.
last_train_acc = history['train_acc'][-1]
last_val_acc = history['val_acc'][-1]
print(f"\nÚltima época:")
print(f"  Train Acc: {last_train_acc:.1f}%")
print(f"  Val Acc: {last_val_acc:.1f}%")

## Testar

In [None]:
# Predict on the held-out test split and score the predictions.
y_true, y_pred = trainer.predict(dataloaders['test'])
metrics = calculate_metrics(y_true, y_pred, average='binary')

print("\nMétricas no teste:")
# Accuracy is scaled by 100 and shown as a percentage; the remaining scores
# are printed as stored, with three decimals.
print(f"  Accuracy: {metrics['accuracy']*100:.1f}%")
for _label, _key in (
    ("Precision", "precision"),
    ("Recall", "recall"),
    ("F1-Score", "f1_score"),
):
    print(f"  {_label}: {metrics[_key]:.3f}")

## Matriz de Confusão

In [None]:
from sklearn.metrics import confusion_matrix

# Build the confusion matrix from the test predictions, plot it with the
# training set's class names as labels, and save the figure.
class_names = datasets['train'].classes
cm = confusion_matrix(y_true, y_pred)
fig = plot_confusion_matrix(cm, class_names)

cm_plot_path = RESULTS_DIR / 'confusion_matrix.png'
plt.savefig(cm_plot_path, dpi=150, bbox_inches='tight')
plt.show()

## Medir Velocidade

In [None]:
# Benchmark single-image inference latency on DEVICE.
#
# CUDA kernels are launched asynchronously: a forward pass returns to Python
# before the GPU has finished. Without an explicit synchronisation the timer
# would mostly capture launch overhead, so on CUDA we wait for the device
# after the warm-up and inside every timed region.
N_WARMUP = 10   # untimed forward passes, run before measuring
N_RUNS = 100    # timed forward passes that are averaged

sync_cuda = DEVICE == 'cuda'

model.eval()
test_batch = next(iter(dataloaders['test']))
# Keep only the first sample of the batch so a single image is timed.
test_img = test_batch[0][:1].to(DEVICE)

# Warm-up
with torch.no_grad():
    for _ in range(N_WARMUP):
        _ = model(test_img)
    if sync_cuda:
        # Make sure no warm-up work is still queued when timing starts.
        torch.cuda.synchronize()

# Measure
times = []
with torch.no_grad():
    for _ in range(N_RUNS):
        with Timer() as t:
            _ = model(test_img)
            if sync_cuda:
                # Block until the forward pass has really completed.
                torch.cuda.synchronize()
        # Presumably Timer reports seconds, converted here to milliseconds
        # -- TODO confirm against utils.metrics.Timer.
        times.append(t.get_elapsed_time() * 1000)

avg_time = np.mean(times)
print(f"\nInferência: {avg_time:.2f}ms (média de {N_RUNS} execuções)")

## Salvar Resultados

In [None]:
# Save the model weights together with the training history and test metrics.
# NOTE(review): the file is named "*_best" but stores whatever weights the
# model holds once training has finished; confirm whether CNNTrainer restores
# the best epoch before returning.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'history': history,
    'test_metrics': metrics
}
checkpoint_path = MODELS_DIR / 'cnn_scratch_best.pth'
torch.save(checkpoint, checkpoint_path)

# Save metrics: gather the headline numbers of this run into one summary.
training_summary = {
    'epochs': EPOCHS,
    'training_time_minutes': train_time / 60,
    'final_train_acc': history['train_acc'][-1],
    'final_val_acc': history['val_acc'][-1]
}
results = {
    'model': 'CNN from Scratch',
    'parameters': model.get_num_parameters(),
    'training': training_summary,
    'test': metrics,
    'inference': {
        'avg_time_ms': avg_time
    }
}

summary_path = RESULTS_DIR / 'metrics_summary.json'
with summary_path.open('w') as f:
    json.dump(results, f, indent=2)

print(f"\nResultados salvos em: {RESULTS_DIR}")

## Conclusão

CNN do zero:
- Treino rápido (~10-20 min)
- Inferência muito rápida (~1-5ms)
- Boa acurácia (depende do dataset)
- Controle total da arquitetura

Comparado ao YOLO:
- Muito mais rápida na inferência
- Só classifica (não detecta objetos)
- Modelo mais leve
- Ideal para problemas de classificação simples