In [None]:
# Library imports and parameter definitions

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchviz import make_dot
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import os

# Global hyperparameters shared by the cells below
NUM_CLASSES = 10  # number of target classes (adjust to your dataset)
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 1e-3


In [None]:
# Custom Dataset definition

class AudioDataset(Dataset):
    """Dataset of spectrogram images stored in one sub-folder per class.

    Expected layout::

        data_dir/
            class_a/xxx.png
            class_b/yyy.png

    Attributes:
        samples (list[str]): Paths of every ``.png`` file that was found.
        labels (list[int]): Class index of each entry in ``samples``.
        classes (list[str]): Class folder names, sorted alphabetically.
        class_to_idx (dict[str, int]): Maps a class name to its index.
    """

    def __init__(self, data_dir, transform=None):
        """
        Args:
            data_dir (string): Directory containing the class sub-folders.
            transform (callable, optional): Applied to every loaded image.
        """
        self.data_dir = data_dir
        self.transform = transform
        self.samples = []
        self.labels = []
        self.classes = []
        self.class_to_idx = {}

        # Index the files up front; images are only opened in __getitem__.
        self._load_data()

    def _load_data(self):
        """Scan ``data_dir`` and (re)build samples, labels and class maps."""
        # Only real, non-hidden directories are classes. This skips stray
        # files and folders such as ``.ipynb_checkpoints``. Sorting makes the
        # class -> index mapping reproducible, which os.listdir alone does not
        # guarantee.
        classes = sorted(
            entry for entry in os.listdir(self.data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(self.data_dir, entry))
        )
        class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}

        samples = []
        labels = []
        for cls_name in classes:
            cls_dir = os.path.join(self.data_dir, cls_name)
            # Sorted so that sample order (and any seeded split) is stable.
            for filename in sorted(os.listdir(cls_dir)):
                # Case-insensitive so that ``.PNG`` files are picked up too.
                if filename.lower().endswith('.png'):
                    samples.append(os.path.join(cls_dir, filename))
                    labels.append(class_to_idx[cls_name])

        # Assign at the end so calling this method twice never duplicates entries.
        self.samples = samples
        self.labels = labels
        self.classes = classes
        self.class_to_idx = class_to_idx

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and passed through ``transform`` when
        one was supplied; the label is the integer class index.
        """
        # Local import: Pillow is only needed once an image is actually read.
        from PIL import Image

        img_path = self.samples[idx]
        label = self.labels[idx]

        # The context manager closes the file handle right after decoding
        # instead of leaving it open until garbage collection.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label


In [None]:
# Preprocessing pipeline applied to every spectrogram image

preprocessing_steps = [
    transforms.Resize((128, 128)),  # adjust the target size as needed
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
]
transform = transforms.Compose(preprocessing_steps)


In [None]:
# Build the dataset, split it and wrap both splits in DataLoaders

# Directory that holds the spectrogram images
data_dir = 'data/spectrograms'  # adjust this path to wherever your data lives

# Index every spectrogram on disk
dataset = AudioDataset(data_dir=data_dir, transform=transform)

# Fail early with a readable message instead of an obscure error further down.
if len(dataset) == 0:
    raise ValueError(f'No .png spectrograms found under {data_dir!r}')
if len(dataset.classes) > NUM_CLASSES:
    raise ValueError(
        f'Found {len(dataset.classes)} class folders but NUM_CLASSES is {NUM_CLASSES}; '
        'labels would fall outside the range of the model outputs'
    )

# 80/20 train/validation split. The generator is seeded so the same samples
# end up in each split on every run (Restart & Run All stays reproducible).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Shuffle only the training batches; validation order does not matter
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
# Model architecture definition

class CNN_LSTM(nn.Module):
    """Two-stage CNN followed by a 2-layer LSTM and a linear classifier.

    The CNN halves the spatial size twice and produces 32 feature maps. Its
    output is reshaped to ``[batch, -1, lstm_input_size]`` and fed to the
    LSTM; the LSTM output at the last step is projected to ``num_classes``
    logits.

    NOTE(review): when the input matches ``input_size`` each of the 32 feature
    maps becomes one LSTM step (its features are the flattened map). Confirm
    this is the intended sequence axis rather than the spectrogram time axis.

    Args:
        num_classes (int): Number of output logits.
        input_size (tuple[int, int], optional): ``(height, width)`` of the
            input images, used to size the LSTM input. Defaults to
            ``(128, 128)``, which gives the former hard-coded ``32 * 32``.
    """

    def __init__(self, num_classes, input_size=(128, 128)):
        super().__init__()

        # Convolutional feature extractor (expects 3-channel input)
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # halves height and width
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # halves them again
        )

        # The padded 3x3 convolutions keep the spatial size and each pooling
        # layer floors it to half, so one feature map holds (H//4)*(W//4) values.
        height, width = input_size
        self.lstm_input_size = (height // 4) * (width // 4)
        self.hidden_size = 64
        self.num_layers = 2

        # Recurrent stage
        self.lstm = nn.LSTM(input_size=self.lstm_input_size, hidden_size=self.hidden_size,
                            num_layers=self.num_layers, batch_first=True)

        # Classification head
        self.fc = nn.Linear(self.hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``[batch, num_classes]`` for image batch ``x``."""
        batch_size = x.size(0)

        # CNN output: [batch, channels, height, width]
        x = self.cnn(x)

        # [batch, sequence, features]; reshape (unlike view) also accepts
        # non-contiguous CNN output.
        x = x.reshape(batch_size, -1, self.lstm_input_size)

        # No explicit (h_0, c_0): nn.LSTM starts from zero states by default,
        # already on the input's device and dtype. The hand-built float32
        # tensors cost a host->device copy per call and broke .double()/.half().
        out, _ = self.lstm(x)  # out: [batch, sequence, hidden_size]

        # Keep only the output of the last sequence step
        out = out[:, -1, :]  # [batch, hidden_size]

        return self.fc(out)  # [batch, num_classes]


In [None]:
# Model, loss and optimizer setup

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the network and move its parameters to the chosen device
model = CNN_LSTM(num_classes=NUM_CLASSES)
model.to(device)

# Cross-entropy loss for classification, optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


In [None]:
# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a global ``device`` variable. One summary
    line is printed per epoch.

    Args:
        model (nn.Module): Network to optimize (updated in place).
        train_loader (DataLoader): Yields ``(images, labels)`` training batches.
        val_loader (DataLoader): Yields ``(images, labels)`` validation batches.
        criterion (callable): Loss taking ``(outputs, labels)``.
        optimizer (torch.optim.Optimizer): Optimizer over ``model``'s parameters.
        num_epochs (int): Number of passes over ``train_loader``.

    Returns:
        dict[str, list[float]]: Per-epoch ``train_loss``, ``train_acc``,
        ``val_loss`` and ``val_acc`` (accuracies in percent). A metric is
        ``nan`` for an epoch whose loader yielded no samples.
    """
    # Follow the model's own placement; a parameter-less model runs on the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(num_epochs):
        epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, run_device, optimizer)
        val_epoch_loss, val_epoch_acc = _run_epoch(model, val_loader, criterion, run_device)

        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc)
        history['val_loss'].append(val_epoch_loss)
        history['val_acc'].append(val_epoch_acc)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.2f}%, '
              f'Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.2f}%')

    return history


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``: a training pass if ``optimizer`` is given, else evaluation.

    Returns ``(mean_loss, accuracy_percent)`` over the samples actually seen,
    or ``(nan, nan)`` when the loader yields nothing.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    # Gradients are only tracked during the training pass.
    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight the batch loss by its size so a short last batch
            # does not skew the epoch average.
            batch = labels.size(0)
            loss_sum += loss.item() * batch
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch

    if seen == 0:
        # Nothing to average; report "undefined" instead of dividing by zero.
        return float('nan'), float('nan')
    return loss_sum / seen, 100 * correct / seen


In [None]:
# Run the full training loop with the objects configured above
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
)


In [None]:
from torchviz import make_dot

# Switch to evaluation mode before tracing a forward pass
model.eval()

# Dummy input batch (adjust the dimensions if the input size changes)
dummy_input = torch.randn(1, 3, 128, 128).to(device)

# Forward pass whose autograd graph will be drawn
output = model(dummy_input)

# Build the graph, labelling nodes with the model's parameter names,
# then render it as a PNG named 'model_architecture'
named_params = dict(model.named_parameters())
dot = make_dot(output, params=named_params)
dot.format = 'png'
dot.render('model_architecture')


In [None]:
from torchsummary import summary

# Install torchsummary first if it is missing:
# pip install torchsummary

# Print a layer-by-layer summary for a single 3x128x128 input.
# NOTE(review): nn.LSTM returns a nested tuple (output, (h_n, c_n)); torchsummary's
# forward hooks may not handle that and raise an AttributeError - verify this cell
# runs on a fresh kernel with the installed torchsummary version.
summary(model, input_size=(3, 128, 128))
