In [25]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


# Lista de las columnas que quieres seleccionar
columnas_a_cargar = ['SDT', 'pH_CAMPO', 'OD_%', 'TEMP_AMB', 'TEMP_AGUA', 'N_TOT']  # Cambia los nombres según tus columnas

# Lee el archivo CSV y lo convierte en un DataFrame
df = pd.read_csv('C:\\Users\\Alienware X15\\Desktop\\tesis\\BDreconstruccion\\BDWeka\\BDentrenamientoWeka.csv', usecols=columnas_a_cargar)

# Asignar las variables de entrada (X) y la variable de salida (y)
X = df.drop(columns=['N_TOT'])  # Todas las columnas excepto 'N_TOT'
y = df['N_TOT']  # La columna 'N_TOT' como variable de salida

# Normalizar los datos de entrada antes de dividir
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dividir los datos en conjunto de entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Verificar las formas
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)




X_train shape: (2441, 5)
X_test shape: (611, 5)
y_train shape: (2441,)
y_test shape: (611,)


In [24]:
# Convertir los datos a tensores
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)  # Convertir a valores para asegurar compatibilidad
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Imprimir cómo se ven los tensores
print("X_train_tensor:", X_train_tensor)
print("X_test_tensor:", X_test_tensor)
print("y_train_tensor:", y_train_tensor)
print("y_test_tensor:", y_test_tensor)

X_train_tensor: tensor([[-0.1895, -0.3654, -0.3355,  1.4303,  1.6370],
        [-0.2037,  0.1371,  0.1490, -0.3182,  0.4562],
        [-0.1632,  3.2478,  1.3398,  0.6954,  0.8306],
        ...,
        [ 0.1147, -0.8201, -0.0337, -0.0648, -0.1198],
        [-0.1680,  0.6156,  1.0070, -1.3571, -0.4654],
        [-0.1902,  0.8549,  0.6938,  0.1886,  0.1682]])
X_test_tensor: tensor([[-0.2044, -0.4851, -0.0141,  1.0755,  0.0242],
        [-0.1856,  1.0942,  0.9972, -0.5716, -1.4735],
        [-0.1899,  0.3763, -0.3795,  0.6954,  0.3698],
        ...,
        [-0.1199,  0.1371, -0.1805, -0.6729,  0.4274],
        [-0.0262, -0.4372, -2.0401, -1.2811,  0.2546],
        [-0.1588,  0.1371,  0.0087,  0.4420,  1.2626]])
y_train_tensor: tensor([0.4280, 0.7445, 0.8307,  ..., 0.7951, 0.8910, 0.6638])
y_test_tensor: tensor([3.8000e-01, 5.1666e-01, 8.5000e-01, 5.2024e-01, 2.6370e+00, 4.1800e-01,
        8.4588e-01, 2.8112e+00, 4.6019e-01, 1.8816e+00, 2.9721e+00, 6.4029e+00,
        3.6761e-01, 3.0910e

In [23]:
# Crear los datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Crear los dataloaders con batch size de 32
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Verificar el funcionamiento imprimiendo un batch de los datos
for X_batch, y_batch in train_loader:
    print("X_batch:", X_batch)
    print("y_batch:", y_batch)
    break  # Solo para imprimir el primer batch

X_batch: tensor([[-0.2067, -0.1022,  0.3708, -0.3182,  0.1682],
        [ 2.3863,  0.6156,  0.4132, -0.8250, -0.4078],
        [-0.1971, -1.0594, -0.0305,  0.9488,  0.2546],
        [-0.1989,  0.3763,  0.1555, -0.5716, -0.4366],
        [ 0.1258, -0.1022,  0.0837,  0.1886,  0.3410],
        [-0.1826, -0.1022,  0.2175,  0.1886, -1.3295],
        [-0.1833,  1.5728,  0.2403, -0.8250, -0.1774],
        [-0.1607, -1.0594, -0.6927, -0.8250, -0.0334],
        [-0.0647, -0.3415, -0.7449, -1.0784, -1.3295],
        [-0.1651,  1.3335,  0.6090,  1.7090,  0.6290],
        [-0.0729,  1.1899,  1.4833,  1.2276,  1.3202],
        [-0.1483,  3.7264,  3.1733,  0.4420,  1.1186],
        [-0.1961,  1.3335,  0.2077,  0.1886, -0.8110],
        [-0.1671,  0.8789,  0.7982, -1.7626, -1.4447],
        [ 0.0777, -0.7244,  2.2892,  1.9118,  2.4435],
        [ 0.0348, -0.1022, -2.6469,  1.4556,  1.3202],
        [ 0.0231, -1.3705, -2.6469,  0.1633,  2.0114],
        [ 0.1220,  0.3524,  1.6301, -0.3689, -0.0046],
 

In [26]:
import torch
import torch.nn as nn
import torch.optim as optim

# Definir el modelo Transformer
class TransformerRegression(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout):
        super(TransformerRegression, self).__init__()
        self.input_fc = nn.Linear(input_dim, d_model)  # Proyección a las dimensiones del modelo
        self.pos_encoder = nn.Sequential(
            nn.Linear(d_model, d_model),  # Codificación posicional simple
            nn.ReLU()
        )
        self.transformer = nn.Transformer(
            d_model=d_model, 
            nhead=nhead, 
            num_encoder_layers=num_encoder_layers, 
            dim_feedforward=dim_feedforward, 
            dropout=dropout
        )
        self.output_fc = nn.Linear(d_model, 1)  # Transformación final para regresión (un valor de salida)
    
    def forward(self, src):
        # Proyectar los datos de entrada a la dimensión del modelo
        src = self.input_fc(src)
        src = self.pos_encoder(src)
        
        # La entrada debe tener la forma [seq_len, batch_size, feature_dim]
        src = src.unsqueeze(0)  # Añadimos una dimensión de secuencia (de longitud 1 en este caso)
        
        # Pasamos los datos a través del transformer
        output = self.transformer(src, src)
        
        # Tomamos solo el último vector para la predicción
        output = self.output_fc(output.squeeze(0))
        return output

# Inicializar el modelo
input_dim = X_train_tensor.shape[1]  # Dimensión de entrada (número de características)
d_model = 128  # Tamaño del embedding
nhead = 4  # Número de cabezas de atención
num_encoder_layers = 3  # Número de capas del encoder
dim_feedforward = 128  # Tamaño de las capas feedforward
dropout = 0.1  # Tasa de dropout

model = TransformerRegression(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout)

# Definir el optimizador y la función de pérdida
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Entrenamiento del modelo
def train_model(model, train_loader, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)  # Ajuste para eliminar la dimensión extra de salida
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Entrenar el modelo por 50 epochs
num_epochs = 50
train_model(model, train_loader, num_epochs)

# Evaluar el modelo en el conjunto de prueba
def evaluate_model(model, test_loader):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)
            total_loss += loss.item()
    
    print(f'Test Loss: {total_loss/len(test_loader):.4f}')

# Evaluar en el conjunto de prueba
evaluate_model(model, test_loader)




Epoch [1/50], Loss: 51.2749
Epoch [2/50], Loss: 39.0908
Epoch [3/50], Loss: 30.7248
Epoch [4/50], Loss: 36.4027
Epoch [5/50], Loss: 27.7747
Epoch [6/50], Loss: 24.8000
Epoch [7/50], Loss: 29.9504
Epoch [8/50], Loss: 57.2652
Epoch [9/50], Loss: 36.9878
Epoch [10/50], Loss: 61.2119
Epoch [11/50], Loss: 59.9858
Epoch [12/50], Loss: 61.1158
Epoch [13/50], Loss: 62.2753
Epoch [14/50], Loss: 61.9372
Epoch [15/50], Loss: 61.7120
Epoch [16/50], Loss: 37.8623
Epoch [17/50], Loss: 48.5712
Epoch [18/50], Loss: 41.4511
Epoch [19/50], Loss: 39.4637
Epoch [20/50], Loss: 35.9119
Epoch [21/50], Loss: 34.1705
Epoch [22/50], Loss: 34.5732
Epoch [23/50], Loss: 33.0217
Epoch [24/50], Loss: 34.6547
Epoch [25/50], Loss: 46.6086
Epoch [26/50], Loss: 60.5992
Epoch [27/50], Loss: 59.1387
Epoch [28/50], Loss: 66.0789
Epoch [29/50], Loss: 71.7185
Epoch [30/50], Loss: 56.2051
Epoch [31/50], Loss: 50.7266
Epoch [32/50], Loss: 51.7439
Epoch [33/50], Loss: 54.2977
Epoch [34/50], Loss: 33.5893
Epoch [35/50], Loss: 27

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim

# Definir el modelo Transformer
class TransformerRegression(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout):
        super(TransformerRegression, self).__init__()
        self.input_fc = nn.Linear(input_dim, d_model)  # Proyección a las dimensiones del modelo
        self.pos_encoder = nn.Sequential(
            nn.Linear(d_model, d_model),  # Codificación posicional simple
            nn.ReLU()
        )
        self.transformer = nn.Transformer(
            d_model=d_model, 
            nhead=nhead, 
            num_encoder_layers=num_encoder_layers, 
            dim_feedforward=dim_feedforward, 
            dropout=dropout
        )
        self.output_fc = nn.Linear(d_model, 1)  # Transformación final para regresión (un valor de salida)
    
    def forward(self, src):
        # Proyectar los datos de entrada a la dimensión del modelo
        src = self.input_fc(src)
        src = self.pos_encoder(src)
        
        # La entrada debe tener la forma [seq_len, batch_size, feature_dim]
        src = src.unsqueeze(0)  # Añadimos una dimensión de secuencia (de longitud 1 en este caso)
        
        # Pasamos los datos a través del transformer
        output = self.transformer(src, src)
        
        # Tomamos solo el último vector para la predicción
        output = self.output_fc(output.squeeze(0))
        return output

# Inicializar el modelo
input_dim = X_train_tensor.shape[1]  # Dimensión de entrada (número de características)
d_model = 64  # Tamaño del embedding
nhead = 4  # Número de cabezas de atención
num_encoder_layers = 3  # Número de capas del encoder
dim_feedforward = 128  # Tamaño de las capas feedforward
dropout = 0.1  # Tasa de dropout

model = TransformerRegression(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout)

# Definir el optimizador y la función de pérdida
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Entrenamiento del modelo
def train_model(model, train_loader, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)  # Ajuste para eliminar la dimensión extra de salida
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Entrenar el modelo por 50 epochs
num_epochs = 50
train_model(model, train_loader, num_epochs)

import matplotlib.pyplot as plt

# Función para evaluar el modelo y graficar los valores reales vs. predichos
def evaluate_and_plot(model, test_loader):
    model.eval()
    real_values = []
    predicted_values = []
    
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            real_values.extend(y_batch.numpy())  # Convertir los valores reales a una lista
            predicted_values.extend(outputs.squeeze(-1).numpy())  # Convertir las predicciones a una lista

    # Graficar los valores reales vs predichos
    plt.figure(figsize=(8, 6))
    plt.scatter(real_values, predicted_values, alpha=0.5, color='b', label='Predicciones')
    plt.plot([min(real_values), max(real_values)], [min(real_values), max(real_values)], color='r', label='Línea ideal')  # Línea de referencia
    plt.xlabel('Valores Reales')
    plt.ylabel('Valores Predichos')
    plt.title('Valores Reales vs Predichos')
    plt.legend()
    plt.show()

# Evaluar y graficar en el conjunto de prueba
evaluate_and_plot(model, test_loader)



Epoch [1/50], Loss: 55.8563
Epoch [2/50], Loss: 43.4895
Epoch [3/50], Loss: 33.4976
Epoch [4/50], Loss: 33.7798
Epoch [5/50], Loss: 24.9737
Epoch [6/50], Loss: 24.8261
Epoch [7/50], Loss: 30.0731
Epoch [8/50], Loss: 23.1109
Epoch [9/50], Loss: 29.7986
Epoch [10/50], Loss: 22.9058
Epoch [11/50], Loss: 35.4626
Epoch [12/50], Loss: 26.5916
Epoch [13/50], Loss: 22.0304
Epoch [14/50], Loss: 29.0998
Epoch [15/50], Loss: 26.3026
Epoch [16/50], Loss: 23.9389
Epoch [17/50], Loss: 23.1260
Epoch [18/50], Loss: 24.6683
Epoch [19/50], Loss: 23.9188
Epoch [20/50], Loss: 26.4518
Epoch [21/50], Loss: 41.8945
Epoch [22/50], Loss: 32.9100
Epoch [23/50], Loss: 32.6572
Epoch [24/50], Loss: 33.1944
Epoch [25/50], Loss: 33.0145
Epoch [26/50], Loss: 32.9621
Epoch [27/50], Loss: 32.3990
Epoch [28/50], Loss: 34.3720
Epoch [29/50], Loss: 32.9623
Epoch [30/50], Loss: 33.0150
Epoch [31/50], Loss: 33.6679
Epoch [32/50], Loss: 33.7875
Epoch [33/50], Loss: 32.8325
Epoch [34/50], Loss: 32.4313
Epoch [35/50], Loss: 31

RuntimeError: Numpy is not available