In [144]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [145]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [146]:
# Cargar el dataset desde Google Drive
file_path = '/content/drive/My Drive/inteligencia-artificial-II/dataset_calidad_agua.csv'
df = pd.read_csv(file_path)

print(df.head())
# Informacion del dataset
print(df.info())


        Fecha         Temporada  Temperatura_Agua_T    pH  Cianuro_Libre  \
0  2016-01-01  Tiempo de lluvia               15.57  7.91           0.18   
1  2016-01-02  Tiempo de lluvia               13.43  8.33           0.17   
2  2016-01-03  Tiempo de lluvia               19.29  8.48           0.12   
3  2016-01-04  Tiempo de lluvia               17.67  6.11           0.05   
4  2016-01-05  Tiempo de lluvia               26.22  6.34           0.10   

   Cromo_6   DBO5     DQO  Fosforo  Hierro  Niquel  Solidos_Suspendidos  \
0     0.04  51.40  134.97     2.26    0.60    0.07                24.85   
1     0.05   3.51  186.84     1.47    2.80    0.47                63.69   
2     0.09  24.07  152.57     4.25    1.31    0.44                 8.74   
3     0.08  14.38  205.67     4.19    0.69    0.12                23.92   
4     0.04  27.69    7.05     2.37    3.66    0.12                57.90   

   Solidos_Sedimentables  Sulfuros  Fenoles    Calidad  
0                   6.25      0.18 

In [147]:
# Convertir fecha a datetime
df['Fecha'] = pd.to_datetime(df['Fecha'])

# Convertir columna temporada
df['Temporada'] = df['Temporada'].map({'Tiempo de lluvia': 1, 'Tiempo de estiaje': 2})

# Convertir la columna calidad
df['Calidad'] = df['Calidad'].map({'Buena': 0, 'Aceptable': 1, 'Mala': 2})

print(df.head())
print(df.info())

       Fecha  Temporada  Temperatura_Agua_T    pH  Cianuro_Libre  Cromo_6  \
0 2016-01-01          1               15.57  7.91           0.18     0.04   
1 2016-01-02          1               13.43  8.33           0.17     0.05   
2 2016-01-03          1               19.29  8.48           0.12     0.09   
3 2016-01-04          1               17.67  6.11           0.05     0.08   
4 2016-01-05          1               26.22  6.34           0.10     0.04   

    DBO5     DQO  Fosforo  Hierro  Niquel  Solidos_Suspendidos  \
0  51.40  134.97     2.26    0.60    0.07                24.85   
1   3.51  186.84     1.47    2.80    0.47                63.69   
2  24.07  152.57     4.25    1.31    0.44                 8.74   
3  14.38  205.67     4.19    0.69    0.12                23.92   
4  27.69    7.05     2.37    3.66    0.12                57.90   

   Solidos_Sedimentables  Sulfuros  Fenoles  Calidad  
0                   6.25      0.18     0.09        0  
1                   6.19      

In [148]:
# Ordenar los datos por fecha
df = df.sort_values(by='Fecha')

# Seleccionar las caracteristicas y el objetivo
features = df.drop(columns=['Fecha', 'Calidad'])
target = df['Calidad']

# Normalizar las caracteristicas
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

# Convertir a numpy arrays
X = np.array(features_scaled)
y = np.array(target)

# Crear secuencias temporales
def create_sequences(X, y, seq_length=30):
    sequences, targets = [], []
    for i in range(len(X) - seq_length):
        sequences.append(X[i:i+seq_length])
        targets.append(y[i+seq_length])
    return np.array(sequences), np.array(targets)

# Definir el tamño de la secuencia
X_seq, y_seq = create_sequences(X, y)

# Dividir en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# Crear dataset
class WaterQualityDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Crear DataLoader
batch_size = 64
train_loader = DataLoader(WaterQualityDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(WaterQualityDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

In [149]:
# Definir el modelo rnn
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hiperparámetros
input_size = X_seq.shape[2]  # Tamaño de la secuencia de entrada
hidden_size = 64  # Tamaño de la capa oculta
num_layers = 2  # Número de capas LSTM
output_size = len(np.unique(y))  # Tamaño de la salida, según las clases de y
learning_rate = 0.001  # Tasa de aprendizaje
num_epochs = 10  # Número de épocas

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Inicializar el modelo, la función de pérdida y el optimizador
model = RNNModel(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [150]:
# Entrenamiento del modelo
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for sequences, labels in train_loader:
            sequences, labels = sequences.to(device), labels.to(device)

            # Forward pass
            outputs = model(sequences)
            loss = criterion(outputs, labels)

            # Backward pass y optimización
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')

train_model(model, train_loader, criterion, optimizer, num_epochs, device)

Epoch [1/10], Loss: 0.9586
Epoch [2/10], Loss: 0.9276
Epoch [3/10], Loss: 0.9239
Epoch [4/10], Loss: 0.9253
Epoch [5/10], Loss: 0.9240
Epoch [6/10], Loss: 0.9234
Epoch [7/10], Loss: 0.9196
Epoch [8/10], Loss: 0.9229
Epoch [9/10], Loss: 0.9206
Epoch [10/10], Loss: 0.9225


In [151]:
# Evaluación del modelo
def evaluate_model(model, test_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for sequences, labels in test_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy: {100 * correct / total:.2f}%')

evaluate_model(model, test_loader, device)

Accuracy: 66.07%


In [154]:
# Predicciones 2025
def predict_future_quality(model, data, device, seq_length=30):
    model.eval()
    predictions = []
    with torch.no_grad():
        for i in range(len(data)):
            seq = torch.tensor(data[i], dtype=torch.float32).unsqueeze(0).to(device)
            output = model(seq)
            _, predicted = torch.max(output, 1)
            predictions.append(predicted.item())
    return predictions

# Preparar datos para predicciones
data_seq, _ = create_sequences(X, y)
predictions = predict_future_quality(model, data_seq, device)

print(predictions[:100])

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
