In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Load the dataset
dataset_path = "coliform_cfu_dataset87.csv"
dataset = pd.read_csv(dataset_path)

# Prepare the features (R, G, B for first 4 hours) and target variable
features = [f"R_hour_{t}" for t in range(1, 5)] + [f"G_hour_{t}" for t in range(1, 5)] + [f"B_hour_{t}" for t in range(1, 5)]
X = dataset[features].values
y = dataset['CFU/100ml'].values

# Normalize the data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Reshape X for LSTM input (samples, time steps, features per time step)
X_reshaped = X_scaled.reshape(X_scaled.shape[0], 4, 3)  # 4 hours (time steps), 3 features (R, G, B)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_scaled, test_size=0.2, random_state=42)

# Build the LSTM model
model = Sequential([
    LSTM(64, activation='tanh', return_sequences=True, input_shape=(4, 3)),
    BatchNormalization(),
    Dropout(0.2),
    LSTM(32, activation='tanh'),
    Dropout(0.2),
    Dense(1)  # Output layer for regression
])

# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

# Evaluate the model
predictions = model.predict(X_test)
predictions_rescaled = scaler_y.inverse_transform(predictions)
y_test_rescaled = scaler_y.inverse_transform(y_test)
mse = mean_squared_error(y_test_rescaled, predictions_rescaled)
print(f"LSTM Model Mean Squared Error: {mse}")

# Make predictions for new samples (optional)
test_samples = pd.DataFrame([
    {"R_hour_1": 200, "R_hour_2": 195.68, "R_hour_3": 190.56, "R_hour_4": 189.45,
     "G_hour_1": 50, "G_hour_2": 56.18, "G_hour_3": 58.80, "G_hour_4": 66.63,
     "B_hour_1": 50, "B_hour_2": 47.55, "B_hour_3": 46.36, "B_hour_4": 44.71},
    {"R_hour_1": 200, "R_hour_2": 196.77, "R_hour_3": 191.26, "R_hour_4": 187.49,
     "G_hour_1": 50, "G_hour_2": 54.13, "G_hour_3": 62.73, "G_hour_4": 64.79,
     "B_hour_1": 50, "B_hour_2": 47.84, "B_hour_3": 47.42, "B_hour_4": 46.62}
])
test_samples_scaled = scaler_X.transform(test_samples.values)
test_samples_reshaped = test_samples_scaled.reshape(test_samples.shape[0], 4, 3)
predicted_cfu = model.predict(test_samples_reshaped)
predicted_cfu_rescaled = scaler_y.inverse_transform(predicted_cfu)

# Display predictions
for i, cfu in enumerate(predicted_cfu_rescaled, start=1):
    print(f"Sample {i}: Predicted CFU = {cfu[0]:.2f}")


  super().__init__(**kwargs)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - loss: 0.3380
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.2438 
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.1691 
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1526 
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0825 
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0683 
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0484 
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0369 
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0472 
Epoch 10/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0384 
Epoch 11/50
[1m2/2