# In [None]:
# Import necessary libraries
import io

from google.colab import files
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Upload the dataset file. In Colab, files.upload() opens a file picker and
# returns a dict mapping each uploaded file name to its raw bytes.
uploaded = files.upload()

# Load the dataset into a DataFrame.
# Read straight from the uploaded bytes so the script works whatever the file
# is called (Colab appends " (1)", " (2)", ... to repeated uploads, which made
# the previously hard-coded name fragile).
if uploaded:
    # If several files were selected, the first one is used.
    _csv_name, _csv_bytes = next(iter(uploaded.items()))
    df = pd.read_csv(io.BytesIO(_csv_bytes))
else:
    # Nothing uploaded: fall back to the file name this notebook originally read.
    df = pd.read_csv("sample_data (18).csv")

# Display the DataFrame. A bare `df` expression is only rendered when it is
# the last statement of a notebook cell, so print it explicitly.
print(df)

# Extract features and target variable
features = df[['EC', 'PH', 'Temp']]
target = df['D']

# Split the data into training and testing sets (70% / 30%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)

# Standardize the input features. The scaler is fitted on the training split
# only and then applied to the test split, so no test statistics leak in.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape input data for the LSTM: (samples, n_columns) -> (samples, n_columns, 1).
# The LSTM model below is built with input_shape=(n_columns, 1), i.e. each
# input column is fed as one time step carrying a single feature value.
X_train_scaled_lstm = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_scaled_lstm = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Define an LSTM model
def create_lstm_model():
    """Build and compile the stacked-LSTM regression network.

    The architecture is three LSTM layers (128 -> 64 -> 32 units, ReLU
    activation) with 20% dropout after each of the first two, followed by a
    single linear output unit. The input shape is taken from the module-level
    ``X_train_scaled_lstm`` array: its second axis gives the number of time
    steps, with one feature per step.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, mean squared
        error loss, and MAE as an additional tracked metric.
    """
    n_steps = X_train_scaled_lstm.shape[1]

    # The first two LSTM layers return full sequences so the next LSTM layer
    # receives 3-D input; the last one returns only its final output.
    network = Sequential([
        LSTM(128, input_shape=(n_steps, 1), return_sequences=True, activation='relu'),
        Dropout(0.2),
        LSTM(64, return_sequences=True, activation='relu'),
        Dropout(0.2),
        LSTM(32, activation='relu'),
        Dense(1, activation='linear'),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network

# Define a more complex deep learning model
def create_complex_model():
    """Build and compile the fully connected (ANN) regression network.

    The architecture is three ReLU dense layers (128 -> 64 -> 32 units) with
    20% dropout after each of the first two, followed by a single linear
    output unit. The input width is read from the module-level ``X_train``
    (its number of columns).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, mean squared
        error loss, and MAE as an additional tracked metric.
    """
    n_inputs = X_train.shape[1]

    network = Sequential([
        Dense(128, input_dim=n_inputs, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1, activation='linear'),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network

# Wrap each model builder in a scikit-learn style regressor. Both wrappers
# share the same training settings; the dict keys are the labels used in the
# printed metrics and plot titles.
_fit_settings = dict(epochs=50, batch_size=32, verbose=0)
models = {
    label: KerasRegressor(build_fn=builder, **_fit_settings)
    for label, builder in (
        ('LSTM', create_lstm_model),
        ('ANN', create_complex_model),
    )
}

def _regression_metrics(y_true, y_pred):
    """Return [R-squared, MSE, MAE, RMSE, MAPE] for one set of predictions.

    Both inputs are flattened to 1-D float arrays first so that a pandas
    Series of targets and a NumPy array of predictions are compared
    element by element.

    MAPE is expressed in percent and is averaged only over samples whose true
    value is non-zero, because the percentage error is undefined (division by
    zero) for a zero target. If every target is zero, MAPE is NaN.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    mse = mean_squared_error(y_true, y_pred)
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan

    return [
        r2_score(y_true, y_pred),
        mse,
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mse),
        mape,
    ]


# Evaluate and plot for each model.
# Each model is trained exactly once. The test split is passed as
# validation_data so the same fit also records per-epoch validation loss/MAE;
# the resulting History is kept for the loss plots further down. (Calling
# fit() a second time would rebuild the network from build_fn and train a
# new model, so its curves would not belong to the model evaluated here.)
histories = {}
for model_name, model in models.items():
    # Use reshaped (3-D) data for the LSTM, the plain 2-D arrays otherwise
    if model_name == 'LSTM':
        train_inputs, test_inputs = X_train_scaled_lstm, X_test_scaled_lstm
    else:
        train_inputs, test_inputs = X_train_scaled, X_test_scaled

    # epochs / batch_size / verbose come from the KerasRegressor settings
    history = model.fit(train_inputs, y_train, validation_data=(test_inputs, y_test))
    histories[model_name] = history

    y_pred_train = model.predict(train_inputs)
    y_pred_test = model.predict(test_inputs)

    # Calculate metrics for training and test sets; the individual names are
    # kept at module level in case later cells refer to them.
    train_scores = _regression_metrics(y_train, y_pred_train)
    test_scores = _regression_metrics(y_test, y_pred_test)
    r2_train, mse_train, mae_train, rmse_train, mape_train = train_scores
    r2_test, mse_test, mae_test, rmse_test, mape_test = test_scores

    # Display metrics
    metrics_df = pd.DataFrame({
        'Metric': ['R-squared', 'MSE', 'MAE', 'RMSE', 'MAPE'],
        'Training Set': train_scores,
        'Test Set': test_scores
    })

    print(f"\nMetrics for {model_name}:\n{metrics_df}")

    # Plot actual vs predicted
    plt.scatter(y_test, y_pred_test)
    plt.title(f'Actual vs Predicted ({model_name})')
    plt.xlabel('Actual', fontsize=20)
    plt.ylabel('Predicted', fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.show()

# Plot loss for each model, using the history recorded during the fit above
for model_name, history in histories.items():
    # Plot training and validation loss
    plt.figure(figsize=(12, 6))

    # Left panel: loss (MSE) per epoch
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{model_name} Training and Test Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=20)
    plt.ylabel('Loss', fontsize=20)
    plt.legend()
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)

    # Right panel: MAE per epoch, if the model was compiled with that metric
    plt.subplot(1, 2, 2)
    if 'mae' in history.history:
        plt.plot(history.history['mae'], label='Training MAE')
        plt.plot(history.history['val_mae'], label='Validation MAE')
        plt.title(f'{model_name} Training and Test MAE', fontsize=16)
        plt.xlabel('Epoch', fontsize=20)
        plt.ylabel('MAE', fontsize=20)
        plt.legend()
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)

    plt.tight_layout()
    plt.show()
