In [None]:
# Predictive Analytics of Air Quality for IoT-Enabled Industrial Environments

In [None]:
# LSTM model Implementation

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load your dataset
file_path = 'data/airparticle.csv'  # Replace with your file path
df = pd.read_csv(file_path)

# Ensure the timestamp column is in datetime format
df['ts_received'] = pd.to_datetime(df['ts_received'])

# Sort chronologically; the windowing and the non-shuffled split that
# follow both rely on rows being in time order.
df = df.sort_values('ts_received')

# Select the features and target for prediction (predicting 'co2' in this example)
features = ['voc', 'mc_1p0', 'mc_2p5', 'mc_10p0', 'mc_4p0', 'ambient_rh', 'ambient_t', 'nox_index', 'voc_index']
target = 'co2'

# Note: each air quality pollutant target is tested separately.
# Switch the target variable below for the required prediction.

#target = 'voc'
#target = 'mc_2p5'
#target = 'mc_10p0'
#target = 'nox_index'


# Fit the scalers on the leading (training) portion of the time series only.
# Fitting on the full frame would leak the min/max of the held-out test
# period into training. The fraction mirrors the chronological 80/20 split
# applied to the sequences later in the notebook.
TRAIN_FRACTION = 0.8
n_fit_rows = max(1, int(len(df) * TRAIN_FRACTION))
df_fit = df.iloc[:n_fit_rows]

# Normalize the feature columns (statistics from training rows, applied to all rows)
scaler_X = MinMaxScaler()
scaler_X.fit(df_fit[features])
X_scaled = scaler_X.transform(df[features])

# Normalize the target separately so predictions can be inverse-transformed
# on their own. Test-period values may fall slightly outside [0, 1].
scaler_y = MinMaxScaler()
scaler_y.fit(df_fit[[target]])
y_scaled = scaler_y.transform(df[[target]])

# Define the input (X) and output (y) after scaling
X = X_scaled
y = y_scaled

In [None]:
# Create sequences
def create_sequences(X, y, time_steps=10):
    """Build sliding windows over X paired with the value of y that follows each window.

    Args:
        X: Indexable sequence of per-step inputs (sliced as X[i:i+time_steps]).
        y: Indexable sequence of per-step targets, indexed as y[i+time_steps].
        time_steps: Number of consecutive steps in each window.

    Returns:
        A tuple (windows, targets) of NumPy arrays. Window k holds
        X[k:k+time_steps] and target k is y[k+time_steps]. Both arrays are
        empty when len(X) <= time_steps.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length: each sample holds this many consecutive rows
time_steps = 10
X_seq, y_seq = create_sequences(X, y, time_steps=time_steps)

# Hold out the last 20% of the windows as the test set; shuffle=False keeps
# the split chronological (train = earlier windows, test = later windows).
split_parts = train_test_split(
    X_seq,
    y_seq,
    test_size=0.2,
    shuffle=False,
)
X_train, X_test, y_train, y_test = split_parts


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
# batch shuffling are repeatable across Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Define the LSTM model.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which newer Keras versions warn against.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),  # (time_steps, n_features)
    LSTM(units=50, return_sequences=False),  # only the final hidden state is passed on
    Dropout(0.2),  # To prevent overfitting
    Dense(units=1),  # Predict one value (the scaled target)
])

# Compile the model: MSE as the training loss, MAE tracked as an extra metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])


In [None]:
# Fit the LSTM for 25 epochs in mini-batches of 32.
# The held-out test split doubles as validation data here, so the reported
# val_* metrics are computed on the same samples used for the final evaluation.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=25,
)

In [None]:
# Evaluate the model on the test data.
# The model is compiled with one extra metric, so evaluate() returns
# [loss, mean_absolute_error]; unpack them instead of printing the raw list
# under a "loss" label. Both values are computed on the scaled target.
test_loss, test_mae = model.evaluate(X_test, y_test)

# Predict on the test set
y_pred = model.predict(X_test)

# Rescale predictions back to the original units of the target
y_pred_rescaled = scaler_y.inverse_transform(y_pred)

# Rescale the true test values as well for comparison
y_test_rescaled = scaler_y.inverse_transform(y_test)

# Print the test loss (MSE) and the test MAE
print(f"Test Loss: {test_loss}")
print(f"Test MAE: {test_mae}")

In [None]:
import matplotlib.pyplot as plt

# Plot the true vs predicted levels of the selected target.
# Labels are derived from `target` so the figure stays correct when a
# different pollutant is selected; for 'co2' this renders as 'CO2'.
target_label = target.upper()

plt.figure(figsize=(14, 5))
plt.plot(y_test_rescaled, label=f'True {target_label} levels')
plt.plot(y_pred_rescaled, label=f'Predicted {target_label} levels', linestyle='--')
plt.title(f'True vs Predicted {target_label} Levels')
plt.xlabel('Time Steps')
plt.ylabel(f'{target_label} Levels')
plt.legend()
plt.show()

In [None]:
# Loss per epoch: training curve first, then the validation curve
ax = plt.gca()
for history_key in ('loss', 'val_loss'):
    ax.plot(history.history[history_key])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# summarize history for mean absolute error (MAE)
import matplotlib.pyplot as plt

# Per-epoch Mean Absolute Error (MAE) on the training and validation data
train_mae = history.history['mean_absolute_error']
val_mae = history.history['val_mean_absolute_error']

ax = plt.gca()
ax.plot(train_mae)
ax.plot(val_mae)
ax.set(title='Model MAE', ylabel='Mean Absolute Error', xlabel='Epoch')
ax.legend(['Train', 'Validation'], loc='upper left')
plt.show()