In [None]:
# Predictive Analytics of Air Quality for IoT-Enabled Industrial Environments

In [None]:
# The following code implements hybrid LSTM models and several classifiers for air quality
# prediction and classification tasks, respectively.

In [None]:
# Hybrid Learning LSTM (HL-LSTM) models

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Read the dataset from disk
df = pd.read_csv('data/airparticle.csv')

# Parse the receive timestamp, then put the rows in chronological order
df['ts_received'] = pd.to_datetime(df['ts_received'])
df = df.sort_values('ts_received')

# Derive the calendar features from the timestamp in a single step
ts_parts = df['ts_received'].dt
df = df.assign(
    hour=ts_parts.hour,
    day=ts_parts.day,
    month=ts_parts.month,
    day_of_week=ts_parts.dayofweek,
)

# Model inputs: the measurement columns followed by the derived calendar columns
measurement_columns = ['voc', 'mc_1p0', 'mc_2p5', 'mc_10p0', 'mc_4p0',
                       'ambient_rh', 'ambient_t', 'nox_index', 'voc_index']
calendar_columns = ['hour', 'day', 'month', 'day_of_week']
features = df[measurement_columns + calendar_columns].values

# Column forecast by the prediction head.
# Note: each air quality pollutant target is tested separately. To run the
# experiment for another pollutant, change this single assignment, e.g. to
# 'voc', 'mc_2p5', 'mc_10p0' or 'nox_index'.
target = 'co2'

# Extract the targets
y_pred = df[target].values  # prediction (regression) target: the column selected above
y_class = df['class'].values  # classification target

# Replace NaN / infinite entries with finite numbers (np.nan_to_num defaults:
# NaN -> 0, +/-inf -> very large finite values) so scaling and training do not fail
features = np.nan_to_num(features)
y_pred = np.nan_to_num(y_pred)

# NOTE(review): both scalers below are fitted on the full dataset, i.e. before
# the train/test split further down, so test-set min/max influence the training
# inputs. Fitting on the training portion only would remove that leakage.

# Normalize the feature columns to [0, 1]
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(features)

# Normalize the prediction target to [0, 1]; MinMaxScaler needs a 2-D column
scaler_y = MinMaxScaler()
y_pred_scaled = scaler_y.fit_transform(y_pred.reshape(-1, 1))

# Create sequences for LSTM
def create_sequences(features, target, n_timesteps):
    """Slice a time-ordered table into fixed-length windows for an LSTM.

    Window ``i`` holds rows ``i .. i + n_timesteps - 1`` of ``features`` and is
    paired with ``target[i + n_timesteps]``, i.e. the value that immediately
    follows the window.

    Args:
        features: Array-like whose first axis is time.
        target: Array-like aligned row-by-row with ``features``; it must have
            at least ``len(features)`` entries.
        n_timesteps: Window length; must be >= 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, n_timesteps, *features.shape[1:])`` and ``y`` has shape
        ``(n_windows, *target.shape[1:])`` with
        ``n_windows = max(len(features) - n_timesteps, 0)``.

    Raises:
        ValueError: If ``n_timesteps`` is smaller than 1.
    """
    if n_timesteps < 1:
        raise ValueError(f"n_timesteps must be >= 1, got {n_timesteps}")

    features = np.asarray(features)
    target = np.asarray(target)
    n_windows = max(len(features) - n_timesteps, 0)

    if n_windows == 0:
        # Too few rows for a single window: keep the trailing dimensions so
        # callers that read X.shape[1:] still work on the empty result.
        empty_X = np.empty((0, n_timesteps) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0,) + target.shape[1:], dtype=target.dtype)
        return empty_X, empty_y

    Xs = [features[i:i + n_timesteps] for i in range(n_windows)]
    ys = [target[i + n_timesteps] for i in range(n_windows)]
    return np.array(Xs), np.array(ys)

# Number of consecutive rows in each LSTM input window
n_timesteps = 10

# Build the input windows and the aligned (scaled) prediction targets
X, y_pred_seq = create_sequences(X_scaled, y_pred_scaled, n_timesteps)

# create_sequences pairs window i with row i + n_timesteps, so the class label
# of every window is simply the label column shifted by n_timesteps. Slicing
# avoids building the whole 3-D window array a second time just to discard it.
y_class_seq = y_class[n_timesteps:]

# Chronological train/test split. Consecutive windows overlap in all but one
# row, so a shuffled split would place near-duplicates of the training windows
# in the test set and inflate the scores. shuffle=False keeps the first 80% of
# windows for training and the final 20% for testing.
X_train, X_test, y_train_pred, y_test_pred, y_train_class, y_test_class = train_test_split(
    X, y_pred_seq, y_class_seq, test_size=0.2, shuffle=False)

# Check data shapes
print(X_train.shape, y_train_pred.shape, y_train_class.shape)

# Check the unique class labels
print(np.unique(y_class))

# Network input: one window of n_timesteps rows, one column per feature
n_features = X_train.shape[2]
input_layer = Input(shape=(n_timesteps, n_features))

# Shared encoder: a single LSTM layer followed by dropout. Both heads (and the
# feature extractors built in later cells) read the dropout output.
lstm_encoding = LSTM(128, activation='relu')(input_layer)
shared_lstm = Dropout(0.4)(lstm_encoding)

# Prediction head: dense layer -> one linear unit named 'AQI_Prediction'
prediction_head = Dense(64, activation='relu')(shared_lstm)
prediction_output = Dense(1, activation='linear', name='AQI_Prediction')(prediction_head)

# Classification head: dense layer -> 6-way softmax named 'AQI_Classification'.
# The output width is fixed at 6; adjust it if the data has a different number
# of classes.
classification_head = Dense(64, activation='relu')(shared_lstm)
classification_output = Dense(6, activation='softmax', name='AQI_Classification')(classification_head)

# Two-output model: [prediction, classification]
model = Model(inputs=input_layer, outputs=[prediction_output, classification_output])

# Per-head losses and metrics, keyed by the output layer names
head_losses = {
    'AQI_Prediction': 'Huber',
    'AQI_Classification': 'sparse_categorical_crossentropy',
}
head_metrics = {
    'AQI_Prediction': 'mse',
    'AQI_Classification': 'accuracy',
}
model.compile(optimizer=Adam(learning_rate=0.0001), loss=head_losses, metrics=head_metrics)

# Fit both heads jointly for 5 epochs; validation_split=0.2 holds out 20% of
# the training data for validation
train_targets = {'AQI_Prediction': y_train_pred, 'AQI_Classification': y_train_class}
history = model.fit(
    X_train,
    train_targets,
    validation_split=0.2,
    epochs=5,
    batch_size=32,
)

# Score the fitted model on the held-out test windows
test_targets = {'AQI_Prediction': y_test_pred, 'AQI_Classification': y_test_class}
model.evaluate(X_test, test_targets)

In [None]:
# The following section implements the different classification models used for
# air particle quality classification.


In [None]:
#RandomForest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

from sklearn.metrics import accuracy_score
import time

# Sub-model that maps an input window to the shared encoder output
feature_extractor = Model(inputs=input_layer, outputs=shared_lstm)

# Encode the training windows first, then the test windows
train_features, test_features = (
    feature_extractor.predict(windows) for windows in (X_train, X_test)
)

# RandomForest on the encoded features; the timer spans fit, predict and scoring
start_time = time.time()
rf_clf = RandomForestClassifier(n_estimators=20, random_state=42)
rf_preds = rf_clf.fit(train_features, y_train_class).predict(test_features)
rf_accuracy = accuracy_score(y_test_class, rf_preds)
rf_elapsed = time.time() - start_time
print(f"RandomForest Accuracy: {rf_accuracy}, Time taken: {rf_elapsed:.2f} seconds")

In [None]:
#LightGBM

In [None]:
import lightgbm as lgb

import time

# Train LightGBM Classifier on the LSTM-encoded features.
# The run is timed and reported in the same format as the other classifier
# cells so that accuracy and cost can be compared across all models.
start_time = time.time()
lgb_clf = lgb.LGBMClassifier(n_estimators=20, random_state=42)
lgb_clf.fit(train_features, y_train_class)
lgb_preds = lgb_clf.predict(test_features)
lgb_accuracy = accuracy_score(y_test_class, lgb_preds)
print(f"LightGBM Accuracy: {lgb_accuracy}, Time taken: {time.time() - start_time:.2f} seconds")

In [None]:
#KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

# KNN on the encoded features; the timer spans fit, predict and scoring
start_time = time.time()
knn_clf = KNeighborsClassifier(n_neighbors=5)  # n_neighbors is tunable
knn_preds = knn_clf.fit(train_features, y_train_class).predict(test_features)
knn_accuracy = accuracy_score(y_test_class, knn_preds)
knn_elapsed = time.time() - start_time
print(f"KNN Accuracy: {knn_accuracy}, Time taken: {knn_elapsed:.2f} seconds")

In [None]:
#XGBoost Classifier

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score
import time

# XGBoost on the encoded features; the timer spans fit, predict and scoring
start_time = time.time()
xgb_clf = xgb.XGBClassifier(
    n_estimators=20,
    max_depth=6,
    random_state=42,
    use_label_encoder=False,
)
xgb_preds = xgb_clf.fit(train_features, y_train_class).predict(test_features)
xgb_accuracy = accuracy_score(y_test_class, xgb_preds)
xgb_elapsed = time.time() - start_time
print(f"XGBoost Accuracy: {xgb_accuracy}, Time taken: {xgb_elapsed:.2f} seconds")

In [None]:
#GradientBoosting

In [None]:
# GradientBoosting classifier trained on the LSTM encoder features
from sklearn.metrics import accuracy_score
import time

# Rebuild the encoder sub-model and re-encode both splits (train first, then test)
feature_extractor = Model(inputs=input_layer, outputs=shared_lstm)
train_features, test_features = (
    feature_extractor.predict(windows) for windows in (X_train, X_test)
)

# The timer spans fit, predict and scoring (feature extraction is excluded)
start_time = time.time()
gb_clf = GradientBoostingClassifier(n_estimators=10, random_state=42)
gb_preds = gb_clf.fit(train_features, y_train_class).predict(test_features)
gb_accuracy = accuracy_score(y_test_class, gb_preds)
gb_elapsed = time.time() - start_time
print(f"GradientBoosting Accuracy: {gb_accuracy}, Time taken: {gb_elapsed:.2f} seconds")

In [None]:
# SVC Classifier

In [None]:
# SVC on the encoded features; the timer spans fit, predict and scoring
start_time = time.time()
svc_clf = SVC(probability=True, random_state=42)
svc_preds = svc_clf.fit(train_features, y_train_class).predict(test_features)
svc_accuracy = accuracy_score(y_test_class, svc_preds)
svc_elapsed = time.time() - start_time
print(f"SVC Accuracy: {svc_accuracy}, Time taken: {svc_elapsed:.2f} seconds")

In [None]:
#MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import time
# Rebuild the encoder sub-model and re-encode both splits (train first, then test)
feature_extractor = Model(inputs=input_layer, outputs=shared_lstm)
train_features, test_features = (
    feature_extractor.predict(windows) for windows in (X_train, X_test)
)

# MLP with one hidden layer of 50 units, capped at 50 iterations (both tunable);
# the timer spans fit, predict and scoring
start_time = time.time()
mlp_clf = MLPClassifier(hidden_layer_sizes=(50,), max_iter=50, random_state=42)
mlp_preds = mlp_clf.fit(train_features, y_train_class).predict(test_features)
mlp_accuracy = accuracy_score(y_test_class, mlp_preds)
mlp_elapsed = time.time() - start_time
print(f"MLP Accuracy: {mlp_accuracy}, Time taken: {mlp_elapsed:.2f} seconds")

In [None]:
# Logistic Regression Classifier

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Logistic regression on the encoded features (max_iter=50; raise it if needed);
# the timer spans fit, predict and scoring
start_time = time.time()
lr_clf = LogisticRegression(random_state=42, max_iter=50)
lr_preds = lr_clf.fit(train_features, y_train_class).predict(test_features)
lr_accuracy = accuracy_score(y_test_class, lr_preds)
lr_elapsed = time.time() - start_time
print(f"Logistic Regression Accuracy: {lr_accuracy}, Time taken: {lr_elapsed:.2f} seconds")

In [None]:
# Naive Bayes Classifier

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import time

# Gaussian Naive Bayes on the encoded features; the timer spans fit, predict and scoring
start_time = time.time()
nb_clf = GaussianNB()
nb_preds = nb_clf.fit(train_features, y_train_class).predict(test_features)
nb_accuracy = accuracy_score(y_test_class, nb_preds)
nb_elapsed = time.time() - start_time
print(f"Naive Bayes Accuracy: {nb_accuracy}, Time taken: {nb_elapsed:.2f} seconds")