In [2]:
import pandas as pd
from sklearn.utils import shuffle

X_test = pd.read_csv('X_test.csv')
X_train = pd.read_csv('X_train.csv')
X_val = pd.read_csv('X_val.csv')
y_train = pd.read_csv('y_train.csv')
y_val = pd.read_csv('y_val.csv')

X_train, y_train = shuffle(X_train, y_train, random_state=55)
X_val, y_val = shuffle(X_val, y_val, random_state=55)

In [24]:
y_train.shape

(705, 1)

In [42]:
label_mapping_legend = {'Good': 0, 'Moderate': 1, 'Poor': 2, 'Severe': 3, 'Unhealthy': 4}
reversed_label_mapping = {value: key for key, value in label_mapping_legend.items()}

In [18]:
y_train.shape

(705, 1)

In [26]:
import tensorflow as tf
from tensorflow.keras import layers, optimizers, models
from tensorflow.keras.callbacks import EarlyStopping

input_shape = (X_train.shape[1],)
num_classes = 5

inputs = layers.Input(shape=input_shape)
x = layers.Dense(256, kernel_initializer='he_normal')(inputs)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(128, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(128, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(32, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(32, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(16, kernel_initializer='he_normal')(x)
x = layers.LeakyReLU(negative_slope=0.01)(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

outputs = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=inputs, outputs=outputs)

initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=10000, decay_rate=0.96, staircase=True)

optimizer = optimizers.Adam(learning_rate=lr_schedule)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=100, batch_size=32,
                    callbacks=[early_stopping])

train_accuracy = model.evaluate(X_train, y_train)[1]
val_accuracy = model.evaluate(X_val, y_val)[1]

train_accuracy, val_accuracy

Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.2362 - loss: 2.1807 - val_accuracy: 0.0056 - val_loss: 284.7353
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3312 - loss: 1.9703 - val_accuracy: 0.0056 - val_loss: 149.0670
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3498 - loss: 1.7722 - val_accuracy: 0.0056 - val_loss: 95.1239
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4326 - loss: 1.5681 - val_accuracy: 0.0056 - val_loss: 68.6350
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4800 - loss: 1.4867 - val_accuracy: 0.0056 - val_loss: 53.7893
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5867 - loss: 1.3145 - val_accuracy: 0.0056 - val_loss: 27.5536
Epoch 7/100
[1m23/23[0m 

(0.9560283422470093, 0.994350254535675)

In [28]:
y_test_predictions = model.predict(X_test)

y_test_pred_labels = tf.argmax(y_test_predictions, axis=1)

y_test_pred_labels.numpy()

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0])

In [44]:
predicted_aqi = [reversed_label_mapping[label] for label in y_test_pred_labels.numpy()]

ids = range(1, len(predicted_aqi) + 1)

predictions_df = pd.DataFrame({
    'ID': ids,
    'Predicted_AQI': predicted_aqi
})
predictions_df.head()

Unnamed: 0,ID,Predicted_AQI
0,1,Good
1,2,Good
2,3,Good
3,4,Good
4,5,Good


In [48]:
predictions_df.to_csv('tensorflow_basic_model.csv', index=False)