Consider  attached air quality data (named Real_Combine.csv) file with following feature values:
T == Average Temperature (°C)
TM == Maximum temperature (°C)
Tm == Minimum temperature (°C)
SLP == Atmospheric pressure at sea level (hPa)
H == Average relative humidity (%)
VV == Average visibility (Km)
V == Average wind speed (Km/h)
VM == Maximum sustained wind speed (Km/h)
PM2.5== Fine particulate matter (PM2.5) is an air pollutant that is a concern for people's health when levels in air are high



Build a DNN model which will give you optimum MAE(mean absolute error) value on test data.
(Note:Use hyper parameter tuning on followings:
HyperparametersHow many number of hidden layers we should have(take range 2-20)?How many number of neurons we should have in hidden layers(range(32,512,32))?
different optimizer.

# Import The Necessary Libraries

In [1]:
import pandas as pd
import numpy as np

# Load and Preprocess Data

In [2]:
#Load the data
data=pd.read_csv("/content/Shubham THORAT - Real_Combine - Shubham THORAT - Real_Combine.csv")

In [3]:
data.head()

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
0,,,,,,,,,
1,7.4,9.8,4.8,1017.6,93.0,0.5,4.3,9.4,219.720833
2,,,,,,,,,
3,7.8,12.7,4.4,1018.5,87.0,0.6,4.4,11.1,182.1875
4,,,,,,,,,


In [4]:
data.tail()

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
2181,13.9,24.5,11.4,1015.0,95.0,0.6,8.7,14.8,169.0
2182,,,,,,,,,
2183,16.3,23.0,9.8,1016.9,78.0,1.1,7.4,16.5,186.041667
2184,,,,,,,,,
2185,16.3,23.4,9.0,1017.3,68.0,1.3,7.8,18.3,185.583333


In [5]:
data.shape

(2186, 9)

In [6]:
data.isnull().sum()

T         1093
TM        1093
Tm        1093
SLP       1093
H         1093
VV        1093
V         1093
VM        1093
PM 2.5    1094
dtype: int64

In [7]:
data=data.dropna()

In [8]:
data.isnull().sum()

T         0
TM        0
Tm        0
SLP       0
H         0
VV        0
V         0
VM        0
PM 2.5    0
dtype: int64

In [9]:
X=data.drop("PM 2.5",axis=1)
y=data["PM 2.5"]

In [10]:
X.shape

(1092, 8)

In [11]:
y.shape

(1092,)

In [12]:
#Scale the data using Standard scalar
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X=sc.fit_transform(X)

In [13]:
X

array([[-2.5758334 , -3.4040112 , -1.97306575, ..., -2.01150096,
        -0.63876585, -0.87715776],
       [-2.52049852, -2.96903049, -2.02687159, ..., -1.87771128,
        -0.61272881, -0.64450561],
       [-2.67266943, -2.86403514, -2.29590082, ..., -1.87771128,
        -0.50858065, -0.64450561],
       ...,
       [-1.67664167, -1.19910894, -1.08526931, ..., -1.87771128,
         0.50686392, -0.13814505],
       [-1.34463241, -1.42409896, -1.30049269, ..., -1.20876285,
         0.1683824 ,  0.0945071 ],
       [-1.34463241, -1.36410162, -1.40810438, ..., -0.94118348,
         0.27253056,  0.34084467]])

In [14]:
from sklearn.model_selection import train_test_split
#Split the data using train_test_split()
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=32)

## Define the model-building

In [15]:
from tensorflow import keras
from tensorflow.keras import layers

In [16]:
def build_model(hp):
    model = keras.Sequential()

    # Add an input layer
    model.add(layers.Dense(units=hp.Choice('units_input', values=[32, 64, 128]), activation='relu', input_dim=X_train.shape[1]))

    # Add hidden layers
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Choice(f'units_{i}', values=[32, 64, 128, 256, 512]), activation='relu'))

    # Add the output layer
    model.add(layers.Dense(1))

    # Choose an optimizer
    optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop', 'sgd'])
    if optimizer == 'adam':
        model.compile(optimizer='adam', loss='mean_absolute_error')
    elif optimizer == 'rmsprop':
        model.compile(optimizer='rmsprop', loss='mean_absolute_error')
    else:
        model.compile(optimizer='sgd', loss='mean_absolute_error')

    return model

# perform the hyperparameter search

In [17]:
!pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.3.5 kt-legacy-1.0.5


In [18]:
from kerastuner import RandomSearch


  from kerastuner import RandomSearch


In [19]:
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir',
    project_name='air_quality'
)

In [20]:
tuner.search(X_train, y_train, epochs=100, validation_split=0.2, verbose=2)

Trial 10 Complete [00h 00m 48s]
val_loss: 31.663410822550457

Best val_loss So Far: 22.617514928181965
Total elapsed time: 00h 14m 36s


In [21]:
best_model = tuner.get_best_models(num_models=1)[0]
mae = best_model.evaluate(X_test, y_test)
print("Optimum MAE:", mae)

Optimum MAE: 26.529027938842773


In [22]:
mae1 = best_model.evaluate(X_train, y_train)
print("Optimum MAE:", mae1)

Optimum MAE: 16.489049911499023
