In [1]:
#import all the relevant libraries
import pandas as pd
import tensorflow as tf
import keras 
from keras import layers
from keras.layers import Dense
import keras_tuner

In [2]:
# Load the raw dataset from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='Real_Combine.csv')

In [3]:
# Preview the first five rows to sanity-check the load
df.head()

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
0,7.4,9.8,4.8,1017.6,93.0,0.5,4.3,9.4,219.720833
1,7.8,12.7,4.4,1018.5,87.0,0.6,4.4,11.1,182.1875
2,6.7,13.4,2.4,1019.4,82.0,0.6,4.8,11.1,154.0375
3,8.6,15.5,3.3,1018.7,72.0,0.8,8.1,20.6,223.208333
4,12.4,20.9,4.4,1017.3,61.0,1.3,8.7,22.2,200.645833


In [7]:
# Show every row whose 'PM 2.5' target value is missing
missing_target = df['PM 2.5'].isnull()
df.loc[missing_target]

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
184,14.3,19.2,10.9,1020.5,91.0,1.6,4.8,11.1,


In [9]:
# Drop every row with a missing 'PM 2.5' target instead of hard-coding index 184:
# this stays correct if the CSV changes and the cell can be re-run safely
# (dropping a fixed index a second time raises KeyError).
df = df.dropna(subset=['PM 2.5'])

In [10]:
# Count the remaining missing values per column
df.isna().sum()

T         0
TM        0
Tm        0
SLP       0
H         0
VV        0
V         0
VM        0
PM 2.5    0
dtype: int64

In [14]:
# Separate the feature columns (x) from the single-column target frame (y)
target_column = 'PM 2.5'
x = df.drop(columns=target_column)
y = df[[target_column]]

In [15]:
# Start from an empty Sequential model: a plain stack of layers with one
# input and one output, which is all this regression needs
model = keras.models.Sequential()

In [16]:
# Check (rows, columns) of the feature frame; the column count is the
# input width the network below must accept
x.shape

(1092, 8)

In [17]:
# Stack the network: an input sized to the feature count, two hidden ReLU
# layers of 10 units each, and one linear unit for the regression output.
# The input shape is a tuple derived from x rather than a bare int 8:
# Keras 3 rejects integer shapes, and the width now follows the data.
model.add(layers.Input(shape=(x.shape[1],)))
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(1, activation='linear'))

In [18]:
# Configure training: Adam optimizer, with mean squared error used both as
# the loss being minimized and as the reported metric
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

In [19]:
# Split into train and test sets. random_state pins the shuffle so the split
# (and every downstream metric) is reproducible across kernel restarts.
# NOTE(review): test_size=0.6 holds out 60% of the rows and trains on only
# 40% - confirm this is intended rather than a 60/40 train/test split.
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.6, random_state=42)

In [20]:
# Min-max scale features and target. Each scaler is fitted on the training
# split only and then applied to the test split.
from sklearn.preprocessing import MinMaxScaler

# Feature scaler
sc = MinMaxScaler().fit(xtrain)
Xtrain = sc.transform(xtrain)
Xtest = sc.transform(xtest)

# Target scaler, kept separately so predictions can be inverse-transformed later
sc1 = MinMaxScaler().fit(ytrain)
Ytrain = sc1.transform(ytrain)
Ytest = sc1.transform(ytest)

In [21]:
# Train for 20 epochs on the scaled training data. The returned History is
# captured so per-epoch losses can be inspected (history.history) and its
# object repr no longer appears as the cell output.
history = model.fit(Xtrain, Ytrain, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2683adc89d0>

In [22]:
# Evaluate on the held-out split. Returns [loss, metric]; both are mean
# squared error on the min-max-scaled target, not in original 'PM 2.5' units.
model.evaluate(Xtest,Ytest, batch_size= 30)



[0.022531956434249878, 0.022531956434249878]

##### Hyperparameter Tuning with Keras Tuner

In [23]:
from keras_tuner import RandomSearch

In [39]:
def build_model(hp):
    """Build and compile a tunable dense regression network for Keras Tuner.

    Search space:
      * 'layers'        - number of hidden layers, 1 to 3
      * 'units_<i>'     - width of hidden layer i, 4 to 100 in steps of 2
      * 'learning_rate' - Adam learning rate, one of 1e-4, 1e-2, 1e-6

    Args:
        hp: the hyperparameter object the tuner passes in; values are drawn
            from it via hp.Int / hp.Choice.

    Returns:
        A compiled keras.Sequential model using mean squared error as both
        loss and metric.
    """
    model_1 = keras.Sequential()
    # Tune the number of hidden layers
    for i in range(hp.Int('layers', 1, 3)):
        model_1.add(
            layers.Dense(
                # Each layer needs its own hyperparameter name: reusing a single
                # 'units' name makes every layer share one sampled width, so
                # per-layer widths would never actually be searched.
                units=hp.Int(f'units_{i}', min_value=4, max_value=100, step=2),
                activation='relu',
            )
        )
    # Single linear unit for the regression output
    model_1.add(layers.Dense(1, activation='linear'))
    # Tune the learning rate
    learning_rate = [1e-4, 1e-2, 1e-6]
    model_1.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', learning_rate)
        ),
        loss='mean_squared_error',
        metrics=['mean_squared_error'],
    )
    return model_1

In [44]:
# Random search over the build_model search space, ranked by validation MSE.
# overwrite=True discards results from any previous run stored in the default
# project directory; without it a re-run reloads the stale trials and exits
# immediately instead of searching. seed makes the sampled trials repeatable.
tuner = RandomSearch(
    hypermodel=build_model,
    objective='val_mean_squared_error',
    max_trials=5,
    seed=42,
    overwrite=True,
)

In [45]:
# Run the search. Validation data is carved out of the training split
# (validation_split) rather than taken from the test split: selecting
# hyperparameters on (Xtest, Ytest) and then reporting the score on the same
# data leaks test information and makes the final metric optimistic.
tuner.search(Xtrain, Ytrain, epochs=10, validation_split=0.2)

Trial 5 Complete [00h 00m 01s]
val_mean_squared_error: 0.018709233030676842

Best val_mean_squared_error So Far: 0.018709233030676842
Total elapsed time: 00h 00m 09s
INFO:tensorflow:Oracle triggered exit


#### With the help of hyperparameter tuning, we reduced the mean squared error and found the best model

In [48]:
# Take the first model returned by the tuner as the best model from the search
best_model = tuner.get_best_models()[0]

In [49]:
# Train the selected model for 20 more epochs on the scaled training data.
# The returned History is captured so per-epoch losses can be inspected and
# its object repr no longer appears as the cell output.
best_history = best_model.fit(Xtrain, Ytrain, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x268390b72e0>

In [50]:
# Evaluate the tuned model on the held-out split. Returns [loss, metric];
# both are mean squared error on the min-max-scaled target.
best_model.evaluate(Xtest,Ytest, batch_size= 30)



[0.016283830627799034, 0.016283830627799034]

In [52]:
# Predict on the scaled test features; the model was trained on the scaled
# target, so these outputs are in scaled units as well
predictions = best_model.predict(Xtest, batch_size = 30)

In [53]:
# Undo the target scaling (sc1 was fitted on ytrain) so the predictions are
# expressed in the original 'PM 2.5' scale
predictions_inverse = sc1.inverse_transform(predictions)

In [55]:
# Alias the scaled test targets as the ground truth to compare against
Actual = Ytest

In [56]:
# Undo the target scaling on the ground truth so it is in the original
# 'PM 2.5' scale
Actual_inverse = sc1.inverse_transform(Actual)

In [69]:
# Wrap the single-column array of unscaled predictions in a DataFrame,
# naming the column at construction time
final_result = pd.DataFrame(predictions_inverse, columns=['Predictions'])

In [71]:
# Add the unscaled ground-truth values next to the predictions (rows align
# by position, since both come from the same test split in the same order)
final_result['Actual'] = Actual_inverse

In [72]:
# Compare the first five predictions with the actual values
final_result.head()

Unnamed: 0,Predictions,Actual
0,91.770668,122.708333
1,96.091408,89.375
2,49.178345,71.666667
3,229.247955,219.720833
4,81.905151,105.333333
