In [29]:
import os
import sys
import glob
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (22,8)
np.random.seed(30)
sns.set_theme(style="darkgrid")
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)
warnings.filterwarnings('ignore')
%matplotlib inline

In [30]:
# Read the raw air-quality CSV and peek at its last three rows.
csv_path = "./data/air-quality-index.csv"
df = pd.read_csv(csv_path)
df.tail(3)

Unnamed: 0,No,year,month,day,hour,PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES,DEWP,RAIN,wd,WSPM,station
420765,35062,2017,2,28,21,14.0,28.0,4.0,38.0,500.0,54.0,10.8,1014.2,-13.3,0.0,NW,1.1,Wanshouxigong
420766,35063,2017,2,28,22,12.0,23.0,4.0,30.0,400.0,59.0,10.5,1014.4,-12.9,0.0,NNW,1.2,Wanshouxigong
420767,35064,2017,2,28,23,13.0,19.0,4.0,38.0,600.0,49.0,8.6,1014.1,-15.9,0.0,NNE,1.3,Wanshouxigong


In [31]:
# Drop fully duplicated rows, keeping the first occurrence and the original index
# (same result as masking with `df.duplicated()`, without comparing to False).
df = df.drop_duplicates()

In [32]:
# Keep only the five measurement columns used for modelling; PM2.5 is listed first.
keep_cols = ['PM2.5', 'SO2', 'NO2', 'CO', 'O3']
df = df[keep_cols]

In [33]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [34]:
# Features: the columns at positions 1 and 2 of df; target: the PM2.5 column.
# NOTE(review): the positional slice 1:3 leaves out any columns after position 2 —
# confirm that using only two feature columns is intentional.
X = df.iloc[:, 1:3]
y = df['PM2.5']
y.head()

0    4.0
1    8.0
2    7.0
3    6.0
4    3.0
Name: PM2.5, dtype: float64

In [35]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=30)
X_train, X_test, y_train, y_test = split_parts

In [36]:
# Only a cell's last expression is rendered automatically, so the preview of the
# first ten training rows is shown explicitly (`display` is provided by the
# IPython/Jupyter kernel); the feature-matrix shape remains the cell's output.
display(X_train.head(10))
X.shape

(420768, 2)

In [37]:
# Small feed-forward regressor: input features -> 128 ReLU units -> 1 linear output.
#
# Changes from the previous definition:
#   * The output layer is linear (no activation). A sigmoid confines predictions
#     to (0, 1), but the target y holds raw PM2.5 values well above 1, so the
#     network could never fit them under an MSE loss.
#   * The input is declared with `tf.keras.Input` instead of a leading 1-unit
#     Dense layer, which squeezed all features into a single scalar before the
#     hidden layer ever saw them.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1),
])

In [38]:
# Regression setup: optimise mean squared error with Adam and report mean absolute
# error as a readable metric. ('accuracy' is a classification metric and carries
# no meaning for a continuous target.)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [39]:
# Train for three epochs in mini-batches of ten rows with per-epoch progress output;
# the returned History object is kept in `history`.
fit_kwargs = dict(batch_size=10, epochs=3, verbose=1)
history = model.fit(X_train, y_train.to_numpy(), **fit_kwargs)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [40]:
# Run the trained model on the held-out feature rows and keep the predictions.
y_pred = model.predict(x=X_test)