In [None]:
# Attach Google Drive to the Colab runtime; later cells read the dataset from it.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU

Reading Dataset

In [None]:
# Load the CSV from the mounted Drive folder and preview its first rows.
csv_path = '/content/drive/MyDrive/EFQM/5-Multi label classification/Multivariate_pollution.csv'
ds = pd.read_csv(csv_path)
ds.head()

Check target values

In [None]:
# Print the distinct values found in each of the two target columns.
for label, column in (('snow data', 'snow'), ('rain data', 'rain')):
    print(label, ds[column].unique())

Checking all data

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) to spot missing values.
ds.info()

Creating time features

In [None]:
# Parse the timestamp column once, expand it into calendar components,
# and discard the raw column afterwards.
timestamps = pd.to_datetime(ds['date'])
ds = ds.assign(
    year=timestamps.dt.year,
    month=timestamps.dt.month,
    day=timestamps.dt.day,
    hour=timestamps.dt.hour,
).drop(columns=['date'])
ds.head()

Create dummy (one-hot) variables from the wnd_dir feature

In [None]:
# One-hot encode the categorical wind direction.
dummy_wind = pd.get_dummies(ds['wnd_dir'])
data = pd.concat([ds, dummy_wind], axis=1)
# Carry the encoded columns forward in `ds`. Previously the concatenated frame
# was left unused in `data` and `ds` only lost `wnd_dir`, so wind direction
# never reached the model.
ds = data.drop(columns=['wnd_dir'])
ds.head()

Feature Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Every column except the two targets is a feature; the targets stay unscaled.
X = ds.drop(columns=['snow', 'rain'])
y = ds[['snow', 'rain']]

# NOTE(review): the scaler is fitted on the full dataset, before the
# train/validation/test split, so min/max statistics from the held-out rows
# influence the training features. Consider fitting on the training rows only.
scaler = MinMaxScaler()
# Keep X's index on the scaled frame: pd.concat aligns rows by index label, so a
# fresh RangeIndex would produce misaligned NaN rows if `ds` were ever filtered
# or re-indexed upstream.
scaled_x = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

ds = pd.concat([scaled_x, y], axis=1)

ds.head()

Windowing

In [None]:
# Build sliding windows: each sample holds the `sequence_length` rows that
# precede row i (all columns), and its label is the (snow, rain) pair of row i.
sequence_length = 16

# Convert to NumPy once instead of calling ds.iloc inside the loop; per-row
# DataFrame indexing builds a new pandas object on every iteration and dominates
# the runtime. The resulting arrays are the same.
values = ds.to_numpy()
target_idx = [ds.columns.get_loc(col) for col in ('snow', 'rain')]

window_ends = range(sequence_length, len(values))
X = [values[i - sequence_length:i] for i in window_ends]
y = [values[i, target_idx] for i in window_ends]

X, y = np.array(X), np.array(y)
X.shape, y.shape

Train, Test, and Validation set splitting

In [None]:
from sklearn.model_selection import train_test_split

# Consecutive windows share all but one of their rows, so a shuffled split puts
# near-duplicate samples in train, validation and test and inflates the scores.
# Split in order instead: first 80% train, next 10% validation, last 10% test.
# NOTE(review): assumes the rows of the CSV are in chronological order — confirm.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)
X_train.shape, X_val.shape, X_test.shape

Model

In [None]:

# Fully connected regressor mapping one window to a (snow, rain) prediction.
model = tf.keras.Sequential([
    # X_train is 3-D (samples, sequence_length, n_features). Flatten each window
    # into one vector so the Dense stack emits one output row per sample; the
    # previous input_shape=(X_train.shape[1],) declared only the window length
    # and did not match the data actually passed to fit().
    tf.keras.layers.Flatten(input_shape=X_train.shape[1:]),

    Dense(128),
    LeakyReLU(alpha=0.01),

    Dense(256),
    LeakyReLU(alpha=0.01),

    Dense(512),
    LeakyReLU(alpha=0.01),

    Dense(256),
    LeakyReLU(alpha=0.01),

    Dense(64),
    LeakyReLU(alpha=0.01),
    Dropout(0.4),

    # Linear output: one unit per target column.
    Dense(2)
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Stop once val_loss has not improved for 32 epochs and roll back to the best
# weights, so the evaluated model is not the one from 32 epochs past the optimum.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=32, restore_best_weights=True
)

history = model.fit(X_train, y_train, epochs=256, batch_size=512, validation_data=(X_val, y_val),
                    callbacks=[early_stopping])

Plot Training Results

In [None]:
# Draw the training and validation loss curves on a single set of axes.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')

Model Evaluation

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the held-out test set: MSE first, then R^2, one value per line.
y_pred = model.predict(X_test)
for metric in (mean_squared_error, r2_score):
    print(metric(y_test, y_pred))