<h2>Rain in Australia</h2>

<h4><b>Objective:</b> Predict next-day rain in Australia using Artificial Neural Networks </h4>

In [None]:
# Import Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import preprocessing 
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Add, Dense, Dropout, Activation, Input

In [None]:
# Load the weather dataset from its CSV file
dataset_path = '../input/weather-dataset-rattle-package/weatherAUS.csv'
data = pd.read_csv(dataset_path)

# Display the first 5 rows (DataFrame.head defaults to n=5)
data.head()

<h3>1. Data Preprocessing</h3>

<h4>A. Data Cleaning</h4>

In [None]:
# Check how many records have missing or null values.
# The boolean mask marks every row that contains at least one null; summing it
# gives the number of incomplete records directly.
incomplete_rows = int(data.isnull().any(axis=1).sum())
print(f"Records with at least one missing value: {incomplete_rows} of {len(data)}")

# Number of missing values in each column (displayed as the cell output)
data.isnull().sum()

In [None]:
# Keep only complete records: any row holding at least one null value is removed
data = data.dropna(axis=0, how='any')

# Shape of the dataset after removing the incomplete records
data.shape

In [None]:
# Preview the dataset's general information (DataFrame.info()) for the rows
# that remain after the incomplete records were dropped
data.info()

In [None]:
# Replace each date with its month number (1-12).
# The month is kept as a feature because some months (June, July, etc.) are
# expected to have more rainfall than others (November, December, January, etc.).
# NOTE: this overwrites the 'Date' column in place, so the cell is meant to be
# run exactly once per kernel session.
parsed_dates = pd.to_datetime(data['Date'])
data['Date'] = parsed_dates.dt.month

In [None]:
# One-hot encode the month and the three wind-direction features.
# A single get_dummies call appends the dummy columns in the order listed here.
one_hot_columns = ['Date', 'WindGustDir', 'WindDir9am', 'WindDir3pm']
data = pd.get_dummies(data, columns=one_hot_columns)

# Label-encode the remaining categorical features (each value becomes an integer).
# The same encoder object is re-fitted for every column, one after the other.
label_encoding = preprocessing.LabelEncoder()
for column in ('Location', 'RainToday', 'RainTomorrow'):
    data[column] = label_encoding.fit_transform(data[column].astype(str))

# Display the first 5 rows of the encoded dataset
data.head()

In [None]:
# Shuffle the dataset so the model doesn't inadvertently pick up patterns in the
# ordering of the rows. A fixed random_state makes the shuffle reproducible
# across "Restart & Run All" executions.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

<h4>B. Feature Selection</h4>

In [None]:
# Separate the target from the inputs.
# 'Location' is left out of the model inputs together with the target column.
target_column = 'RainTomorrow'
Y = data[target_column]
X = data.drop(columns=['Location', target_column])

In [None]:
# Normalize Dataset: rescale every sample (row) to unit L2 norm.
# NOTE(review): this is per-row scaling, not per-feature scaling, so features
# with large magnitudes dominate each row's norm. If feature-wise scaling was
# intended, a scaler fitted on the training split only would be the usual choice
# -- confirm the intent.
X = preprocessing.normalize(X, norm='l2', axis=1)

In [None]:
# Convert the features and the target to float32 NumPy arrays
X = np.asarray(X, dtype=np.float32)
Y = np.asarray(Y, dtype=np.float32)

In [None]:
# Split the dataset into a training set (80%) and a hold-out set (20%).
# random_state makes the split reproducible; stratify keeps the proportion of
# rain / no-rain days the same in every subset.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

# Further split the hold-out set in half into dev and test sets
X_dev, X_test, Y_dev, Y_test = train_test_split(
    X_val, Y_val, test_size=0.5, random_state=42, stratify=Y_val
)


In [None]:
# Print the shapes of the training, dev and test sets.
# The dev set is passed to fit() as validation data during training, while the
# test set is used to evaluate the model after training.
for split_name, features, targets in (
    ("Training set: ", X_train, Y_train),
    ("Dev set: ", X_dev, Y_dev),
    ("Test set: ", X_test, Y_test),
):
    print(split_name, features.shape, targets.shape)

<h3>2. Model Design & Training</h3>

In [None]:
# Model Architecture
# A stack of residual stages (each with a shortcut connection) followed by a
# narrow tail and a single sigmoid output unit. The shortcuts are intended to
# ease gradient flow through the deep stack.


def residual_block(inputs, units, dropout_rate):
    """Build one residual stage of three Dense layers with a shortcut.

    Layout (all Dense layers have `units` outputs):
        Dense -> ReLU -> Dropout            (this output feeds the shortcut)
        Dense -> ReLU -> Dropout
        Dense -> Add(shortcut) -> ReLU -> Dropout

    Args:
        inputs: Keras tensor fed into the stage.
        units: Width of every Dense layer in the stage.
        dropout_rate: Rate used by every Dropout layer in the stage.

    Returns:
        The Keras tensor produced by the final Dropout layer.
    """
    # First layer group; its post-dropout output is reused as the shortcut branch
    shortcut = Dense(units)(inputs)
    shortcut = Activation('relu')(shortcut)
    shortcut = Dropout(dropout_rate)(shortcut)

    # Second layer group
    hidden = Dense(units)(shortcut)
    hidden = Activation('relu')(hidden)
    hidden = Dropout(dropout_rate)(hidden)

    # Third Dense output is summed with the shortcut before the activation
    merged = Dense(units)(hidden)
    merged = Add()([merged, shortcut])
    merged = Activation('relu')(merged)
    return Dropout(dropout_rate)(merged)


# (units, dropout rate) of each residual stage, from widest to narrowest
RESIDUAL_STAGES = [(512, 0.2), (256, 0.1), (128, 0.1), (64, 0.1), (32, 0.1), (16, 0.1)]

# Widths of the plain Dense + ReLU layers (no dropout, no shortcut) before the output
TAIL_UNITS = [8, 4, 2]

# Take the input width from the training data instead of hard-coding it, so the
# model still builds if the number of encoded feature columns changes.
n_features = X_train.shape[1]
input_tensor = Input(shape=(n_features,))

hidden = input_tensor
for units, dropout_rate in RESIDUAL_STAGES:
    hidden = residual_block(hidden, units, dropout_rate)

for units in TAIL_UNITS:
    hidden = Dense(units)(hidden)
    hidden = Activation('relu')(hidden)

# Single sigmoid unit for the binary RainTomorrow target
output_tensor = Dense(1, activation="sigmoid")(hidden)

In [None]:
# Assemble the functional Keras model from the graph's input and output tensors
model = Model(
    inputs=input_tensor,
    outputs=output_tensor,
)

In [None]:
# Print the model's summary
model.summary()

In [None]:
# Training configuration
loss, optimizer, metrics = 'binary_crossentropy', 'Adam', ['accuracy']
epochs, batch_size = 50, 128

# Compile the model with the configuration above
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Train on the training set; the dev set is evaluated during training and its
# results are recorded in the returned history
model_history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_dev, Y_dev),
    batch_size=batch_size,
    epochs=epochs,
)

In [None]:
# Plot the accuracy and loss curves recorded during training.
# Only the recorded values are plotted (no artificial starting point), so both
# panels share the same epoch axis. The loss series gets its own name so the
# `loss` setting used to compile the model is not overwritten.
history = model_history.history
train_acc = history['accuracy']
val_acc = history['val_accuracy']
train_loss = history['loss']
val_loss = history['val_loss']
epoch_numbers = range(1, len(train_acc) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)

ax_acc.plot(epoch_numbers, train_acc, label='Training Accuracy')
ax_acc.plot(epoch_numbers, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
# Keep the automatically chosen lower bound but cap the axis at 1
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epoch_numbers, train_loss, label='Training Loss')
ax_loss.plot(epoch_numbers, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
ax_loss.set_ylim([0, 1.0])
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('epoch')

plt.show()

In [None]:
# Evaluate the trained model on the test data.
# The element at index 1 of the result is read as the accuracy when the scores
# are reported.
model_evaluation = model.evaluate(
    X_test,
    Y_test,
    batch_size=256,
)

In [None]:
# Report the accuracy of the model on the training, dev and test sets
def _as_percent(fraction):
    """Convert a fraction to a percentage rounded to two decimals."""
    return round(fraction * 100, 2)


# Training and dev figures come from the last recorded epoch of the history
train_accuracy = _as_percent(model_history.history['accuracy'][-1])
dev_accuracy = _as_percent(model_history.history['val_accuracy'][-1])
test_accuracy = _as_percent(model_evaluation[1])

print(f"Train Accuracy: {train_accuracy}%")
print(f"During Training Validation Accuracy: {dev_accuracy}%")
print(f"After Training Test Accuracy: {test_accuracy}%")