## Data Pre-processing

#### Import relevant modules

In [None]:
#@title Run on TensorFlow 2.x
#%tensorflow_version 2.x
#from __future__ import absolute_import, division, print_function, unicode_literals

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix

#### Load the dataset

In [None]:
# Load the raw weather observations; the relative path means the CSV must sit
# in the notebook's working directory.
df = pd.read_csv("weatherAUS.csv")

In [None]:
# Preview the first few rows to sanity-check the load.
df.head()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Summary statistics for every column, non-numeric ones included.
df.describe(include='all')

In [None]:
# Remove the columns that are not used as model inputs.
# NOTE(review): the reason each column is dropped (sparsity, target leakage,
# ...) is not visible in this notebook -- confirm against the dataset notes.
unused_columns = [
    'Date', 'Location', 'Evaporation', 'Sunshine',
    'Cloud3pm', 'Cloud9am', 'RISK_MM',
]
df.drop(columns=unused_columns, inplace=True)

In [None]:
# Encode the Yes/No flags as 1/0. Anything that is not exactly 'Yes' or 'No'
# (including missing values) comes out of .map() as NaN.
yes_no_to_int = {'Yes': 1, 'No': 0}
for flag_column in ('RainToday', 'RainTomorrow'):
    df[flag_column] = df[flag_column].map(yes_no_to_int)

In [None]:
# Discard every row that still has at least one missing value, then show the
# remaining (rows, columns) so the amount of data lost is visible.
df.dropna(axis=0, how='any', inplace=True)
df.shape

In [None]:
# One-hot encode the wind-direction columns. drop_first=True removes one level
# per feature so the dummy columns are not perfectly collinear.
categorical = ['WindGustDir', 'WindDir9am', 'WindDir3pm']

# Pin the dummy dtype: pandas >= 2.0 emits bool dummies by default, and a
# frame mixing bool and float columns turns into an object array under
# `.values`, which Keras cannot consume. uint8 matches the pre-2.0 default.
df_final = pd.get_dummies(
    df,
    columns=categorical,
    drop_first=True,
    dtype='uint8',
)

In [None]:
# Preview the encoded frame to check the new dummy columns.
df_final.head()

In [None]:
# Class balance: the fraction of rows carrying each RainTomorrow value.
df_final['RainTomorrow'].value_counts()/len(df_final)

## Separating Your Training and Testing Datasets

In [None]:
# Separate the label column from the feature matrix and hand both to the
# downstream code as plain NumPy arrays.
target_column = 'RainTomorrow'
y = df_final[target_column].values
X = df_final.drop(columns=[target_column]).values

In [None]:
# Hold out 20% of all rows as the final test set; the fixed seed keeps the
# split reproducible.
X1, X_test, y1, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Split the remaining 80% again: 20% of it (16% of all rows) becomes the
# validation set used during training, the rest (64%) is the training set.
X_train, X_val, y_train, y_val = train_test_split(X1, y1, test_size=0.2, random_state=42)

## Transforming the Data

In [None]:
# Standardise the features. The scaler is fitted on the training split only so
# that no statistics leak from the validation or test data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

# Every split the model sees must go through the same transform. X_val is
# passed to fit() as validation data, so leaving it unscaled would make
# val_loss (and therefore early stopping) meaningless.
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)

### Define the plotting function

In [None]:
#@title Define the plotting function
def plot_curve(epochs, hist, list_of_metrics):
    """Draw each requested metric against the epoch number on one figure.

    Args:
        epochs: Sequence of epoch indices, one per recorded epoch.
        hist: Table of per-epoch metric values, indexable by metric name.
        list_of_metrics: Names of the metrics in `hist` to draw.
    """
    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Value")

    # The first recorded epoch is left out of every curve.
    later_epochs = epochs[1:]
    for metric_name in list_of_metrics:
        plt.plot(later_epochs, hist[metric_name][1:], label=metric_name)

    plt.legend()


print("Loaded the plot_curve function.")

## Artificial Neural Network

#### Define a deep neural net model

In [None]:
def create_model(my_learning_rate):
    """Build and compile a one-hidden-layer binary classifier.

    The network is a single 11-unit ReLU layer followed by one sigmoid unit,
    trained with Adam on binary cross-entropy and reporting accuracy. No input
    shape is declared, so Keras infers it from the first data it is given.

    Args:
        my_learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled `tf.keras` Sequential model.
    """
    output_size = 1

    model = tf.keras.models.Sequential()

    # Single hidden layer.
    model.add(tf.keras.layers.Dense(11,
                                    kernel_initializer="uniform",
                                    activation="relu",
                                    name='Hidden1'))

    # Output layer: one sigmoid unit giving the positive-class probability.
    model.add(tf.keras.layers.Dense(output_size,
                                    kernel_initializer="uniform",
                                    activation="sigmoid"))

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    return model

#### Define a training function

In [None]:
def train_model(model, features_arr, label_arr,validation_inputs, validation_targets, epochs, 
                batch_size=None):
    """Fit `model` with early stopping and return its per-epoch history.

    Args:
        model: Compiled Keras model to train.
        features_arr: Training features.
        label_arr: Training labels.
        validation_inputs: Features evaluated after each epoch.
        validation_targets: Labels matching `validation_inputs`.
        epochs: Upper bound on the number of training epochs.
        batch_size: Mini-batch size forwarded to `model.fit`.

    Returns:
        A pair `(epoch_indices, metrics)`: the list of epochs that actually
        ran, and a DataFrame with one row of metric values per epoch.
    """
    # Halt once the validation loss has failed to improve for 3 epochs in a row.
    stop_on_plateau = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        patience=3,
    )

    history = model.fit(
        x=features_arr,
        y=label_arr,
        batch_size=batch_size,
        epochs=epochs,
        verbose=2,  # one summary line per epoch
        callbacks=[stop_on_plateau],
        validation_data=(validation_inputs, validation_targets),
    )

    # Snapshot of the metrics at each epoch, for tracking training progress.
    return history.epoch, pd.DataFrame(history.history)

#### Call the functions to build and train a deep neural net

In [None]:
# Hyperparameters.
learning_rate = 0.002
epochs = 35
batch_size = 300

# Establish the model's topology.
my_model = create_model(learning_rate)

# Train the model. Note that `epochs` is rebound here: it goes in as the
# maximum epoch count and comes back as the list of epochs that actually ran.
epochs, hist = train_model(
    model=my_model,
    features_arr=X_train,
    label_arr=y_train,
    validation_inputs=X_val,
    validation_targets=y_val,
    epochs=epochs,
    batch_size=batch_size,
)

# Plot the chosen metrics against the epoch number.
list_of_metrics_to_plot = ['accuracy']
plot_curve(epochs, hist, list_of_metrics_to_plot)

print("\n Evaluate the new model against the test set:")
my_model.evaluate(X_test, y_test, batch_size=batch_size)

## Running Predictions on the Test Set

In [None]:
# Predicted probabilities of the positive class for the test set.
y_pred = my_model.predict(X_test)

# Hard 0/1 labels for the training set. Sequential.predict_classes() was
# removed in TensorFlow 2.6; thresholding the sigmoid output at 0.5 is the
# documented replacement for a single-unit binary classifier.
y_train_pred = (my_model.predict(X_train) > 0.5).astype("int32")

In [None]:
# Accuracy on the data the model was fitted on vs. the held-out test set.
# Test probabilities are rounded to hard 0/1 labels before scoring.
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_pred.round())

print('Training Accuracy ---->', train_accuracy)
print('Testing Accuracy  ---->', test_accuracy)

In [None]:
# Confusion matrix on the test set (rows = true class, columns = predicted).
# labels=[1, 0] orders both axes with the positive class first.
confusion_matrix(y_test,y_pred.round(), labels=[1, 0])