In [60]:
# first let's capture the data
import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.read_csv('weatherAUS.csv')
data['Date'] = pd.to_datetime(data["Date"])
data = data.dropna()
data = data.sort_values(by='Date')
data.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
45587,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,68.0,29.0,1019.7,1015.0,7.0,7.0,14.4,23.6,No,Yes
45588,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,80.0,36.0,1012.4,1008.4,5.0,3.0,17.5,25.7,Yes,Yes
45589,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,82.0,69.0,1009.5,1007.2,8.0,7.0,15.4,20.2,Yes,Yes
45590,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,62.0,56.0,1005.5,1007.0,2.0,7.0,13.5,14.1,Yes,Yes
45591,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,68.0,49.0,1018.3,1018.5,7.0,7.0,11.1,15.4,Yes,No


In [61]:
# format the data
df = pd.get_dummies(data,columns=['Location','WindGustDir','WindDir9am','WindDir3pm'])
# convert yes's -> 1's, and no's -> 0's
df = df.replace(to_replace=['Yes', 'No'], value=[1,0])
# remove columns Date & RainTomorrow. Date might not be relevant. RainTomorrow is the Y variable.
columns = df.columns.values.tolist()
columns.remove('Date')
columns.remove('RainTomorrow')
# assign X & Y's
X = df[columns]
Y = df['RainTomorrow']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, train_size=0.8, random_state=42)
# check uniform shape of data
print(f'X_train: {X_train.shape}, Y_train: {Y_train.shape}')
print(f'X_test: {X_test.shape}, Y_test: {Y_test.shape}')

X_train: (45136, 91), Y_train: (45136,)
X_test: (11284, 91), Y_test: (11284,)


In [62]:
# feed the classifier neural network
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

mlpc = MLPClassifier(hidden_layer_sizes=(20))
chistory = mlpc.fit(X_train, Y_train)
yc_prediction = mlpc.predict(X_test)
mlpc_accuracy = accuracy_score(Y_test, yc_prediction)
print(f'MLPC Accuracy: {mlpc_accuracy}')

MLPC Accuracy: 0.8420772775611485


In [63]:
# feed the regressor neural network
from sklearn.neural_network import MLPRegressor
import numpy as np

mlpr = MLPRegressor(hidden_layer_sizes=(20), solver='adam', activation='logistic')
rhistory = mlpr.fit(X_train, Y_train)
yr_prediction: np.ndarray = mlpr.predict(X_test)
yrm_prediction = np.array(list(map(lambda y: 1 if y >= 0.5 else 0, yr_prediction)))
mlpr_accuracy = accuracy_score(Y_test, yrm_prediction)
print(f'MLPR Accuracy: {mlpr_accuracy}')

MLPR Accuracy: 0.8511166253101737


In [70]:
# mlp with keras
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense

mlpk = Sequential()
# mlpk.add(Dense(50, activation='relu'))
# mlpk.add(Dense(25, activation='relu'))
mlpk.add(Dense(1, activation='sigmoid'))

# compile and train model
mlpk.compile(loss=tf.keras.losses.binary_crossentropy, optimizer='sgd', metrics=['accuracy'])
mlpk.fit(X_train, Y_train, epochs=50)
# evaluate the model
mlpk_loss, mlpk_accuracy = mlpk.evaluate(X_test, Y_test)
print(f'MLPK accuracy: {mlpk_accuracy}')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
MLPK accuracy: 0.8250620365142822
