Predicting the Weather with Neural Networks
===========================================


Example neural network

![title](img/ANN_with_numbers.png)

Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

Read a CSV data file.

In [24]:
df = pd.read_csv("weatherPerth.csv")
df.shape

(3193, 24)

Pre-process the data.  First, remove unwanted variables.

In [25]:
exclude = ['Date', 'RISK_MM', 'Location']

for att in exclude:
    del df[att]

Dealing with missing values.

In [26]:
df.dropna(inplace=True)


Boolean variables to 0s and 1s.

In [27]:
bools = ['RainToday', 'RainTomorrow']
for i in bools:
    df[i] = df[i].map(
    {
        "Yes":1,
        "No":0
    }
    )
    


Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
1,6.4,20.7,0.0,1.8,7.0,NE,22.0,ESE,ENE,6,...,80.0,39.0,1024.1,1019.0,0.0,6.0,11.1,19.7,0,0
3,9.5,19.2,1.8,1.2,4.7,W,26.0,NNE,NNW,11,...,93.0,73.0,1019.3,1018.4,6.0,6.0,13.2,17.7,1,1
4,9.5,16.4,1.8,1.4,4.9,WSW,44.0,W,SW,13,...,69.0,57.0,1020.4,1022.1,7.0,5.0,15.9,16.0,1,1
5,0.7,15.9,6.8,2.4,9.3,NNE,24.0,ENE,NE,4,...,86.0,41.0,1032.0,1029.6,0.0,1.0,6.9,15.5,1,0
6,0.7,18.3,0.0,0.8,9.3,N,37.0,NE,NNE,15,...,72.0,36.0,1028.9,1024.2,1.0,5.0,8.7,17.9,0,0


Cyclical attributes

![title](img/cardinal.png)

Map Cardinal Direction to Radians

In [33]:
dirs = ['N','NNE','NE','ENE','E','ESE','SE','SSE','S','SSW','SW','WSW','W','WNW','NW','NNW']
angles = np.arange(0.0, 2.0*np.pi, 2.0*np.pi / 16.0)
wind_angles = dict(zip(dirs, angles))
print(wind_angles)

{'N': 0.0, 'NNE': 0.39269908169872414, 'NE': 0.7853981633974483, 'ENE': 1.1780972450961724, 'E': 1.5707963267948966, 'ESE': 1.9634954084936207, 'SE': 2.356194490192345, 'SSE': 2.748893571891069, 'S': 3.141592653589793, 'SSW': 3.5342917352885173, 'SW': 3.9269908169872414, 'WSW': 4.319689898685965, 'W': 4.71238898038469, 'WNW': 5.105088062083414, 'NW': 5.497787143782138, 'NNW': 5.890486225480862}


In [36]:
a = np.arange(0.0, 2.0*np.pi, 2.0*np.pi / 16.0)
# a = np.arange(1,100,np.pi)
np.cos(a)

array([ 1.00000000e+00,  9.23879533e-01,  7.07106781e-01,  3.82683432e-01,
        6.12323400e-17, -3.82683432e-01, -7.07106781e-01, -9.23879533e-01,
       -1.00000000e+00, -9.23879533e-01, -7.07106781e-01, -3.82683432e-01,
       -1.83697020e-16,  3.82683432e-01,  7.07106781e-01,  9.23879533e-01])

Replace cyclical attributes with sin() and cos()

In [37]:
wind_attributes = ['WindGustDir', 'WindDir9am', 'WindDir3pm']

for att in wind_attributes:
    df[att] = df[att].map(wind_angles)
    df[att + "cos"] = np.cos(df[att])
    df[att + "sin"] = np.sin(df[att])
    df.drop(columns = att)

Extract attributes (X) and class labels (y).

In [43]:
y = df["RainTomorrow"]
X = df.drop(columns="RainTomorrow")

Split dataset into training and testing subsets.

In [None]:
X_train,X_test, y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0)
print("X_train",X_train.shape)
print("X_test",X_test.shape)
print("y_train",y_train.shape)
print("y_test",y_test.shape)

Scale.

In [49]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train =scaler.transform(X_train)
X_test =scaler.transform(X_test)

Instantiate a neural network and train it.

![title](img/ANN_2_layers.png)

Input layer size.

In [51]:
X_train.shape

(908, 26)

Instantiate a neural network and train it.

In [52]:
nn = MLPClassifier(
        hidden_layer_sizes=(50,50),
        random_state=0,
    max_iter=500


)
nn.fit(X_train,y_train)

MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=500, random_state=0)

Predict target class for the testing set.

In [53]:
y_pred = nn.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.8777533039647577


Search for best network layout.

In [55]:
p = {
    "hidden_layer_sizes":((2,),(10,),(10,10),(25,25),(50,50),)
    
}
nn = MLPClassifier(
        max_iter=2000,
        random_state= 0)

gs= GridSearchCV(nn,p,cv =5)
gs.fit(X_train,y_train)

GridSearchCV(cv=5, estimator=MLPClassifier(max_iter=2000, random_state=0),
             param_grid={'hidden_layer_sizes': ((2,), (10,), (10, 10), (25, 25),
                                                (50, 50))})

Display grid search results.

In [56]:
print(gs.cv_results_['params'])
print(gs.cv_results_["mean_test_score"])

[{'hidden_layer_sizes': (2,)}, {'hidden_layer_sizes': (10,)}, {'hidden_layer_sizes': (10, 10)}, {'hidden_layer_sizes': (25, 25)}, {'hidden_layer_sizes': (50, 50)}]
[0.90553102 0.89513248 0.87764731 0.88661849 0.88048642]


Predictions using the best neural network.

In [57]:
best_nn = gs.best_estimator_
y_pred = best_nn.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.9008810572687225
