# Hurricane Prediction

## Intro

The goal is to predict the arrival of a hurricane from a set of weather conditions measured in the USA.

In [None]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.noise import GaussianNoise, GaussianDropout
from datetime import datetime, timedelta
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
import pyowm
import yaml
import time
import os
import csv

In [None]:
# Read the notebook settings from config.yml.
# safe_load is used instead of load so the YAML cannot construct arbitrary objects.
with open('config.yml') as config_file:
    config = yaml.safe_load(config_file)

In [None]:
def render_plots_numbers(data):
    """Plot every non-string column of ``data`` in its own stacked subplot.

    The column at position 0 is always skipped. For each remaining column
    the value in the first row decides whether the column holds strings, in
    which case it is left out of the figure.

    Args:
        data: pandas DataFrame whose columns are plotted against row position.

    Returns:
        None. The figure is shown with ``pyplot.show()``. Nothing is drawn
        when ``data`` is empty or has no plottable column.
    """
    if data.empty:
        # Without a first row there is nothing to inspect or to plot.
        return
    values = data.values
    # isinstance also recognises str subclasses (e.g. numpy string scalars).
    cols_to_plot = [col for col in range(1, len(data.columns))
                    if not isinstance(values[0, col], str)]
    if not cols_to_plot:
        # Avoid creating a zero-height figure with no subplots in it.
        return
    pyplot.figure(figsize=(10, len(cols_to_plot) * 1.2))
    for position, col in enumerate(cols_to_plot, start=1):
        pyplot.subplot(len(cols_to_plot), 1, position)
        pyplot.plot(values[:, col])
        pyplot.title(data.columns[col], y=1, loc='right')
    pyplot.tight_layout(h_pad=1)
    pyplot.show()

In [None]:
# Load houston.csv; its first column becomes the index and is converted to timestamps.
houston_weather = read_csv('HurricaneData/Preprocessed/houston.csv', index_col=0)
houston_weather.index = pd.to_datetime(houston_weather.index)

# Load hurdat_houston.csv the same way so both frames share a datetime index.
hurdat_houston = read_csv('HurricaneData/Preprocessed/hurdat_houston.csv', index_col=0)
hurdat_houston.index = pd.to_datetime(hurdat_houston.index)

In [None]:
# Notebook display: show the frame loaded from hurdat_houston.csv.
hurdat_houston

In [None]:
# Plot the non-string columns of the hurdat_houston frame (the column at
# position 0 is skipped), one subplot per column.
render_plots_numbers(hurdat_houston)

In [None]:
# Notebook display: show the frame loaded from houston.csv.
houston_weather

In [None]:
# Plot the non-string columns of the houston_weather frame (the column at
# position 0 is skipped), one subplot per column.
render_plots_numbers(houston_weather)

In [None]:
# Keep only the weather columns used further down; selecting with a list of
# labels yields a new frame holding just these columns, in this order.
houston_to_supervise = houston_weather.loc[:, [
    'Events',
    'Max.TemperatureF',
    'Max.Dew.PointF',
    'Max.Humidity',
    'Max.Wind.SpeedMPH',
    'Max.Sea.Level.PressureIn',
]]
houston_to_supervise

In [None]:
# Label each weather row with whether a hurricane record falls close to it.
# A row is positive when hurdat_houston has at least one entry inside the
# window (date - 2 days, date + 2 days]; note the lower bound is exclusive.
date_range = timedelta(days=2)
hurricane_started = np.zeros(len(houston_to_supervise.index))
for i, day in enumerate(houston_to_supervise.index):
    start_date = day - date_range
    end_date = day + date_range
    mask = (hurdat_houston.index > start_date) & (hurdat_houston.index <= end_date)
    if mask.any():
        hurricane_started[i] = 1

# Drop a column left over from an earlier run of this cell so the label always
# ends up as the last column, then store the computed labels in it. This
# column is the training target, so it must not stay at a constant 0.
if 'Hurricane_Started' in houston_to_supervise:
    del houston_to_supervise['Hurricane_Started']
houston_to_supervise.insert(len(houston_to_supervise.columns), 'Hurricane_Started', hurricane_started)
hurricane_started.shape, houston_to_supervise.shape

In [None]:
# Work on the raw value matrix of the selected columns.
values = houston_to_supervise.values

# Integer-encode columns 0 and 1, each with its own freshly fitted LabelEncoder.
# NOTE(review): column 1 is 'Max.TemperatureF'; label-encoding it replaces
# the temperatures by class indices - confirm this is intended.
for encoded_column in (0, 1):
    encoder = LabelEncoder()
    values[:, encoded_column] = encoder.fit_transform(values[:, encoded_column])

values

In [None]:
# convert series to supervised learning
def series_to_supervised(data, dataset_cols, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Each output row holds the observations of the ``n_in`` previous steps
    followed by the observations of step ``t`` and the ``n_out - 1`` steps
    after it.

    Args:
        data: Observations ordered in time; a list, a 1-D or 2-D array, or a
            DataFrame (rows are time steps, columns are variables).
        dataset_cols: Variable names, one per column of ``data``; they label
            the output columns as ``name(t-1)``, ``name(t)``, ``name(t+1)``.
        n_in: Number of lagged steps to include as inputs.
        n_out: Number of steps, starting at ``t``, to include as outputs.
        dropnan: When true, drop every row that contains a NaN (shifting
            introduces NaN at the edges of the series).

    Returns:
        pandas DataFrame with ``(n_in + n_out)`` groups of columns, one group
        per shift, each group holding every variable.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself so nested lists and 1-D
    # arrays are labelled correctly as well.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['%s(t-%d)' % (dataset_cols[j], i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['%s(t)' % (dataset_cols[j],) for j in range(n_vars)]
        else:
            names += ['%s(t+%d)' % (dataset_cols[j], i) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

In [None]:
# Cast the whole matrix to float32 so every column is numeric.
values = values.astype('float32')

# Rescale each feature into the [0, 1] interval.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

# Frame as supervised learning: one lagged step in, one step out.
# todo: increase input or forecast to do > 1 day forecast
reframed = series_to_supervised(scaled, houston_to_supervise.columns, 1, 1)

# Remove the step-t columns we don't want to predict: the label range from
# 'Events(t)' through 'Max.Humidity(t)' plus the sea-level pressure column.
unwanted_columns = list(reframed.loc[:, 'Events(t)':'Max.Humidity(t)'].columns)
unwanted_columns.append('Max.Sea.Level.PressureIn(t)')
reframed.drop(unwanted_columns, axis=1, inplace=True)

print(reframed.head(), reframed.shape, values.shape)

In [None]:
# split into train and test sets: the first half of the rows (in their
# existing order) is used for training, the rest for testing
train_percent = 0.5
supervised = reframed.values
train_amount = math.floor(len(supervised) * train_percent)
train = supervised[:train_amount, :]
test = supervised[train_amount:, :]

# split into input and outputs
# The last two columns are the step-t values to predict; everything before
# them is a lagged input. The inputs must stop before BOTH target columns,
# otherwise the model is fed one of the values it is asked to predict.
n_targets = 2
train_X, train_y = train[:, :-n_targets], train[:, -n_targets:]
test_X, test_y = test[:, :-n_targets], test[:, -n_targets:]

# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

In [None]:
# design network: Gaussian noise on the inputs, a single-unit LSTM, Gaussian
# dropout, and a dense layer with two outputs (one per target column)
model = Sequential([
    GaussianNoise(0.5, input_shape=(train_X.shape[1], train_X.shape[2])),
    LSTM(1, return_sequences=False),
    GaussianDropout(rate=0.5),
    Dense(2),
])
# mean absolute error as the loss, optimised with adam
model.compile(loss='mae', optimizer='adam')

In [None]:
# Train in rounds of 10 epochs, forever; the loop has no exit condition, so
# the cell runs until it is interrupted.
while True:
    # fit network for one round, validating against the test split
    history = model.fit(
        train_X,
        train_y,
        epochs=10,
        batch_size=72,
        validation_data=(test_X, test_y),
        verbose=2,
        shuffle=False,
    )

    # Clear the previous round's output so the chart stays visible
    clear_output()

    # plot the loss curves of the round that just finished
    for history_key, legend_label in (('loss', 'train'), ('val_loss', 'test')):
        pyplot.plot(history.history[history_key], label=legend_label)
    pyplot.legend()
    pyplot.show()

In [None]:
# Location of the CSV file holding collected OpenWeatherMap rows.
owm_data_path = 'HurricaneData/owm_houston.csv'

# Field order of one OpenWeatherMap row; positions in this list are used to
# look values up by name.
owm_headers = [
    'timestamp',
    'Max.TemperatureF',
    'Min.TemperatureF',
    'status_short',
    'status',
    'wind_speed',
    'wind_dir',
    'cloud_coverage',
    'humidity',
    'pressure',
    'sea_level',
    'rain',
    'snow',
]

In [None]:
def weather_row_to_pandas(rows):
    """Convert raw OWM rows into a DataFrame laid out like the training columns.

    Every row must be ordered like ``owm_headers``. Its first field becomes
    the index entry; temperature, humidity, wind speed and sea-level pressure
    are copied over, while 'Events' and 'Max.Dew.PointF' are filled with 0.

    NOTE(review): values are copied without conversion - confirm their units
    and types match what the training columns expect.

    Args:
        rows: Iterable of sequences ordered like ``owm_headers``.

    Returns:
        pandas DataFrame with one record per input row.
    """
    def pick(raw, header):
        # Look the field up by its position in the shared header list.
        return raw[owm_headers.index(header)]

    # Materialise first: ``rows`` may be a one-shot iterator such as a csv reader.
    materialized = list(rows)
    stamps = [raw[0] for raw in materialized]
    records = [
        [
            0,
            pick(raw, 'Max.TemperatureF'),
            0,
            pick(raw, 'humidity'),
            pick(raw, 'wind_speed'),
            pick(raw, 'sea_level'),
        ]
        for raw in materialized
    ]
    frame_columns = ['Events', 'Max.TemperatureF', 'Max.Dew.PointF', 'Max.Humidity',
                     'Max.Wind.SpeedMPH', 'Max.Sea.Level.PressureIn']
    return pd.DataFrame(data=records, index=stamps, columns=frame_columns)

# Preview the previously collected OWM rows, if the CSV file exists.
if os.path.isfile(owm_data_path):
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation asks for file objects passed to csv.reader.
    with open(owm_data_path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file, delimiter=';',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # skip header; the default keeps an empty file from raising StopIteration
        next(reader, None)
        print(weather_row_to_pandas(reader))

In [None]:
# Poll the OWM daily forecast once an hour, forever; the loop has no exit
# condition, so the cell runs until it is interrupted.
# The client does not change between polls, so it is created once up front.
owm = pyowm.OWM(config['owm_api_key'])  # You MUST provide a valid API key
while True:
    # NOTE(review): the forecast is requested for 'Texas' while the training
    # data is for Houston - confirm the location string.
    fc = owm.daily_forecast('Texas')
    f = fc.get_forecast()
    # The CSV path constant is owm_data_path (csv_path was never defined).
    # TODO: file_exists and the frame built below are not used yet -
    # presumably the rows are meant to be appended to owm_data_path.
    file_exists = os.path.isfile(owm_data_path)

    weather_rows = []
    for w in f.get_weathers():
        rain = w.get_rain()
        snow = w.get_snow()
        temp = w.get_temperature(unit='fahrenheit')
        pres = w.get_pressure()
        wind = w.get_wind()
        # Field order must match owm_headers; missing readings become ''.
        weather_row = [
            w.get_reference_time(),
            temp.get('max', ''),
            temp.get('min', ''),
            w.get_status(),
            w.get_detailed_status(),
            wind.get('speed', ''),
            wind.get('deg', ''),
            w.get_clouds(),
            w.get_humidity(),
            pres.get('press', ''),
            pres.get('sea_level', ''),
            rain.get('all', ''),
            snow.get('all', ''),
        ]
        weather_rows.append(weather_row)

    weather_row_to_pandas(weather_rows)
    time.sleep(60 * 60)

In [None]:
# make a prediction
yhat = model.predict(test_X)

# The two model outputs correspond to the last two columns of the reframed
# table, i.e. these two columns of the scaled matrix, in this order.
target_names = ['Max.Wind.SpeedMPH', 'Hurricane_Started']
feature_names = list(houston_to_supervise.columns)
target_positions = [feature_names.index(name) for name in target_names]


def _invert_target_scaling(scaled_targets):
    """Undo the min-max scaling for an array holding only the target columns."""
    # The scaler was fitted on every column of `values`, so the targets are
    # placed into a full-width array before inverting. The filler zeros in
    # the other columns are discarded again by the final column selection.
    padded = np.zeros((len(scaled_targets), values.shape[1]))
    padded[:, target_positions] = scaled_targets
    return scaler.inverse_transform(padded)[:, target_positions]


# invert scaling for forecast
inv_yhat = _invert_target_scaling(yhat)

# invert scaling for actual
inv_y = _invert_target_scaling(test_y)

# one subplot per target, actual and predicted values on top of each other
pyplot.figure(figsize=(10, 10))
for position, name in enumerate(target_names):
    pyplot.subplot(len(target_names), 1, position + 1)
    pyplot.plot(inv_y[:, position], label='actual')
    pyplot.plot(inv_yhat[:, position], label='predicted')
    pyplot.title(name, y=1, loc='right')
    pyplot.legend()

pyplot.tight_layout(h_pad=1)
pyplot.show()