In [None]:
import json
import matplotlib.pyplot as plt
import keras
import keras.layers as layers
from GHCND import *

In [None]:
# split data into train, validate and test
# get data into correct shape (double check!!!)
# normalisation - scale to be between -1 and 1 (divide by max?)
# train and validate model
# test on input

In [None]:
# Load the per-station TMAX counts from JSON.
# Presumably maps station id -> number of data gaps; verify against the
# script that produced this file.
with open('data/stat_counts_tmax.txt') as f:
    data = json.load(f)

# find all stations with no data gaps
no_gaps_tmax = [k for k, v in data.items() if v == 0]
if not no_gaps_tmax:
    # Fail with a clear message instead of an opaque IndexError below.
    raise ValueError("no station without TMAX data gaps found in data/stat_counts_tmax.txt")

# Use the last gap-free station (in file order) for the rest of the notebook.
station = no_gaps_tmax[-1]
print(station)

### Get data, and shape train, validate and test arrays

In [None]:
# Set up the GHCND helper and load its country and station metadata.
ghn = GHCND()
ghn.readCountriesFile()
ghn.readStationsFile()

# Get list of station names
station_names = ghn.getStatKeyNames()

# Build the file name and remote URL for the selected station's .dly file
fileName = f"{station}.dly"
print(f"Filename: {fileName}")
urlName = f"http://www.hep.ucl.ac.uk/undergrad/0056/other/projects/ghcnd/ghcnd_gsn/{fileName}"
print(f"url name: {urlName}")

# copy station data from remote to local
# NOTE(review): `urllib` is not imported explicitly in this notebook; it is
# presumably brought in by `from GHCND import *` - confirm.
destination = f"data/{fileName}"
print(f"destination: {destination}")
urllib.request.urlretrieve(urlName, destination)
station_data = ghn.processFile(destination)
print(ghn.getStation(station))

# Wrap the TMAX series in a Variable carrying its axis label and station name.
t_max = Variable(ghn.getVar(station_data, 'TMAX'), "max temp (degC)", ghn.stationDict[station].name)
t_max.convert_time()

# Plot the full TMAX series for the selected station.
fig, ax = plt.subplots()
# Attach the station name as the line label. Passing a bare string to
# ax.legend(...) would be iterated character by character, so only the first
# character would appear in the legend.
ax.plot(t_max.get_dates(), t_max.get_vals(), label=t_max.get_station())
ax.set_xlabel("Days since first recording")
ax.set_ylabel(f"{t_max.get_label()}")
ax.legend()

In [None]:
vals = t_max.get_vals()
vals = t_max.normalise(vals)
WINDOW_SIZE = 10
OFFSET = 365

# reshape data into input windows and targets
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept here because other cells may reference it.
input, target = shapeArray(vals, WINDOW_SIZE, OFFSET)
print(f"Vals shape: {np.shape(vals)}")
print(f"Input shape: {np.shape(input)}")
print(f"Target shape: {np.shape(target)}")

# divide reshaped data into training, validation and testing data (70/20/10)
train_len = int(len(input) * 0.7)
validate_len = int(len(input) * 0.2)
# The test set takes whatever remains, so no sample is lost to rounding.
test_len = len(input) - train_len - validate_len

# Slice boundaries. Consecutive slices share a boundary index (the stop index
# is exclusive), so the three splits are contiguous and non-overlapping.
train_end = train_len
validate_end = train_len + validate_len

input_train = input[:train_end]
input_validate = input[train_end:validate_end]
input_test = input[validate_end:]

target_train = target[:train_end]
target_validate = target[train_end:validate_end]
target_test = target[validate_end:]

# Every sample must land in exactly one split.
assert len(input_train) + len(input_validate) + len(input_test) == len(input)
assert len(target_train) + len(target_validate) + len(target_test) == len(target)

print(f"Training input shape: {np.shape(input_train)}")
print(input_train[0:4])
print(f"Training target shape: {np.shape(target_train)}")
print(target_train[0:4])


In [None]:
# Stacked-LSTM regressor: three LSTM layers (64 -> 16 -> 4 units) followed by
# two dense layers, ending in a single linear output.
model = keras.models.Sequential()
model.add(layers.LSTM(64, input_shape = (None, 1), return_sequences = True)) # LSTM layer with 64 units
model.add(layers.LSTM(16, activation = 'linear', return_sequences = True))
# Last recurrent layer returns only its final output (return_sequences = False).
model.add(layers.LSTM(4, activation = 'linear', return_sequences = False))
model.add(layers.Dense(128, activation = "linear"))
model.add(layers.Dense(1, activation = "linear"))
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

model.summary()

# Train the model, evaluating on the held-out validation split after each
# epoch so over-fitting is visible (reported as 'val_loss' in the history).
history = model.fit(
    input_train,
    target_train,
    validation_data = (input_validate, target_validate),
    epochs = 30,
)
# Training loss recorded by Keras, one value per epoch.
cost = history.history['loss']

### Least mean-squared

Sum of the squared errors.

In [None]:
# Baseline ("fake") loss computed by `weather_fake_loss` on the normalised
# series `vals`, for comparison against the RNN's loss.
# NOTE(review): this is evaluated on all of `vals`, and the notebook text
# describes it as a sum of squared errors, whereas the model is compiled with
# mean squared error - confirm the two numbers are directly comparable.
fake_lms = weather_fake_loss(vals)
print(fake_lms)

The "fake" LMS, i.e. the loss obtained by assuming that each day's weather is the same as the previous day's, is about 30,000 times greater than the loss achieved by the RNN.