Predict the climate of a country (using all stations whose IDs begin with the country code)

Get data for all given stations
Find monthly means
Plot these

In [None]:
import json
import matplotlib.pyplot as plt
import keras
import keras.layers as layers
from GHCND import *

In [None]:
# Load the TMAX gap counts (presumably station id -> number of gaps; confirm
# against the script that writes this file). The context manager guarantees
# the file handle is closed, even if JSON parsing fails.
with open('data/stat_counts_tmax.txt') as f:
    data = json.load(f)

# find all stations with no data gaps (value of 0)
no_gaps_tmax = [k for k, v in data.items() if v == 0]
# use the first gap-free station; raises IndexError if no station qualifies
station = no_gaps_tmax[0]
print(station)

In [None]:
# Download the daily data file for the chosen `station` (set in an earlier
# cell), parse it with the GHCND helper and extract the TMAX series.
ghn = GHCND()
ghn.readCountriesFile()
ghn.readStationsFile()

# Get list of station names
station_names = ghn.getStatKeyNames()

# build the data file name and remote URL for the chosen station id
fileName = f"{station}.dly"
print(f"Filename: {fileName}")
urlName = f"http://www.hep.ucl.ac.uk/undergrad/0056/other/projects/ghcnd/ghcnd_gsn/{fileName}"
print(f"url name: {urlName}")

# copy station data from remote to local (the file is fetched on every run)
# NOTE(review): `urllib` is not imported explicitly in this notebook; it is
# presumably re-exported by `from GHCND import *` - confirm.
destination = f"data/{fileName}"
print(f"destination: {destination}")
urllib.request.urlretrieve(urlName, destination)
station_data = ghn.processFile(destination)
print(ghn.getStation(station))

# wrap the TMAX records in a Variable, labelled with the station's name
t_max = Variable(ghn.getVar(station_data, 'TMAX'), "max temp (degC)", ghn.stationDict[station].name)
dates = t_max.get_dates()
vals = t_max.get_vals()


In [None]:
# Monthly mean TMAX values for the station, as a NumPy array.
means = np.array(t_max.get_monthly_means())

# Only the first ten years (12 months x 10) are drawn.
months_shown = 12 * 10

fig, ax = plt.subplots()
ax.plot(means[:months_shown])
ax.set_ylabel("Mean monthly maximum temperature (degC)")
ax.set_xlabel("Month")
ax.set_title("Mean monthly maximum temperatures over 10 years")

In [None]:
# normalise means: divide by the biggest value
means_normalised = t_max.normalise(means)

# Plot the first 10 years (12 * 10 months) of the normalised series.
# The normalised values are ratios, so the axis label carries no unit, and the
# title says "Normalised" to distinguish this figure from the raw-data plot.
fig, ax = plt.subplots()
ax.plot(means_normalised[:(12*10)])
ax.set_xlabel("Month")
ax.set_ylabel("Normalised mean monthly maximum temperature")
ax.set_title("Normalised mean monthly maximum temperatures over 10 years")

### Train model

In [None]:
# WINDOW_SIZE is the number of time steps per input window (it is also the
# LSTM input length). OFFSET is passed to shapeArray together with it;
# presumably the distance from window to target - confirm in GHCND.shapeArray.
WINDOW_SIZE = 12
OFFSET = 12

# Nominal split sizes: 70% train, 20% validate, 10% test.
train_len = int(len(means_normalised) * 0.7)
validate_len = int(len(means_normalised) * 0.2)  # nominal; validation takes the remainder
test_len = int(len(means_normalised) * 0.1)

# divide data into training, validating and testing sets
# Layout along the series: [ test | train | validate ].
# Python slices are half-open, so each set starts exactly where the previous
# one ends; adding 1 to the start index would silently drop a sample at each
# boundary.
means_test = means_normalised[:test_len]
means_train = means_normalised[test_len:test_len+train_len]
means_validate = means_normalised[test_len+train_len:]

# reshape data into input windows and targets
input_train, target_train = shapeArray(means_train, WINDOW_SIZE, OFFSET)
input_validate, target_validate = shapeArray(means_validate, WINDOW_SIZE, OFFSET)
input_test, target_test = shapeArray(means_test, WINDOW_SIZE, OFFSET)


In [None]:
# Append a trailing feature axis of length n_features so each input set has
# the 3-D shape (samples, window, features) used by the LSTM layers below.
n_features = 1
input_train, input_test, input_validate = (
    windows.reshape((windows.shape[0], windows.shape[1], n_features))
    for windows in (input_train, input_test, input_validate)
)


In [None]:
# Baseline model (noted by the author as the one that definitely works):
# a single 64-unit LSTM over the input window, followed by a one-unit linear
# output layer.
model = keras.models.Sequential([
    layers.LSTM(64, input_shape = (WINDOW_SIZE, 1), activation = 'relu', return_sequences = False),
    layers.Dense(1, activation = "linear"),
])
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

model.summary()

# Fit for 100 epochs, validating after each one, and keep the per-epoch
# training loss history.
history = model.fit(
    input_train,
    target_train,
    epochs = 100,
    validation_data = (input_validate, target_validate),
)
cost = history.history['loss']

In [None]:
# Stacked model: a 64-unit LSTM that returns the full sequence feeds a
# 128-unit LSTM, followed by a one-unit linear output layer.
model = keras.models.Sequential()
model.add(layers.LSTM(64, input_shape = (WINDOW_SIZE, 1), activation = 'relu', return_sequences = True))
model.add(layers.LSTM(128, activation = 'relu'))
model.add(layers.Dense(1, activation = "linear"))
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

model.summary()

# train model and keep the per-epoch training and validation loss histories
history = model.fit(input_train, target_train, epochs = 200, validation_data = (input_validate, target_validate))
cost = history.history['loss']
val_cost = history.history['val_loss']

# Plot both loss curves; labels and a legend make them distinguishable.
fig, ax = plt.subplots()
ax.plot(cost, label = "Training loss")
ax.plot(val_cost, label = "Validation loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Mean squared error")
ax.legend()


In [None]:
# Run the trained model on the held-out test windows.
prediction = model.predict(input_test)

# Overlay predictions and true targets; labels and a legend identify which
# curve is which.
fig, ax = plt.subplots()
ax.plot(prediction, label = "Prediction")
ax.plot(target_test, label = "Target")
ax.legend()

### Plot first 10 years of data against predictions