In [None]:
!pip3 install --upgrade pip
!pip3 install --upgrade pandas
!pip3 install --upgrade sklearn

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the
# whole session.
pd.set_option('mode.chained_assignment', None)

# Read the training data from CSV and display the resulting frame.
dataset = pd.read_csv('Kraków 2021-02-01 to 2022-01-26.csv')
dataset

In [None]:
# Collect the column labels of `dataset`; the next cell picks the model's
# weather parameters by their position in this list.
columns_names = dataset.columns.tolist()

# Show the labels as a one-column table (the row index is the position).
pd.DataFrame({'column_name': columns_names})

In [None]:
# Positional indices (into `columns_names`) of the columns fed to the model.
# The encoding cell below expects one of them to be named 'conditions'.
selected_columns = [2, 5, 21]
selected_columns_names = [columns_names[col] for col in selected_columns]
print(selected_columns_names)

# Drop rows that miss a value in any selected column, then renumber the rows
# so positional windows over the frame are contiguous.
dataset = dataset.dropna(subset=selected_columns_names).reset_index(drop=True)

# Take an explicit copy: this frame is modified in place by the one-hot
# encoding step, and it must not alias (or warn about aliasing) `dataset`.
training_set_wlabels = dataset.iloc[:, selected_columns].copy()


In [None]:
# One-hot encode the textual 'conditions' column: one 0/1 indicator column is
# appended per known label, in the order given by `cond`.
# A row whose condition is not listed in `cond` ends up with all indicators 0.
cond = [
    'Snow',
    'Rain, Partially cloudy',
    'Snow, Partially cloudy',
    'Rain',
    'Partially cloudy',
    'Rain, Overcast',
    'Snow, Rain, Overcast',
    'Overcast',
    'Snow, Overcast',
    'Clear',
    'Snow, Rain, Partially cloudy',
]
conditions_column = training_set_wlabels['conditions']
for label in cond:
    training_set_wlabels[label] = (conditions_column == label).astype('int64')

# The text column is no longer needed once the indicators exist.
training_set_wlabels = training_set_wlabels.drop(columns='conditions')

# Plain NumPy matrix (rows = samples, columns = numeric features + indicators).
training_set = training_set_wlabels.to_numpy()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn per-column min/max on the training matrix and rescale every column
# into the [0, 1] range. `sc` is reused later to scale test inputs and to
# map predictions back to original units.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)

In [None]:
# Persist the fitted scaler `sc` to the file "scaler.save" using joblib.
import joblib
joblib.dump(sc, "scaler.save")

In [None]:
# Sliding-window setup: every sample uses `n_past` consecutive rows as input
# and the `n_future` rows that immediately follow as target.
n_future = 12  # number of rows to predict
n_past = 48    # number of preceding rows the prediction is based on

# One window per valid start position (empty when the data is too short).
window_starts = range(0, len(training_set_scaled) - n_past - n_future + 1)
x_train = np.array(
    [training_set_scaled[start : start + n_past] for start in window_starts]
)
y_train = np.array(
    [training_set_scaled[start + n_past : start + n_past + n_future] for start in window_starts]
)

# Swap the sizes of the last two axes: (samples, rows, columns) becomes
# (samples, columns, rows).
# NOTE(review): `reshape` reinterprets the flat data, it does not transpose.
# The test-input and prediction cells use the same reshape convention, so the
# pipeline is self-consistent -- confirm whether a transpose was intended.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[2], x_train.shape[1])
y_train = y_train.reshape(y_train.shape[0], y_train.shape[2], y_train.shape[1])

In [None]:
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout, Bidirectional

In [None]:
# Recurrent regression network: one bidirectional LSTM followed by two LSTMs,
# each followed by dropout. Every recurrent layer returns the full sequence,
# so the final Dense layer is applied per position along axis 1 and emits
# `n_future` values for each of them (matching the layout of `y_train`).
model = Sequential()
model.add(Bidirectional(LSTM(units=30, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2]))))
model.add(Dropout(0.2))
model.add(LSTM(units=30, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=30, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(units=n_future, activation='linear'))

# This is a regression trained with MSE, so classification accuracy ('acc')
# is not a meaningful metric; report the mean absolute error instead.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train and record wall-clock start/stop times; a later cell prints the
# elapsed time from `tstart`/`tstop`.
from time import time
tstart = time()
model.fit(x_train, y_train, epochs=500, batch_size=32)
tstop = time()

In [None]:
# Print the layer-by-layer summary of the model built and fitted above.
model.summary()

In [None]:
# Report the training wall-clock time as whole hours, minutes and seconds.
elapsed_seconds = int(tstop - tstart)
hours, remainder = divmod(elapsed_seconds, 3600)
minutes, seconds = divmod(remainder, 60)
print(f"time passed: {hours}h {minutes}m {seconds}s")

In [None]:
# Save the trained model under the path 'weather_forecast_model'.
model.save('weather_forecast_model')

In [None]:
# Pack the saved model into the gzip-compressed archive weather_forecast_model.tgz.
!tar czf weather_forecast_model.tgz weather_forecast_model

In [None]:
# Testing prediction accuracy considering weather history: build a single
# model input from separate data, using `n_past` rows starting at `offset`.
testdata = pd.read_csv('Kraków 2022-03-01 to 2022-03-09.csv')

# Position of the first history row inside `testdata`.
offset = 0

# The history window must be exactly `n_past` rows long, because the model was
# built for inputs whose last axis has length `n_past`. Copy the slice so the
# in-place encoding below does not touch (or warn about) `testdata`.
testdataset_wlabels = testdata.iloc[offset:offset + n_past, selected_columns].copy()
assert len(testdataset_wlabels) == n_past, (
    f"expected {n_past} history rows starting at offset {offset}, "
    f"got {len(testdataset_wlabels)}"
)

# Perform one-hot encoding of weather conditions (same labels and column
# order as for the training data).
for label in cond:
    testdataset_wlabels[label] = 0
    testdataset_wlabels.loc[testdataset_wlabels['conditions'] == label, label] = 1
testdataset_wlabels = testdataset_wlabels.drop(columns='conditions')

# Scale with the scaler fitted on the training data (no re-fitting here).
testdataset = testdataset_wlabels.values
testing = sc.transform(testdataset)

# Add a batch axis of size 1 and swap the sizes of the two data axes, using
# the same reshape convention as the training windows.
testing = np.reshape(testing, (1, testing.shape[1], testing.shape[0]))

In [None]:
# Run the model on the single prepared input window.
raw_prediction = model.predict(testing)

# Drop the batch axis and swap the sizes of the remaining two axes, mirroring
# (in reverse) the reshape applied to the training targets, so that each row
# has one value per scaled column.
n_rows, n_columns = raw_prediction.shape[2], raw_prediction.shape[1]
predicted_weather = raw_prediction.reshape(n_rows, n_columns)

# Map the scaled values back to the original units.
predicted_weather = sc.inverse_transform(predicted_weather)

In [None]:
# Loading real weather for test reference: the `n_future` rows that directly
# follow the `n_past`-row history window starting at `offset`, so that the
# reference lines up row-for-row with the prediction.
real_weather = pd.read_csv('Kraków 2022-03-01 to 2022-03-09.csv')
reference_start = offset + n_past
real_weather = real_weather.iloc[reference_start:reference_start + n_future, selected_columns]

# Removing one-hot encoding: the predicted matrix holds the numeric columns
# first, followed by one indicator column per label in `cond`.
numeric_columns = [col for col in selected_columns_names if col != 'conditions']
predicted_weather_wlabels = pd.DataFrame(predicted_weather, columns=numeric_columns + cond)

# For every predicted row pick the label whose indicator value is largest.
max_cond = np.argmax(predicted_weather[:, len(numeric_columns):], axis=1)
predicted_weather_wlabels['conditions'] = [cond[mcond] for mcond in max_cond]
predicted_weather_wlabels = predicted_weather_wlabels.drop(columns=cond)

In [None]:
# Display the reference weather rows selected for comparison.
real_weather

In [None]:
# Display the predicted weather with the decoded 'conditions' label.
predicted_weather_wlabels