In [1]:
from tensorflow.keras.layers import Input, LSTM, GRU, SimpleRNN, Dense, GlobalMaxPool1D,Reshape
# from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras import layers,models
# from tensorflow.keras import layers, models

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import cross_val_score
from lightgbm import LGBMClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import KFold
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder

In [2]:
# Pieces of each station's CSV path; loadToModel joins them as
# files_path + <station name> + files_postfix + files_format.
files_path = 'CleanFiles/'
files_postfix = 'Extract'
files_format = '.csv'

# Station files to process. For a quick single-station run use ['AgiaParaskevi'].
file_names = [
    'AgiaParaskevi',
    'Aristotelous',
    'Athens',
    'Elefsina',
    'Koropi',
    'Liosia',
    'Lykovrisi',
    'Marousi',
    'NeaSmirni',
    'Patision',
    'Peristeri',
    'Pireus',
    'Thrakomakedones',
]

In [3]:
def pm10_index(val):
    """Map a PM10 reading to an ordinal class index in the range 0..4.

    The bands are contiguous, with inclusive upper bounds:

        val <= 25   -> 0
        val <= 50   -> 1
        val <= 90   -> 2
        val <= 180  -> 3
        otherwise   -> 4

    Fractional readings that sit between two whole numbers (e.g. 25.5 or
    90.3) are assigned to the next band up instead of falling through to the
    catch-all class 4. Whole-number readings map exactly as before.

    NaN compares false against every bound and therefore yields 4.
    """
    upper_bounds = (25.0, 50.0, 90.0, 180.0)
    for index, bound in enumerate(upper_bounds):
        if val <= bound:
            return index
    return 4

In [4]:
def get_time(tmp):
    """Return the slice ``tmp[11:13]`` of a sliceable value.

    NOTE(review): presumably ``tmp`` is a 'YYYY-MM-DD HH:MM:SS' timestamp and
    the slice is its two-digit hour - confirm against the CSV format.
    """
    hour_field = slice(11, 13)
    return tmp[hour_field]

In [5]:
# Season label -> numeric code (as a string). 'Winter' has no entry of its own:
# it, like any unrecognised label, receives the fallback code '8'.
_SEASON_CODES = {
    'Spring': '1',
    'Spring/Summer': '2',
    'Summer': '3',
    'Summer/Autumn': '4',
    'Autumn': '5',
    'Autumn/Winter': '6',
    'Winter/Spring': '7',
}


def get_season(tmp):
    """Translate a season label into its numeric code, returned as a string.

    Parameters
    ----------
    tmp : str or bytes
        Season label such as 'Spring' or 'Autumn/Winter'. Byte strings are
        decoded first, because on Python 3 ``b'Spring' == 'Spring'`` is False
        and a bytes label would otherwise always receive the fallback code.

    Returns
    -------
    str
        '1'..'7' for the labels in ``_SEASON_CODES``; '8' for anything else
        (including 'Winter' and missing values such as NaN).
    """
    if isinstance(tmp, bytes):
        tmp = tmp.decode('utf-8', errors='replace')
    return _SEASON_CODES.get(tmp, '8')

In [6]:
# Compass points in the order that defines their codes: 'N' -> '1' ... 'NW' -> '15'.
# 'NNW' is not listed; it, like any unrecognised label, receives the fallback '16'.
_WINDDIR_ORDER = ('N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE',
                  'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW')
_WINDDIR_CODES = {label: str(code) for code, label in enumerate(_WINDDIR_ORDER, start=1)}


def get_winddir(tmp):
    """Translate a compass-point label into its numeric code, returned as a string.

    Parameters
    ----------
    tmp : str or bytes
        Wind direction label such as 'N', 'SSW' or 'WNW'. Byte strings are
        decoded first, because on Python 3 ``b'N' == 'N'`` is False and a
        bytes label would otherwise always receive the fallback code.

    Returns
    -------
    str
        '1'..'15' for the labels in ``_WINDDIR_ORDER``; '16' for anything
        else (including 'NNW' and missing values such as NaN).
    """
    if isinstance(tmp, bytes):
        tmp = tmp.decode('utf-8', errors='replace')
    return _WINDDIR_CODES.get(tmp, '16')

In [7]:
#neasmirni = pd.read_csv('CleanFiles/NeaSmirniExtract.csv', sep=',')
#neasmirni = neasmirni[['date_time','station_id','season','real_temp','real_windspd','real_winddir','real_humidity','forecast_tempC','forecast_windSpeed','forecast_windDirection','forecast_humidity','pm10']]
#neasmirni['pm10'] = neasmirni['pm10'].map(lambda a: pm10_index(a))
#temp = pd.read_csv('CleanFiles/PeristeriExtract.csv', sep=',')
#agiaparaskevi = pd.read_csv('CleanFiles/AgiaParaskeviExtract.csv', sep=',')

In [8]:
# T = 72
# D = 1
# X = []
# Y = []
# for t in range(len(neasmirni) - T):
#     x = neasmirni[t:t+T]
#     X.append(x)
#     y = neasmirni[t:t+T]
#     Y.append(y)

In [9]:
def create_xy(series, series2, window_size, prediction_horizon, shuffle = False):
    """Cut two aligned sequences into sliding (input window, target window) pairs.

    For each start offset ``s`` (0, 1, 2, ...), the input is
    ``series[s : s + window_size]`` and the target is the
    ``prediction_horizon`` items of ``series2`` that immediately follow it.
    Generation stops at the first offset where ``series`` can no longer
    supply a full-length horizon slice.

    ``shuffle`` is accepted but not used by this function.

    Returns a tuple ``(x, y)`` of NumPy arrays built from the collected windows.
    """
    inputs, targets = [], []
    total = len(series)
    start = 0
    while start < total:
        split = start + window_size
        stop = split + prediction_horizon
        # Not enough rows left after the input window for a full horizon.
        if len(series[split:stop]) < prediction_horizon:
            break
        inputs.append(np.array(series[start:split]))
        targets.append(np.array(series2[split:stop]))
        start += 1
    return np.array(inputs), np.array(targets)

In [10]:
# Hyper-parameter set handed to loadToModel as its `params` argument.
parameters = dict(
    n_estimators=2000,
    max_depth=4,
    num_leaves=16,  # 2**4
    learning_rate=0.1,
    boosting_type='dart',
)

In [11]:
def gru(units, drop, out_features, window_size=72, n_features=10, horizon=48):
    """Build an (uncompiled) single-GRU model that emits a whole forecast window.

    Architecture: Input -> GRU (last state only) -> Dense -> Reshape.

    Parameters
    ----------
    units : int
        Number of GRU units.
    drop : float
        Dropout rate passed to the GRU layer.
    out_features : int
        Number of values predicted per forecast step.
    window_size : int, default 72
        Number of time steps in each input sample.
    n_features : int, default 10
        Number of features per input time step.
    horizon : int, default 48
        Number of forecast steps produced per sample.

    Returns
    -------
    tensorflow.keras.Model
        Model mapping ``(batch, window_size, n_features)`` to
        ``(batch, horizon, out_features)``.

    Notes
    -----
    The Dense layer is sized ``horizon * out_features`` so that its output can
    actually be reshaped to ``(horizon, out_features)``; a fixed width of 2
    cannot. The model class is reached through the imported ``models`` module
    because a bare ``Model`` name is not imported in this notebook.
    """
    inputs = layers.Input(shape=(window_size, n_features))
    encoded = GRU(units, dropout=drop, return_sequences=False)(inputs)
    # One Dense output per (forecast step, feature) pair, then fold into 2-D.
    flat = layers.Dense(horizon * out_features)(encoded)
    outputs = Reshape(target_shape=(horizon, out_features))(flat)

    return models.Model(inputs=inputs, outputs=outputs)

In [12]:
def seq2seq(units,bn,drop=0.0,channel=1):
    """Build an (uncompiled) GRU encoder-decoder model.

    The encoder GRU reads an input of shape (72, 10) and its returned state
    is used twice: repeated 48 times as the decoder's input sequence, and as
    the decoder GRU's initial state. A Dense layer with ``channel`` outputs is
    applied to every decoder step via TimeDistributed.

    Parameters
    ----------
    units : number of units in both the encoder and the decoder GRU.
    bn : when truthy, the encoder state goes through BatchNormalization
        before being handed to the decoder.
    drop : dropout rate for both GRU layers.
    channel : number of outputs of the per-step Dense layer.
    """
    source = layers.Input(shape=(72, 10))

    # Encoder: only the returned state is kept, the output tensor is ignored.
    encoder_layer = GRU(units, dropout=drop, return_state=True)
    _ignored_output, context = encoder_layer(source)
    if bn:
        context = layers.BatchNormalization()(context)

    # Decoder: the context vector is fed at each of the 48 steps and also
    # seeds the decoder's hidden state.
    repeated_context = layers.RepeatVector(48)(context)
    decoder_layer = GRU(units, dropout=drop, return_sequences=True, return_state=False)
    decoded = decoder_layer(repeated_context, initial_state=context)

    step_head = Dense(channel)
    prediction = layers.TimeDistributed(step_head)(decoded)
    return models.Model(source, prediction)

In [13]:
def gatedDNN():
    """Build and compile a minimal dense network.

    The model takes a flat 76-element input and has a single Dense layer with
    2 ReLU outputs. It is compiled with mean-squared-error loss and the Adam
    optimizer.

    ``Sequential`` is reached through the imported ``models`` module, because
    the bare name ``Sequential`` is not imported in this notebook and would
    raise NameError.

    Returns
    -------
    tensorflow.keras.Sequential
        The compiled model.
    """
    model = models.Sequential()
    model.add(layers.Input(shape=(76,)))
    model.add(layers.Dense(2, activation='relu'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [15]:
def loadToModel(files, formating, pathFiles, postfx, params, predicts):
    """Train and run the seq2seq model with 5-fold CV on every station file.

    For each name in ``files`` the CSV at
    ``pathFiles + name + postfx + formating`` is read, its columns are encoded
    numerically, sliding windows are cut with ``create_xy`` (72 input steps,
    48 target steps), and a seq2seq model is fitted and used for prediction
    on each of 5 ``KFold`` splits.

    Parameters
    ----------
    files : iterable of str
        Station names (file name stems).
    formating : str
        File extension, including the dot.
    pathFiles : str
        Directory prefix of the CSV files.
    postfx : str
        Suffix between the station name and the extension.
    params, predicts
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    None
        Progress and array shapes are printed.
        NOTE(review): per-fold predictions are computed but neither stored
        nor scored - confirm whether they should be collected in ``predicts``.

    Notes
    -----
    * Text columns are cast to ``str`` rather than the NumPy bytes dtype
      ``"|S"``. With bytes values the string comparisons inside
      ``get_season`` / ``get_winddir`` never match on Python 3, so every row
      would receive the fallback code.
    * A new model is built for every fold. Re-fitting one shared model would
      let it be tested on samples it was trained on in earlier folds.
    """
    window_size = 72
    horizon = 48
    feature_cols = ['station_id', 'season', 'real_temp', 'real_humidity',
                    'real_windspd', 'real_winddir', 'forecast_tempC',
                    'forecast_humidity', 'forecast_windSpeed',
                    'forecast_windDirection']
    text_cols = ['date_time', 'season', 'forecast_windDirection', 'real_winddir']
    float_cols = ['station_id', 'forecast_tempC', 'forecast_humidity']

    for fileName in files:
        df = pd.read_csv(pathFiles + fileName + postfx + formating, sep=',')

        # Target: PM10 reading -> ordinal class index.
        df['pm10'] = df['pm10'].map(pm10_index).astype(int)

        # Keep labels as text (str, not bytes) so the encoders can match them.
        for col in text_cols:
            df[col] = df[col].astype(str)
        df['date_time'] = df['date_time'].map(get_time).astype(np.float64)
        df['season'] = df['season'].map(get_season).astype(np.float64)
        df['forecast_windDirection'] = (
            df['forecast_windDirection'].map(get_winddir).astype(np.float64))
        df['real_winddir'] = df['real_winddir'].map(get_winddir).astype(np.float64)
        for col in float_cols:
            df[col] = df[col].astype(np.float64)

        X, y = create_xy(df[feature_cols], df[['pm10']], window_size, horizon)

        print(fileName + ' begins prediction')
        print(X.shape)
        print(y.shape)

        kf = KFold(n_splits=5)
        for trainI, testI in kf.split(X):
            # Fresh weights per fold so the held-out split is genuinely unseen.
            model = seq2seq(10, False, 0.5, 1)
            model.compile(loss='mse', optimizer=RMSprop(), metrics=['accuracy'])
            model.fit(X[trainI], y[trainI])
            predictions = model.predict(X[testI])  # NOTE(review): currently unused

In [16]:
# Empty list handed to loadToModel as its `predicts` argument.
predicts = list()

In [17]:
# Run the per-station load / train / predict loop over every configured station file.
loadToModel(file_names, files_format, files_path, files_postfix, parameters,predicts)

AgiaParaskevi begins prediction
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
(43729, 72, 10)
(43729, 48, 1)
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34984 samples
Aristotelous begins prediction
(43729, 72, 10)
(43729, 48, 1)
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34984 samples
Athens begins prediction
(43729, 72, 10)
(43729, 48, 1)
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34984 samples
Elefsina begins prediction
(43729, 72, 10)
(43729, 48, 1)
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34984 samples
Koropi begins prediction
(43729, 72, 10)
(43729, 48, 1)
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
T

Thrakomakedones begins prediction
(43729, 72, 10)
(43729, 48, 1)
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34983 samples
Train on 34984 samples
