In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from xgboost import XGBRegressor
from datetime import timedelta
import pickle
import warnings
warnings.filterwarnings('ignore')

#import local functions
from utils import utils_scenario as utils, data_preparation as prep, data_process as proc

In [29]:
#prepare raw data into same format as training df
def prepare_x_test(measurement, forecast, past_n_steps, pred_period):
    """Assemble the model input frame from raw measurements and forecasts.

    Parameters
    ----------
    measurement : pandas.DataFrame
        Raw observations. Must contain a 'datetime' column, which is used as
        the index before the lag features are built.
    forecast : pandas.DataFrame
        Raw forecasts. Must contain an 'f_date' column; every other column is
        joined in with a '_forecast' suffix.
    past_n_steps : int
        Number of past steps handed to ``proc.get_past_n_steps``; the same
        number of trailing rows is removed from the result.
    pred_period : int or float
        Prediction horizon in hours. It is added to 'present_time' to obtain
        the forecast timestamp used for the join.

    Returns
    -------
    pandas.DataFrame
        Feature frame without the 'present_time' and 'forecast_time' columns.
        The ``measurement`` and ``forecast`` arguments are not modified.
    """
    # Lagged measurement features; the helper is expected to provide a
    # 'present_time' column (it is read on the next statement).
    df = proc.get_past_n_steps(measurement.set_index('datetime'), past_n_steps)

    # Timestamp the prediction refers to.
    df['forecast_time'] = df['present_time'] + timedelta(hours=pred_period)

    # Join the forecast issued for that timestamp. 'f_date' is the index name
    # of the suffixed forecast frame, which pandas accepts as a merge key.
    forecast = forecast.set_index('f_date').add_suffix('_forecast')
    df = pd.merge(df, forecast, how='left',
                  left_on='forecast_time', right_on='f_date')

    # Add the cyclic day/hour features computed by the project helper.
    df = proc.smooth_day_hour(df)

    # Missing forecasts (and any other gaps) are encoded as 0.
    df = df.fillna(value=0)

    # Drop the last `past_n_steps` rows. A bare df[:-0] would be df[:0] and
    # wipe the whole frame, so the slice is skipped when there is nothing to drop.
    if past_n_steps:
        df = df[:-past_n_steps]

    # The timestamp columns are bookkeeping only, not model features.
    df_out = df.drop(['present_time', 'forecast_time'], axis=1)
    return df_out

# test_df = prepare_x_test(measurement, forecast, past_steps, predict )


In [30]:
# Load the raw test inputs and parse their timestamp columns into datetimes.
measurement = pd.read_csv("data/test_measurement.csv").assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'], format='%Y-%m-%d %H:%M:%S')
)
forecast = pd.read_csv("data/test_forecast.csv").assign(
    f_date=lambda frame: pd.to_datetime(frame['f_date'], format='%Y-%m-%d %H:%M:%S')
)

In [27]:
# df=proc.get_past_n_steps(df, 48)
# df['present_time']


In [28]:
# Sanity check: list the feature columns produced for a 48-step history and a
# 6-hour prediction horizon.
# NOTE(review): the assignment below adds a constant 'f_period' column to the
# shared `forecast` frame in place, so every later prepare_x_test call sees it
# (as 'f_period_forecast'). Presumably this mirrors a column present in the
# training data - confirm, and confirm that a fixed value of 8 is intended.
# measurement = measurement.set_index('datetime')
# forecast = forecast.set_index('f_date') 
forecast['f_period']=8
prepare_x_test(measurement, forecast, 48, 6).columns

Index(['speed_t-48', 'cos_wind_dir_t-48', 'sin_wind_dir_t-48', 'temp_t-48',
       'radiation_t-48', 'precip_t-48', 'season_t-48', 'speed_t-47',
       'cos_wind_dir_t-47', 'sin_wind_dir_t-47',
       ...
       'season_t-0', 'speed_forecast', 'temp_forecast', 'rad_forecast',
       'precip_forecast', 'cos_wind_dir_forecast', 'sin_wind_dir_forecast',
       'f_period_forecast', 'cos_day', 'cos_hour'],
      dtype='object', length=352)

In [4]:
# Prediction horizons (in hours, see prepare_x_test) and the history length
# used to build the model inputs.
pred_list = list(range(1, 49))
past_n_steps = 48


def _load_pickle(path):
    """Load one pickled model and close the file handle afterwards."""
    # NOTE: pickle.load can execute arbitrary code; only load model files
    # that come from a trusted source.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Collect one record per horizon and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
records = []
for pred in pred_list:
    #prepare data to be the same format as training data
    x_test = np.array(prepare_x_test(measurement, forecast, past_n_steps, pred))

    #read 3 models for speed, cos_wind, sin_wind
    speed_model = _load_pickle('trained_models/speed_t_' + str(pred))
    cos_wind_model = _load_pickle('trained_models/cos_wind_dir_t_' + str(pred))
    sin_wind_model = _load_pickle('trained_models/sin_wind_dir_t_' + str(pred))

    #predict; only the first row of the prepared input is used
    speed = speed_model.predict(x_test)[0]
    cos_wind = cos_wind_model.predict(x_test)[0]
    sin_wind = sin_wind_model.predict(x_test)[0]

    #record the predictions for this horizon
    records.append({'past_n_steps': str(past_n_steps),
                    'pred_period': str(pred),
                    'speed': round(speed, 3),
                    'cos_wind_dir': cos_wind,
                    'sin_wind_dir': sin_wind})

result = pd.DataFrame(
    records,
    columns=['past_n_steps', 'pred_period', 'speed', 'cos_wind_dir', 'sin_wind_dir'],
)

#convert cos and sin to wind_dir:
result['wind_dir'] = result.apply(
    lambda row: utils.get_angle_in_degree(row['cos_wind_dir'], row['sin_wind_dir']),
    axis=1,
)
# result.to_csv('results/test_prediction.csv')

In [6]:
# Display the per-horizon predictions, including the derived 'wind_dir' column.
result