In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from xgboost import XGBRegressor, XGBClassifier
import warnings
warnings.filterwarnings('ignore')

#import local functions
import data_process as proc
import data_preperation as prep
import utils_scenario as utils

In [3]:
def run_xgb(steps_in, steps_out):
    #Parameter list:
    param_list =['speed','cos_wind_dir','sin_wind_dir']

    predict = pd.DataFrame(columns={'speed','cos_wind_dir','sin_wind_dir'})
    true = pd.DataFrame(columns={'speed','cos_wind_dir','sin_wind_dir'})
    baseline = pd.DataFrame(columns={'speed','cos_wind_dir','sin_wind_dir'})

    for param in param_list:
        x_df, y_df, x, y = proc.prepare_x_y(measurement, forecast, steps_in, steps_out, param)
        X_train, X_test, y_train, y_test= train_test_split(x, y, test_size=0.2, shuffle = False)
        xg = XGBRegressor(max_depth = 5)
        xg.fit(X_train, y_train)
        y_baseline = x_df
        y_hat = xg.predict(X_test)

        predict[param] = pd.Series(y_hat)
        true[param] = pd.Series(y_test.flatten())
        baseline[param] = x_df[param+'_forecast'][-len(y_hat):]

    #reset index
    baseline.reset_index(inplace=True)
    return predict, true, baseline

def scenario_accuracy(predict, true, baseline):
    pred = utils.get_all_scenarios(predict['speed'], predict['cos_wind_dir'],predict['sin_wind_dir'], b_scenarios=True)
    true = utils.get_all_scenarios(true['speed'], true['cos_wind_dir'],true['sin_wind_dir'], b_scenarios=True)
    base = utils.get_all_scenarios(baseline['speed'], baseline['cos_wind_dir'],baseline['sin_wind_dir'], b_scenarios=True)

    #calculate prediction accuracies
    pred_score = metrics.accuracy_score(pred, true).round(3)
    base_score = metrics.accuracy_score(base, true).round(3)

    return  pred_score, base_score

def binary_accuracy(predict, true, baseline):
    pred = utils.get_all_dangerous_scenarios(predict['speed'], predict['cos_wind_dir'],predict['sin_wind_dir'])
    true = utils.get_all_dangerous_scenarios(true['speed'], true['cos_wind_dir'],true['sin_wind_dir'])
    base = utils.get_all_dangerous_scenarios(baseline['speed'], baseline['cos_wind_dir'],baseline['sin_wind_dir'])

    #calculate prediction accuracies
    pred_score = metrics.accuracy_score(pred, true).round(3)
    base_score = metrics.accuracy_score(base, true).round(3)
    #calculate auc
    pred_auc = metrics.roc_auc_score(pred, true).round(3)
    base_auc = metrics.roc_auc_score(base, true).round(3)
    return  pred_score, base_score, pred_auc, base_auc

def get_mae(predict, true, baseline):
    speed = metrics.mean_absolute_error(predict['speed'], true['speed'])
    speed_base=metrics.mean_absolute_error(baseline['speed'], true['speed'])
    angle = metrics.mean_absolute_error(predict['angle'], true['angle'])
    angle_base=metrics.mean_absolute_error(baseline['angle'], true['angle'])
    return speed, speed_base, angle, angle_base 
    

In [4]:
#get data
measurement=prep.prepare_measurement()
forecast = prep.prepare_forecast()
#keep useful columns
measurement= measurement[['speed', 'cos_wind_dir', 'sin_wind_dir', 'temp', 'radiation', 'precip','season', 'am']]

#set up empty dataframes
accuracy = pd.DataFrame(columns={})
pred_speed=pd.DataFrame(columns={})
pred_angle=pd.DataFrame(columns={})

#prediction steps
t_list=[1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48]
steps_in=48

for t in t_list:
    #run model
    predict, true, base = run_xgb(steps_in, steps_out=t)
    
    #calculate angles from sin and cosine  
    predict['angle'] = predict.apply(lambda row : utils.get_angle_in_degree(row['cos_wind_dir'],row['sin_wind_dir']), axis = 1)
    true['angle'] = true.apply(lambda row : utils.get_angle_in_degree(row['cos_wind_dir'],row['sin_wind_dir']), axis = 1)
    base['angle'] = base.apply(lambda row : utils.get_angle_in_degree(row['cos_wind_dir'],row['sin_wind_dir']), axis = 1)
    
    #calculate mae for regression 
    mae_speed, mae_speed_base, mae_angle, mae_angle_base = get_mae(predict, true, base) 
    #calculate accuracy & auc for scenario prediction 
    pred_scenario, base_scenario  = scenario_accuracy(predict, true, base)
    pred_bin_accu, base_bin_accu, pred_bin_auc, base_bin_auc= binary_accuracy(predict, true, base)
    
    
    #record accuracy
    accuracy = accuracy.append({'past_n_steps': str(steps_in),
                                      'pred_n_steps': str(t),
                                      'xgb_scenario_accu': pred_scenario,
                                      'base_scenario_accu': base_scenario,
                                      'xbg_binary_accu':pred_bin_accu,
                                      'base_binary_accu':base_bin_accu,
                                        'xbg_binary_auc':pred_bin_auc,
                                      'base_binary_auc':base_bin_auc,
                                        'xbg_speed_mae': mae_speed,
                                        'base_speed_mae': mae_speed_base,
                                        'xgb_angle_mae': mae_angle,
                                        'base_angle_mae': mae_angle_base}, ignore_index=True)
    #record predicted speed
    pred_speed = pd.concat([pred_speed, predict['speed'].rename('speed_t+'+str(t))], axis=1)
    #record predicted angle
    pred_angle = pd.concat([pred_angle, predict['angle'].rename('angle_t+'+str(t))], axis=1)


read csv semester csv files from 2015s2 to 2020s1
smooth wind direction
generate seasonality categorical feature
generate am/pm categorical feature
reading forecast data
smooth wind direction


In [7]:
accuracy

Unnamed: 0,base_angle_mae,base_binary_accu,base_binary_auc,base_scenario_accu,base_speed_mae,past_n_steps,pred_n_steps,xbg_binary_accu,xbg_binary_auc,xbg_speed_mae,xgb_angle_mae,xgb_scenario_accu
0,44.290238,0.811,0.511,0.665,1.715838,1,1,0.981,0.837,0.426612,16.56588,0.892
