In [1]:
### Notebook modified from arthur girard - agirard@ethz.ch (https://gitlab.ethz.ch/arthurgirard/datadriveturbineperformanceanalysis/-/blob/main/Simple_ANN_CustomLoss_commented.py)

In [1]:
### Generate a dataframe for training from .csv file
import pandas as pd
import glob
import numpy as np
import seaborn as sns
color_palette = sns.color_palette()
sns.set_context('talk')
import matplotlib.pyplot as plt
import dswe
from numpy import absolute
from sklearn.model_selection import cross_val_score, train_test_split, RepeatedKFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import xgboost as xgb
from scipy.stats import zscore
from sklearn.cluster import DBSCAN

In [2]:
#reads a CSV file from the given file path and returns a Pandas DataFrame 
def generate_dataframe(filepath):
  dataframe = pd.read_csv(filepath)
  return dataframe

In [3]:
### Mapping month and day/night
def map_time(df):
    df['time'] = pd.to_datetime(df['time'], format='%m/%d/%Y %H:%M')
    # Create a new column 'month'
    df['month'] = df['time'].dt.month_name()
    #Create a new column 'day or night'
    #Considering that the day is between 6am and 6pm 
    df['Day.Night'] = df['time'].apply(lambda x: 'Day' if x.hour > 6 and x.hour < 18 else 'Night')
    time_mapping = {"Day": 1, "Night": 0}
    month_mapping =  {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
                  'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}
    df = df.replace(time_mapping)
    df = df.replace(month_mapping)
    df.drop("time", axis = 1, inplace = True)
    return df

In [4]:
def filter_split_df_input_output(df, list_input, list_output):
    
    db = DBSCAN(eps=0.2, min_samples=10).fit(df[["wind_speed", "power"]])
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise_ = list(labels).count(-1)
    
    df = df[db.labels_ == 0]

    x = df[list_input]
    y = df[list_output]
    x.fillna(0)
    y.fillna(0)
    
    return (x, y)

In [5]:
def std_scale(df, df_reference):
    return((df-df_reference.mean())/df_reference.std())

In [6]:
def minmax_scale(df, df_reference):
    return((df-df_reference.min())/(df_reference.max()-df_reference.min()))

In [7]:
def calc_rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true.values - y_pred) ** 2))

In [8]:
def print_performance(x_test, y_test, y_pred):
    plt.figure(figsize = [6,4])
    plt.scatter(x_test['wind_speed'], y_test,color = color_palette[0], label="Measurements",s=0.1)
    plt.scatter(x_test['wind_speed'], y_pred, color = color_palette[1], label="Model",s=0.1)
    plt.xlabel("Wind speed (m/s)")
    plt.ylabel("Output power (normalized)")
    plt.legend()
    sns.despine()
    plt.show()
    
    plt.figure(figsize = [6,4])
    plt.scatter(y_test, y_pred, color = color_palette[0],s=0.1)
    plt.xlabel("Measurements")
    plt.ylabel("Model")
    sns.despine()
    plt.show()
    
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = calc_rmse(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # print the results
    print("MSE:", mse)
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("R2 score:", r2)
    
    return()

In [39]:
def xgb_regressor(x, y, x_test):
    # define model
    model = xgb.XGBRegressor()
    model.fit(x, y)
    y_hat = model.predict(x_test)
    
    ## Uncomment the lines below to plot the feature importance:
    # fi = pd.DataFrame(data=model.feature_importances_,
    #          index=model.feature_names_in_,
    #          columns=['importance'])
    # plt.figure(figsize = [4, 3])
    # fi.sort_values('importance').plot(kind='barh', title='Feature Importance')
    # plt.show()
    # sns.despine()

    return(y_hat)

In [20]:
def xgb_regressor_custom_loss(x, y, x_test, x_val, y_val):
    def custom_loss(y_pred, y_val):
        d = (y_val-y_pred)
        delta = 1  
        scale = 1 + (d / delta) ** 2
        scale_sqrt = np.sqrt(scale)
        grad = d / scale_sqrt + 2 * d
        hess = (1 / scale) / scale_sqrt + 2
        return grad, hess
    
    model = xgb.XGBRegressor(n_estimators=5000,
                       early_stopping_rounds=100,
                       objective = custom_loss,
                       max_depth=3,
                       eval_metric = 'rmse',
                       learning_rate=0.01)
    model.fit(x, y,
        eval_set=[(x, y), (x_val, y_val)], verbose = 500)
    y_hat = model.predict(x_test)
    return(y_hat)

In [14]:
'''
This is dbscan with xgb default and all parameters
'''


# modify the number below accordingly
#Participant ID number
id_number = 9
#Submission Number
Submission_number = 1

# load all the .csv file in the folder that end with "training"
# define the file paths of the CSV files 
path_training = r'../dataset/kelmarsh/training/*training.csv'
path2 = r'../dataset/penmanshiel/training/*training.csv'
files = glob.glob(path_training)+ glob.glob(path2)
# dataframe_training = pd.DataFrame()

column_list_input = ['wind_speed', 'wind_speed_sensor1', 'wind_speed_sensor1_SD', 'wind_speed_sensor2', 'wind_speed_sensor2_SD', 'density_adjusted_wind_speed', 'wind_direction', 'nacelle_position', 'wind_direction_SD', 'nacelle_position_SD', 'nacelle_ambient_temperature', 'TI','Day.Night']
column_list_ouput = ['power']

In [13]:
for file in files:
    dataframe_training = generate_dataframe(file)
    current_csv_file = file.split("/")[4]
    print(current_csv_file)
    
    dataframe_training = map_time(dataframe_training)    
    dataframe_training_scaled = std_scale(dataframe_training, dataframe_training)
    x_train, y_train = filter_split_df_input_output(dataframe_training_scaled, column_list_input, column_list_ouput)
    
    # load the real test data
    dataframe_test = pd.DataFrame()
    test_file_name = file.replace("training", "test")
    dataframe_test = generate_dataframe(test_file_name)
    dataframe_test = map_time(dataframe_test)  
    dataframe_test_scaled = std_scale(dataframe_test[column_list_input], dataframe_training[column_list_input])

    y_pred = xgb_regressor(x_train, y_train, dataframe_test_scaled)
    predicted_power_rescaled = (y_pred * dataframe_training[column_list_ouput].values.std()) + dataframe_training[column_list_ouput].values.mean()

    df_test = pd.read_csv(test_file_name)
    # create a numpy array of float32 of shape [None, 1]
    # fill the second column with the prediction array
    df_test.iloc[:, 1] = predicted_power_rescaled
    # save the DataFrame as a new csv file
    csv_name = test_file_name.split("/")[4]
    prefix = str(id_number) + '_#' + str(Submission_number) + '_'
    new_file_name = '/'.join(test_file_name.split("/")[0:4]) + "/" + prefix + csv_name
    # print(new_file_name)
    df_test.to_csv(new_file_name, index=False)


Kel_df2_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df6_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df4_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df1_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df5_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df3_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df13_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df8_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df15_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df14_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df1_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df4_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df11_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df5_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df10_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df12_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df2_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df9_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df7_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df6_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


In [17]:
'''
This is dbscan with xgb default and only wind parameters
'''


# modify the number below accordingly
#Participant ID number
id_number = 9
#Submission Number
Submission_number = 2

# load all the .csv file in the folder that end with "training"
# define the file paths of the CSV files 
path_training = r'../dataset/kelmarsh/training/*training.csv'
path2 = r'../dataset/penmanshiel/training/*training.csv'
files = glob.glob(path_training)+ glob.glob(path2)
# dataframe_training = pd.DataFrame()

column_list_input = ['wind_speed', 'wind_speed_sensor1', 'wind_speed_sensor1_SD', 'wind_speed_sensor2', 'wind_speed_sensor2_SD', 'density_adjusted_wind_speed']
column_list_ouput = ['power']

In [18]:
for file in files:
    dataframe_training = generate_dataframe(file)
    current_csv_file = file.split("/")[4]
    print(current_csv_file)
    
    dataframe_training = map_time(dataframe_training)    
    dataframe_training_scaled = std_scale(dataframe_training, dataframe_training)
    x_train, y_train = filter_split_df_input_output(dataframe_training_scaled, column_list_input, column_list_ouput)

    
    # load the real test data
    dataframe_test = pd.DataFrame()
    test_file_name = file.replace("training", "test")
    dataframe_test = generate_dataframe(test_file_name)
    dataframe_test = map_time(dataframe_test)  
    dataframe_test_scaled = std_scale(dataframe_test[column_list_input], dataframe_training[column_list_input])

    y_pred = xgb_regressor(x_train, y_train, dataframe_test_scaled)
    predicted_power_rescaled = (y_pred * dataframe_training[column_list_ouput].values.std()) + dataframe_training[column_list_ouput].values.mean()

    df_test = pd.read_csv(test_file_name)
    # create a numpy array of float32 of shape [None, 1]
    # fill the second column with the prediction array
    df_test.iloc[:, 1] = predicted_power_rescaled
    # save the DataFrame as a new csv file
    csv_name = test_file_name.split("/")[4]
    prefix = str(id_number) + '_#' + str(Submission_number) + '_'
    new_file_name = '/'.join(test_file_name.split("/")[0:4]) + "/" + prefix + csv_name
    # print(new_file_name)
    df_test.to_csv(new_file_name, index=False)


Kel_df2_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df6_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df4_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df1_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df5_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df3_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df13_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df8_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df15_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df14_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df1_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df4_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df11_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df5_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df10_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df12_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df2_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df9_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df7_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df6_training.csv


  df['month'] = df['time'].dt.month_name()
  df_test.iloc[:, 1] = predicted_power_rescaled


In [22]:
'''
This is dbscan with xgb custom loss and all parameters
'''

# modify the number below accordingly
#Participant ID number
id_number = 9
#Submission Number
Submission_number = 3

# load all the .csv file in the folder that end with "training"
# define the file paths of the CSV files 
path_training = r'../dataset/kelmarsh/training/*training.csv'
path2 = r'../dataset/penmanshiel/training/*training.csv'
files = glob.glob(path_training)+ glob.glob(path2)
# dataframe_training = pd.DataFrame()

column_list_input = ['wind_speed', 'wind_speed_sensor1', 'wind_speed_sensor1_SD', 'wind_speed_sensor2', 'wind_speed_sensor2_SD', 'density_adjusted_wind_speed', 'wind_direction', 'nacelle_position', 'wind_direction_SD', 'nacelle_position_SD', 'nacelle_ambient_temperature', 'TI','Day.Night']
column_list_ouput = ['power']

In [23]:
for file in files:
    dataframe_training = generate_dataframe(file)
    current_csv_file = file.split("/")[4]
    print(current_csv_file)
    
    dataframe_training = map_time(dataframe_training)    
    dataframe_training_scaled = std_scale(dataframe_training, dataframe_training)
    train_dataframe, val_dataframe = train_test_split(dataframe_training_scaled, test_size=0.2, random_state=12)
    x_train, y_train = filter_split_df_input_output(dataframe_training_scaled, column_list_input, column_list_ouput)
    x_val, y_val = filter_split_df_input_output(val_dataframe, column_list_input, column_list_ouput)
    
    # load the real test data
    dataframe_test = pd.DataFrame()
    test_file_name = file.replace("training", "test")
    dataframe_test = generate_dataframe(test_file_name)
    dataframe_test = map_time(dataframe_test)  
    dataframe_test_scaled = std_scale(dataframe_test[column_list_input], dataframe_training[column_list_input])

    y_pred = xgb_regressor_custom_loss(x_train, y_train, dataframe_test_scaled, x_val, y_val)
    predicted_power_rescaled = (y_pred * dataframe_training[column_list_ouput].values.std()) + dataframe_training[column_list_ouput].values.mean()

    df_test = pd.read_csv(test_file_name)
    # create a numpy array of float32 of shape [None, 1]
    # fill the second column with the prediction array
    df_test.iloc[:, 1] = predicted_power_rescaled
    # save the DataFrame as a new csv file
    csv_name = test_file_name.split("/")[4]
    prefix = str(id_number) + '_#' + str(Submission_number) + '_'
    new_file_name = '/'.join(test_file_name.split("/")[0:4]) + "/" + prefix + csv_name
    # print(new_file_name)
    df_test.to_csv(new_file_name, index=False)


Kel_df2_training.csv
[0]	validation_0-rmse:1.10538	validation_1-rmse:1.10806


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07680	validation_1-rmse:0.07563
[1000]	validation_0-rmse:0.07267	validation_1-rmse:0.07162
[1500]	validation_0-rmse:0.07053	validation_1-rmse:0.06953
[2000]	validation_0-rmse:0.06904	validation_1-rmse:0.06807
[2500]	validation_0-rmse:0.06791	validation_1-rmse:0.06699
[3000]	validation_0-rmse:0.06699	validation_1-rmse:0.06614
[3500]	validation_0-rmse:0.06615	validation_1-rmse:0.06536
[4000]	validation_0-rmse:0.06562	validation_1-rmse:0.06490
[4500]	validation_0-rmse:0.06512	validation_1-rmse:0.06447
[4999]	validation_0-rmse:0.06468	validation_1-rmse:0.06408


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df6_training.csv
[0]	validation_0-rmse:1.10518	validation_1-rmse:1.10150


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08417	validation_1-rmse:0.08405
[1000]	validation_0-rmse:0.07856	validation_1-rmse:0.07857
[1500]	validation_0-rmse:0.07639	validation_1-rmse:0.07627
[2000]	validation_0-rmse:0.07476	validation_1-rmse:0.07464
[2500]	validation_0-rmse:0.07355	validation_1-rmse:0.07347
[3000]	validation_0-rmse:0.07272	validation_1-rmse:0.07267
[3500]	validation_0-rmse:0.07191	validation_1-rmse:0.07189
[4000]	validation_0-rmse:0.07134	validation_1-rmse:0.07132
[4500]	validation_0-rmse:0.07071	validation_1-rmse:0.07072
[4999]	validation_0-rmse:0.07024	validation_1-rmse:0.07030


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df4_training.csv
[0]	validation_0-rmse:1.10535	validation_1-rmse:1.10723


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07462	validation_1-rmse:0.07385
[1000]	validation_0-rmse:0.07048	validation_1-rmse:0.06995
[1500]	validation_0-rmse:0.06889	validation_1-rmse:0.06842
[2000]	validation_0-rmse:0.06748	validation_1-rmse:0.06704
[2500]	validation_0-rmse:0.06622	validation_1-rmse:0.06570
[3000]	validation_0-rmse:0.06518	validation_1-rmse:0.06465
[3500]	validation_0-rmse:0.06441	validation_1-rmse:0.06385
[4000]	validation_0-rmse:0.06378	validation_1-rmse:0.06322
[4500]	validation_0-rmse:0.06330	validation_1-rmse:0.06276
[4999]	validation_0-rmse:0.06285	validation_1-rmse:0.06230


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df1_training.csv
[0]	validation_0-rmse:1.10525	validation_1-rmse:1.10619


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07526	validation_1-rmse:0.07412
[1000]	validation_0-rmse:0.07093	validation_1-rmse:0.06961
[1500]	validation_0-rmse:0.06931	validation_1-rmse:0.06799
[2000]	validation_0-rmse:0.06811	validation_1-rmse:0.06681
[2500]	validation_0-rmse:0.06714	validation_1-rmse:0.06585
[3000]	validation_0-rmse:0.06650	validation_1-rmse:0.06526
[3500]	validation_0-rmse:0.06584	validation_1-rmse:0.06464
[4000]	validation_0-rmse:0.06528	validation_1-rmse:0.06411
[4500]	validation_0-rmse:0.06479	validation_1-rmse:0.06367
[4999]	validation_0-rmse:0.06440	validation_1-rmse:0.06331


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df5_training.csv
[0]	validation_0-rmse:1.10532	validation_1-rmse:1.10419


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07853	validation_1-rmse:0.07708
[1000]	validation_0-rmse:0.07350	validation_1-rmse:0.07244
[1500]	validation_0-rmse:0.07141	validation_1-rmse:0.07069
[2000]	validation_0-rmse:0.07011	validation_1-rmse:0.06958
[2500]	validation_0-rmse:0.06905	validation_1-rmse:0.06858
[3000]	validation_0-rmse:0.06821	validation_1-rmse:0.06781
[3500]	validation_0-rmse:0.06762	validation_1-rmse:0.06726
[4000]	validation_0-rmse:0.06713	validation_1-rmse:0.06677
[4500]	validation_0-rmse:0.06668	validation_1-rmse:0.06633
[4999]	validation_0-rmse:0.06623	validation_1-rmse:0.06588


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df3_training.csv
[0]	validation_0-rmse:1.10532	validation_1-rmse:1.11021


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08581	validation_1-rmse:0.08442
[1000]	validation_0-rmse:0.08021	validation_1-rmse:0.07887
[1500]	validation_0-rmse:0.07800	validation_1-rmse:0.07650
[2000]	validation_0-rmse:0.07661	validation_1-rmse:0.07502
[2500]	validation_0-rmse:0.07525	validation_1-rmse:0.07363
[3000]	validation_0-rmse:0.07422	validation_1-rmse:0.07258
[3500]	validation_0-rmse:0.07326	validation_1-rmse:0.07159
[4000]	validation_0-rmse:0.07248	validation_1-rmse:0.07083
[4500]	validation_0-rmse:0.07185	validation_1-rmse:0.07023
[4999]	validation_0-rmse:0.07125	validation_1-rmse:0.06966


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df13_training.csv
[0]	validation_0-rmse:1.10581	validation_1-rmse:1.11128


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.05714	validation_1-rmse:0.05580
[1000]	validation_0-rmse:0.05281	validation_1-rmse:0.05168
[1500]	validation_0-rmse:0.05185	validation_1-rmse:0.05083
[2000]	validation_0-rmse:0.05113	validation_1-rmse:0.05019
[2500]	validation_0-rmse:0.05050	validation_1-rmse:0.04954
[3000]	validation_0-rmse:0.04988	validation_1-rmse:0.04892
[3500]	validation_0-rmse:0.04927	validation_1-rmse:0.04833
[4000]	validation_0-rmse:0.04873	validation_1-rmse:0.04779
[4500]	validation_0-rmse:0.04824	validation_1-rmse:0.04735
[4999]	validation_0-rmse:0.04786	validation_1-rmse:0.04702


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df8_training.csv
[0]	validation_0-rmse:1.10558	validation_1-rmse:1.10701


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.05930	validation_1-rmse:0.05928
[1000]	validation_0-rmse:0.05523	validation_1-rmse:0.05548
[1500]	validation_0-rmse:0.05420	validation_1-rmse:0.05458
[2000]	validation_0-rmse:0.05333	validation_1-rmse:0.05376
[2500]	validation_0-rmse:0.05263	validation_1-rmse:0.05303
[3000]	validation_0-rmse:0.05200	validation_1-rmse:0.05228
[3500]	validation_0-rmse:0.05142	validation_1-rmse:0.05161
[4000]	validation_0-rmse:0.05089	validation_1-rmse:0.05103
[4500]	validation_0-rmse:0.05047	validation_1-rmse:0.05057
[4999]	validation_0-rmse:0.05004	validation_1-rmse:0.05013


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df15_training.csv
[0]	validation_0-rmse:1.10575	validation_1-rmse:1.11728


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06938	validation_1-rmse:0.06834
[1000]	validation_0-rmse:0.06456	validation_1-rmse:0.06375
[1500]	validation_0-rmse:0.06261	validation_1-rmse:0.06196
[2000]	validation_0-rmse:0.06113	validation_1-rmse:0.06060
[2500]	validation_0-rmse:0.05990	validation_1-rmse:0.05954
[3000]	validation_0-rmse:0.05902	validation_1-rmse:0.05874
[3500]	validation_0-rmse:0.05816	validation_1-rmse:0.05797
[4000]	validation_0-rmse:0.05743	validation_1-rmse:0.05727
[4500]	validation_0-rmse:0.05676	validation_1-rmse:0.05669
[4999]	validation_0-rmse:0.05617	validation_1-rmse:0.05616


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df14_training.csv
[0]	validation_0-rmse:1.10572	validation_1-rmse:1.11112


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06209	validation_1-rmse:0.06153
[1000]	validation_0-rmse:0.05754	validation_1-rmse:0.05677
[1500]	validation_0-rmse:0.05585	validation_1-rmse:0.05508
[2000]	validation_0-rmse:0.05461	validation_1-rmse:0.05387
[2500]	validation_0-rmse:0.05377	validation_1-rmse:0.05307
[3000]	validation_0-rmse:0.05305	validation_1-rmse:0.05242
[3500]	validation_0-rmse:0.05245	validation_1-rmse:0.05185
[4000]	validation_0-rmse:0.05189	validation_1-rmse:0.05136
[4500]	validation_0-rmse:0.05141	validation_1-rmse:0.05094
[4999]	validation_0-rmse:0.05096	validation_1-rmse:0.05056


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df1_training.csv
[0]	validation_0-rmse:1.10541	validation_1-rmse:1.11497


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06028	validation_1-rmse:0.05767
[1000]	validation_0-rmse:0.05560	validation_1-rmse:0.05324
[1500]	validation_0-rmse:0.05429	validation_1-rmse:0.05212
[2000]	validation_0-rmse:0.05337	validation_1-rmse:0.05126
[2500]	validation_0-rmse:0.05248	validation_1-rmse:0.05041
[3000]	validation_0-rmse:0.05177	validation_1-rmse:0.04975
[3500]	validation_0-rmse:0.05114	validation_1-rmse:0.04918
[4000]	validation_0-rmse:0.05053	validation_1-rmse:0.04865
[4500]	validation_0-rmse:0.05000	validation_1-rmse:0.04818
[4999]	validation_0-rmse:0.04955	validation_1-rmse:0.04779


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df4_training.csv
[0]	validation_0-rmse:1.10553	validation_1-rmse:1.10638


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.05959	validation_1-rmse:0.05889
[1000]	validation_0-rmse:0.05504	validation_1-rmse:0.05434
[1500]	validation_0-rmse:0.05379	validation_1-rmse:0.05316
[2000]	validation_0-rmse:0.05288	validation_1-rmse:0.05228
[2500]	validation_0-rmse:0.05216	validation_1-rmse:0.05161
[3000]	validation_0-rmse:0.05156	validation_1-rmse:0.05106
[3500]	validation_0-rmse:0.05100	validation_1-rmse:0.05051
[4000]	validation_0-rmse:0.05052	validation_1-rmse:0.05005
[4500]	validation_0-rmse:0.05008	validation_1-rmse:0.04964
[4999]	validation_0-rmse:0.04969	validation_1-rmse:0.04927


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df11_training.csv
[0]	validation_0-rmse:1.10585	validation_1-rmse:1.11292


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06039	validation_1-rmse:0.05836
[1000]	validation_0-rmse:0.05568	validation_1-rmse:0.05381
[1500]	validation_0-rmse:0.05430	validation_1-rmse:0.05260
[2000]	validation_0-rmse:0.05328	validation_1-rmse:0.05167
[2500]	validation_0-rmse:0.05250	validation_1-rmse:0.05096
[3000]	validation_0-rmse:0.05172	validation_1-rmse:0.05022
[3500]	validation_0-rmse:0.05104	validation_1-rmse:0.04961
[4000]	validation_0-rmse:0.05034	validation_1-rmse:0.04897
[4500]	validation_0-rmse:0.04974	validation_1-rmse:0.04840
[4999]	validation_0-rmse:0.04920	validation_1-rmse:0.04791


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df5_training.csv
[0]	validation_0-rmse:1.10564	validation_1-rmse:1.10921


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06068	validation_1-rmse:0.06002
[1000]	validation_0-rmse:0.05612	validation_1-rmse:0.05549
[1500]	validation_0-rmse:0.05471	validation_1-rmse:0.05409
[2000]	validation_0-rmse:0.05376	validation_1-rmse:0.05317
[2500]	validation_0-rmse:0.05308	validation_1-rmse:0.05249
[3000]	validation_0-rmse:0.05253	validation_1-rmse:0.05194
[3500]	validation_0-rmse:0.05199	validation_1-rmse:0.05142
[4000]	validation_0-rmse:0.05149	validation_1-rmse:0.05091
[4500]	validation_0-rmse:0.05103	validation_1-rmse:0.05044
[4999]	validation_0-rmse:0.05063	validation_1-rmse:0.05003


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df10_training.csv
[0]	validation_0-rmse:1.10585	validation_1-rmse:1.10076


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06335	validation_1-rmse:0.06459
[1000]	validation_0-rmse:0.05838	validation_1-rmse:0.05911
[1500]	validation_0-rmse:0.05683	validation_1-rmse:0.05747
[2000]	validation_0-rmse:0.05575	validation_1-rmse:0.05634
[2500]	validation_0-rmse:0.05477	validation_1-rmse:0.05531
[3000]	validation_0-rmse:0.05410	validation_1-rmse:0.05460
[3500]	validation_0-rmse:0.05348	validation_1-rmse:0.05395
[4000]	validation_0-rmse:0.05299	validation_1-rmse:0.05343
[4500]	validation_0-rmse:0.05248	validation_1-rmse:0.05289
[4999]	validation_0-rmse:0.05202	validation_1-rmse:0.05241


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df12_training.csv
[0]	validation_0-rmse:1.10571	validation_1-rmse:1.10942


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06412	validation_1-rmse:0.06396
[1000]	validation_0-rmse:0.05896	validation_1-rmse:0.05840
[1500]	validation_0-rmse:0.05770	validation_1-rmse:0.05710
[2000]	validation_0-rmse:0.05663	validation_1-rmse:0.05605
[2500]	validation_0-rmse:0.05575	validation_1-rmse:0.05517
[3000]	validation_0-rmse:0.05503	validation_1-rmse:0.05447
[3500]	validation_0-rmse:0.05442	validation_1-rmse:0.05393
[4000]	validation_0-rmse:0.05392	validation_1-rmse:0.05344
[4500]	validation_0-rmse:0.05343	validation_1-rmse:0.05297
[4999]	validation_0-rmse:0.05300	validation_1-rmse:0.05254


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df2_training.csv
[0]	validation_0-rmse:1.10567	validation_1-rmse:1.10480


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06338	validation_1-rmse:0.06374
[1000]	validation_0-rmse:0.05875	validation_1-rmse:0.05880
[1500]	validation_0-rmse:0.05731	validation_1-rmse:0.05721
[2000]	validation_0-rmse:0.05593	validation_1-rmse:0.05575
[2500]	validation_0-rmse:0.05482	validation_1-rmse:0.05459
[3000]	validation_0-rmse:0.05397	validation_1-rmse:0.05375
[3500]	validation_0-rmse:0.05333	validation_1-rmse:0.05308
[4000]	validation_0-rmse:0.05268	validation_1-rmse:0.05240
[4500]	validation_0-rmse:0.05213	validation_1-rmse:0.05185
[4999]	validation_0-rmse:0.05157	validation_1-rmse:0.05128


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df9_training.csv
[0]	validation_0-rmse:1.10557	validation_1-rmse:1.10578


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06204	validation_1-rmse:0.06238
[1000]	validation_0-rmse:0.05723	validation_1-rmse:0.05730
[1500]	validation_0-rmse:0.05573	validation_1-rmse:0.05559
[2000]	validation_0-rmse:0.05467	validation_1-rmse:0.05451
[2500]	validation_0-rmse:0.05392	validation_1-rmse:0.05377
[3000]	validation_0-rmse:0.05325	validation_1-rmse:0.05306
[3500]	validation_0-rmse:0.05266	validation_1-rmse:0.05248
[4000]	validation_0-rmse:0.05215	validation_1-rmse:0.05197
[4500]	validation_0-rmse:0.05169	validation_1-rmse:0.05147
[4999]	validation_0-rmse:0.05122	validation_1-rmse:0.05102


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df7_training.csv
[0]	validation_0-rmse:1.10561	validation_1-rmse:1.11270


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06310	validation_1-rmse:0.06271
[1000]	validation_0-rmse:0.05849	validation_1-rmse:0.05814
[1500]	validation_0-rmse:0.05712	validation_1-rmse:0.05685
[2000]	validation_0-rmse:0.05602	validation_1-rmse:0.05572
[2500]	validation_0-rmse:0.05503	validation_1-rmse:0.05471
[3000]	validation_0-rmse:0.05427	validation_1-rmse:0.05394
[3500]	validation_0-rmse:0.05361	validation_1-rmse:0.05326
[4000]	validation_0-rmse:0.05309	validation_1-rmse:0.05274
[4500]	validation_0-rmse:0.05256	validation_1-rmse:0.05225
[4999]	validation_0-rmse:0.05211	validation_1-rmse:0.05183


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df6_training.csv
[0]	validation_0-rmse:1.10561	validation_1-rmse:1.10318


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06088	validation_1-rmse:0.06073
[1000]	validation_0-rmse:0.05661	validation_1-rmse:0.05656
[1500]	validation_0-rmse:0.05529	validation_1-rmse:0.05532
[2000]	validation_0-rmse:0.05426	validation_1-rmse:0.05447
[2500]	validation_0-rmse:0.05330	validation_1-rmse:0.05350
[3000]	validation_0-rmse:0.05255	validation_1-rmse:0.05276
[3500]	validation_0-rmse:0.05189	validation_1-rmse:0.05210
[4000]	validation_0-rmse:0.05131	validation_1-rmse:0.05145
[4500]	validation_0-rmse:0.05081	validation_1-rmse:0.05093
[4999]	validation_0-rmse:0.05036	validation_1-rmse:0.05047


  df_test.iloc[:, 1] = predicted_power_rescaled


In [25]:
'''
This is dbscan with xgb custom loss and only wind parameters
'''

# modify the number below accordingly
#Participant ID number
id_number = 9
#Submission Number
Submission_number = 4

# load all the .csv file in the folder that end with "training"
# define the file paths of the CSV files 
path_training = r'../dataset/kelmarsh/training/*training.csv'
path2 = r'../dataset/penmanshiel/training/*training.csv'
files = glob.glob(path_training)+ glob.glob(path2)
# dataframe_training = pd.DataFrame()

column_list_input = ['wind_speed', 'wind_speed_sensor1', 'wind_speed_sensor1_SD', 'wind_speed_sensor2', 'wind_speed_sensor2_SD', 'density_adjusted_wind_speed']
column_list_ouput = ['power']

In [26]:
for file in files:
    dataframe_training = generate_dataframe(file)
    current_csv_file = file.split("/")[4]
    print(current_csv_file)
    
    dataframe_training = map_time(dataframe_training)    
    dataframe_training_scaled = std_scale(dataframe_training, dataframe_training)
    train_dataframe, val_dataframe = train_test_split(dataframe_training_scaled, test_size=0.2, random_state=12)
    x_train, y_train = filter_split_df_input_output(dataframe_training_scaled, column_list_input, column_list_ouput)
    x_val, y_val = filter_split_df_input_output(val_dataframe, column_list_input, column_list_ouput)

    # load the real test data
    dataframe_test = pd.DataFrame()
    test_file_name = file.replace("training", "test")
    dataframe_test = generate_dataframe(test_file_name)
    dataframe_test = map_time(dataframe_test)  
    dataframe_test_scaled = std_scale(dataframe_test[column_list_input], dataframe_training[column_list_input])

    y_pred = xgb_regressor_custom_loss(x_train, y_train, dataframe_test_scaled, x_val, y_val)
    predicted_power_rescaled = (y_pred * dataframe_training[column_list_ouput].values.std()) + dataframe_training[column_list_ouput].values.mean()

    df_test = pd.read_csv(test_file_name)
    # create a numpy array of float32 of shape [None, 1]
    # fill the second column with the prediction array
    df_test.iloc[:, 1] = predicted_power_rescaled
    # save the DataFrame as a new csv file
    csv_name = test_file_name.split("/")[4]
    prefix = str(id_number) + '_#' + str(Submission_number) + '_'
    new_file_name = '/'.join(test_file_name.split("/")[0:4]) + "/" + prefix + csv_name
    # print(new_file_name)
    df_test.to_csv(new_file_name, index=False)


Kel_df2_training.csv
[0]	validation_0-rmse:1.10538	validation_1-rmse:1.10806


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08115	validation_1-rmse:0.07996
[1000]	validation_0-rmse:0.07943	validation_1-rmse:0.07828
[1500]	validation_0-rmse:0.07858	validation_1-rmse:0.07749
[2000]	validation_0-rmse:0.07769	validation_1-rmse:0.07667
[2500]	validation_0-rmse:0.07675	validation_1-rmse:0.07578
[3000]	validation_0-rmse:0.07596	validation_1-rmse:0.07506
[3500]	validation_0-rmse:0.07527	validation_1-rmse:0.07439
[4000]	validation_0-rmse:0.07463	validation_1-rmse:0.07373
[4500]	validation_0-rmse:0.07402	validation_1-rmse:0.07308
[4999]	validation_0-rmse:0.07352	validation_1-rmse:0.07260


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df6_training.csv
[0]	validation_0-rmse:1.10518	validation_1-rmse:1.10150


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.09011	validation_1-rmse:0.09011
[1000]	validation_0-rmse:0.08839	validation_1-rmse:0.08837
[1500]	validation_0-rmse:0.08767	validation_1-rmse:0.08752
[2000]	validation_0-rmse:0.08674	validation_1-rmse:0.08656
[2500]	validation_0-rmse:0.08589	validation_1-rmse:0.08571
[3000]	validation_0-rmse:0.08515	validation_1-rmse:0.08486
[3500]	validation_0-rmse:0.08439	validation_1-rmse:0.08401
[4000]	validation_0-rmse:0.08362	validation_1-rmse:0.08320
[4500]	validation_0-rmse:0.08296	validation_1-rmse:0.08249
[4999]	validation_0-rmse:0.08241	validation_1-rmse:0.08192


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df4_training.csv
[0]	validation_0-rmse:1.10535	validation_1-rmse:1.10723


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08068	validation_1-rmse:0.07944
[1000]	validation_0-rmse:0.07872	validation_1-rmse:0.07755
[1500]	validation_0-rmse:0.07765	validation_1-rmse:0.07650
[2000]	validation_0-rmse:0.07638	validation_1-rmse:0.07523
[2500]	validation_0-rmse:0.07523	validation_1-rmse:0.07414
[3000]	validation_0-rmse:0.07430	validation_1-rmse:0.07329
[3500]	validation_0-rmse:0.07351	validation_1-rmse:0.07254
[4000]	validation_0-rmse:0.07284	validation_1-rmse:0.07188
[4500]	validation_0-rmse:0.07216	validation_1-rmse:0.07120
[4999]	validation_0-rmse:0.07154	validation_1-rmse:0.07058


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df1_training.csv
[0]	validation_0-rmse:1.10525	validation_1-rmse:1.10619


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08109	validation_1-rmse:0.08027
[1000]	validation_0-rmse:0.07935	validation_1-rmse:0.07858
[1500]	validation_0-rmse:0.07836	validation_1-rmse:0.07755
[2000]	validation_0-rmse:0.07730	validation_1-rmse:0.07647
[2500]	validation_0-rmse:0.07642	validation_1-rmse:0.07559
[3000]	validation_0-rmse:0.07557	validation_1-rmse:0.07472
[3500]	validation_0-rmse:0.07473	validation_1-rmse:0.07387
[4000]	validation_0-rmse:0.07401	validation_1-rmse:0.07313
[4500]	validation_0-rmse:0.07331	validation_1-rmse:0.07243
[4999]	validation_0-rmse:0.07270	validation_1-rmse:0.07185


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df5_training.csv
[0]	validation_0-rmse:1.10532	validation_1-rmse:1.10419


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08554	validation_1-rmse:0.08433
[1000]	validation_0-rmse:0.08359	validation_1-rmse:0.08251
[1500]	validation_0-rmse:0.08252	validation_1-rmse:0.08151
[2000]	validation_0-rmse:0.08159	validation_1-rmse:0.08061
[2500]	validation_0-rmse:0.08049	validation_1-rmse:0.07953
[3000]	validation_0-rmse:0.07961	validation_1-rmse:0.07864
[3500]	validation_0-rmse:0.07870	validation_1-rmse:0.07775
[4000]	validation_0-rmse:0.07786	validation_1-rmse:0.07696
[4500]	validation_0-rmse:0.07715	validation_1-rmse:0.07630
[4999]	validation_0-rmse:0.07644	validation_1-rmse:0.07565


  df_test.iloc[:, 1] = predicted_power_rescaled


Kel_df3_training.csv
[0]	validation_0-rmse:1.10532	validation_1-rmse:1.11021


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.09253	validation_1-rmse:0.09089
[1000]	validation_0-rmse:0.09060	validation_1-rmse:0.08877
[1500]	validation_0-rmse:0.08956	validation_1-rmse:0.08768
[2000]	validation_0-rmse:0.08862	validation_1-rmse:0.08682
[2500]	validation_0-rmse:0.08760	validation_1-rmse:0.08574
[3000]	validation_0-rmse:0.08677	validation_1-rmse:0.08493
[3500]	validation_0-rmse:0.08595	validation_1-rmse:0.08420
[4000]	validation_0-rmse:0.08526	validation_1-rmse:0.08357
[4500]	validation_0-rmse:0.08456	validation_1-rmse:0.08296
[4999]	validation_0-rmse:0.08395	validation_1-rmse:0.08239


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df13_training.csv
[0]	validation_0-rmse:1.10581	validation_1-rmse:1.11128


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06370	validation_1-rmse:0.06233
[1000]	validation_0-rmse:0.06122	validation_1-rmse:0.05990
[1500]	validation_0-rmse:0.06066	validation_1-rmse:0.05944
[2000]	validation_0-rmse:0.06011	validation_1-rmse:0.05894
[2500]	validation_0-rmse:0.05962	validation_1-rmse:0.05847
[3000]	validation_0-rmse:0.05901	validation_1-rmse:0.05789
[3500]	validation_0-rmse:0.05842	validation_1-rmse:0.05729
[4000]	validation_0-rmse:0.05785	validation_1-rmse:0.05676
[4500]	validation_0-rmse:0.05728	validation_1-rmse:0.05628
[4999]	validation_0-rmse:0.05679	validation_1-rmse:0.05580


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df8_training.csv
[0]	validation_0-rmse:1.10558	validation_1-rmse:1.10701


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06427	validation_1-rmse:0.06413
[1000]	validation_0-rmse:0.06167	validation_1-rmse:0.06170
[1500]	validation_0-rmse:0.06115	validation_1-rmse:0.06114
[2000]	validation_0-rmse:0.06068	validation_1-rmse:0.06065
[2500]	validation_0-rmse:0.06017	validation_1-rmse:0.06015
[3000]	validation_0-rmse:0.05968	validation_1-rmse:0.05966
[3500]	validation_0-rmse:0.05914	validation_1-rmse:0.05911
[4000]	validation_0-rmse:0.05866	validation_1-rmse:0.05866
[4500]	validation_0-rmse:0.05817	validation_1-rmse:0.05819
[4999]	validation_0-rmse:0.05777	validation_1-rmse:0.05780


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df15_training.csv
[0]	validation_0-rmse:1.10575	validation_1-rmse:1.11728


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.08114	validation_1-rmse:0.07964
[1000]	validation_0-rmse:0.07795	validation_1-rmse:0.07665
[1500]	validation_0-rmse:0.07739	validation_1-rmse:0.07617
[2000]	validation_0-rmse:0.07687	validation_1-rmse:0.07570
[2500]	validation_0-rmse:0.07626	validation_1-rmse:0.07512
[3000]	validation_0-rmse:0.07568	validation_1-rmse:0.07459
[3500]	validation_0-rmse:0.07512	validation_1-rmse:0.07405
[4000]	validation_0-rmse:0.07463	validation_1-rmse:0.07358
[4500]	validation_0-rmse:0.07415	validation_1-rmse:0.07310
[4999]	validation_0-rmse:0.07366	validation_1-rmse:0.07266


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df14_training.csv
[0]	validation_0-rmse:1.10572	validation_1-rmse:1.11112


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06917	validation_1-rmse:0.06873
[1000]	validation_0-rmse:0.06661	validation_1-rmse:0.06582
[1500]	validation_0-rmse:0.06608	validation_1-rmse:0.06523
[2000]	validation_0-rmse:0.06568	validation_1-rmse:0.06482
[2500]	validation_0-rmse:0.06526	validation_1-rmse:0.06443
[3000]	validation_0-rmse:0.06473	validation_1-rmse:0.06394
[3500]	validation_0-rmse:0.06413	validation_1-rmse:0.06345
[4000]	validation_0-rmse:0.06363	validation_1-rmse:0.06300
[4500]	validation_0-rmse:0.06308	validation_1-rmse:0.06252
[4999]	validation_0-rmse:0.06258	validation_1-rmse:0.06201


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df1_training.csv
[0]	validation_0-rmse:1.10541	validation_1-rmse:1.11497


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06610	validation_1-rmse:0.06331
[1000]	validation_0-rmse:0.06360	validation_1-rmse:0.06083
[1500]	validation_0-rmse:0.06308	validation_1-rmse:0.06034
[2000]	validation_0-rmse:0.06261	validation_1-rmse:0.05993
[2500]	validation_0-rmse:0.06208	validation_1-rmse:0.05947
[3000]	validation_0-rmse:0.06155	validation_1-rmse:0.05898
[3500]	validation_0-rmse:0.06099	validation_1-rmse:0.05841
[4000]	validation_0-rmse:0.06052	validation_1-rmse:0.05795
[4500]	validation_0-rmse:0.05997	validation_1-rmse:0.05748
[4999]	validation_0-rmse:0.05943	validation_1-rmse:0.05700


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df4_training.csv
[0]	validation_0-rmse:1.10553	validation_1-rmse:1.10638


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06648	validation_1-rmse:0.06565
[1000]	validation_0-rmse:0.06387	validation_1-rmse:0.06287
[1500]	validation_0-rmse:0.06330	validation_1-rmse:0.06235
[2000]	validation_0-rmse:0.06281	validation_1-rmse:0.06192
[2500]	validation_0-rmse:0.06224	validation_1-rmse:0.06138
[3000]	validation_0-rmse:0.06169	validation_1-rmse:0.06088
[3500]	validation_0-rmse:0.06124	validation_1-rmse:0.06049
[4000]	validation_0-rmse:0.06068	validation_1-rmse:0.05997
[4500]	validation_0-rmse:0.06006	validation_1-rmse:0.05930
[4999]	validation_0-rmse:0.05954	validation_1-rmse:0.05869


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df11_training.csv
[0]	validation_0-rmse:1.10585	validation_1-rmse:1.11292


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07069	validation_1-rmse:0.06917
[1000]	validation_0-rmse:0.06768	validation_1-rmse:0.06594
[1500]	validation_0-rmse:0.06709	validation_1-rmse:0.06526
[2000]	validation_0-rmse:0.06666	validation_1-rmse:0.06489
[2500]	validation_0-rmse:0.06618	validation_1-rmse:0.06438
[3000]	validation_0-rmse:0.06570	validation_1-rmse:0.06384
[3500]	validation_0-rmse:0.06526	validation_1-rmse:0.06341
[4000]	validation_0-rmse:0.06484	validation_1-rmse:0.06300
[4500]	validation_0-rmse:0.06444	validation_1-rmse:0.06265
[4999]	validation_0-rmse:0.06410	validation_1-rmse:0.06237


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df5_training.csv
[0]	validation_0-rmse:1.10564	validation_1-rmse:1.10921


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06735	validation_1-rmse:0.06691
[1000]	validation_0-rmse:0.06454	validation_1-rmse:0.06381
[1500]	validation_0-rmse:0.06397	validation_1-rmse:0.06320
[2000]	validation_0-rmse:0.06356	validation_1-rmse:0.06285
[2500]	validation_0-rmse:0.06309	validation_1-rmse:0.06246
[3000]	validation_0-rmse:0.06265	validation_1-rmse:0.06205
[3500]	validation_0-rmse:0.06212	validation_1-rmse:0.06157
[4000]	validation_0-rmse:0.06166	validation_1-rmse:0.06114
[4500]	validation_0-rmse:0.06120	validation_1-rmse:0.06072
[4999]	validation_0-rmse:0.06078	validation_1-rmse:0.06036


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df10_training.csv
[0]	validation_0-rmse:1.10585	validation_1-rmse:1.10076


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07222	validation_1-rmse:0.07384
[1000]	validation_0-rmse:0.06947	validation_1-rmse:0.07104
[1500]	validation_0-rmse:0.06893	validation_1-rmse:0.07048
[2000]	validation_0-rmse:0.06839	validation_1-rmse:0.06998
[2500]	validation_0-rmse:0.06791	validation_1-rmse:0.06948
[3000]	validation_0-rmse:0.06752	validation_1-rmse:0.06910
[3500]	validation_0-rmse:0.06714	validation_1-rmse:0.06874
[4000]	validation_0-rmse:0.06665	validation_1-rmse:0.06823
[4500]	validation_0-rmse:0.06616	validation_1-rmse:0.06773
[4999]	validation_0-rmse:0.06569	validation_1-rmse:0.06726


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df12_training.csv
[0]	validation_0-rmse:1.10571	validation_1-rmse:1.10942


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07413	validation_1-rmse:0.07372
[1000]	validation_0-rmse:0.07135	validation_1-rmse:0.07091
[1500]	validation_0-rmse:0.07079	validation_1-rmse:0.07031
[2000]	validation_0-rmse:0.07026	validation_1-rmse:0.06974
[2500]	validation_0-rmse:0.06965	validation_1-rmse:0.06917
[3000]	validation_0-rmse:0.06909	validation_1-rmse:0.06862
[3500]	validation_0-rmse:0.06855	validation_1-rmse:0.06806
[4000]	validation_0-rmse:0.06805	validation_1-rmse:0.06757
[4500]	validation_0-rmse:0.06750	validation_1-rmse:0.06708
[4999]	validation_0-rmse:0.06707	validation_1-rmse:0.06670


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df2_training.csv
[0]	validation_0-rmse:1.10567	validation_1-rmse:1.10480


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06968	validation_1-rmse:0.07009
[1000]	validation_0-rmse:0.06720	validation_1-rmse:0.06719
[1500]	validation_0-rmse:0.06666	validation_1-rmse:0.06661
[2000]	validation_0-rmse:0.06612	validation_1-rmse:0.06609
[2500]	validation_0-rmse:0.06561	validation_1-rmse:0.06562
[3000]	validation_0-rmse:0.06519	validation_1-rmse:0.06524
[3500]	validation_0-rmse:0.06477	validation_1-rmse:0.06482
[4000]	validation_0-rmse:0.06424	validation_1-rmse:0.06431
[4500]	validation_0-rmse:0.06372	validation_1-rmse:0.06384
[4999]	validation_0-rmse:0.06329	validation_1-rmse:0.06342


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df9_training.csv
[0]	validation_0-rmse:1.10557	validation_1-rmse:1.10578


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07022	validation_1-rmse:0.07070
[1000]	validation_0-rmse:0.06779	validation_1-rmse:0.06823
[1500]	validation_0-rmse:0.06717	validation_1-rmse:0.06763
[2000]	validation_0-rmse:0.06660	validation_1-rmse:0.06709
[2500]	validation_0-rmse:0.06611	validation_1-rmse:0.06661
[3000]	validation_0-rmse:0.06569	validation_1-rmse:0.06619
[3500]	validation_0-rmse:0.06522	validation_1-rmse:0.06572
[4000]	validation_0-rmse:0.06480	validation_1-rmse:0.06529
[4500]	validation_0-rmse:0.06437	validation_1-rmse:0.06484
[4999]	validation_0-rmse:0.06396	validation_1-rmse:0.06445


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df7_training.csv
[0]	validation_0-rmse:1.10561	validation_1-rmse:1.11270


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.07056	validation_1-rmse:0.06989
[1000]	validation_0-rmse:0.06799	validation_1-rmse:0.06729
[1500]	validation_0-rmse:0.06742	validation_1-rmse:0.06680
[2000]	validation_0-rmse:0.06683	validation_1-rmse:0.06627
[2500]	validation_0-rmse:0.06626	validation_1-rmse:0.06569
[3000]	validation_0-rmse:0.06571	validation_1-rmse:0.06516
[3500]	validation_0-rmse:0.06516	validation_1-rmse:0.06464
[4000]	validation_0-rmse:0.06467	validation_1-rmse:0.06422
[4500]	validation_0-rmse:0.06408	validation_1-rmse:0.06370
[4999]	validation_0-rmse:0.06358	validation_1-rmse:0.06321


  df_test.iloc[:, 1] = predicted_power_rescaled


Pen_df6_training.csv
[0]	validation_0-rmse:1.10561	validation_1-rmse:1.10318


  df['month'] = df['time'].dt.month_name()


[500]	validation_0-rmse:0.06752	validation_1-rmse:0.06619
[1000]	validation_0-rmse:0.06476	validation_1-rmse:0.06349
[1500]	validation_0-rmse:0.06423	validation_1-rmse:0.06288
[2000]	validation_0-rmse:0.06367	validation_1-rmse:0.06230
[2500]	validation_0-rmse:0.06312	validation_1-rmse:0.06175
[3000]	validation_0-rmse:0.06262	validation_1-rmse:0.06129
[3500]	validation_0-rmse:0.06205	validation_1-rmse:0.06078
[4000]	validation_0-rmse:0.06149	validation_1-rmse:0.06027
[4500]	validation_0-rmse:0.06093	validation_1-rmse:0.05980
[4999]	validation_0-rmse:0.06045	validation_1-rmse:0.05939


  df_test.iloc[:, 1] = predicted_power_rescaled
