### Import libraries

In [10]:
import os
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import tensorflow as tf
import sklearn
%matplotlib inline

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)

### Data cleaning and preprocessing

In [11]:
#Read dataset
train_a = pd.read_parquet('A/train_targets.parquet')
train_b = pd.read_parquet('B/train_targets.parquet')
train_c = pd.read_parquet('C/train_targets.parquet')
X_train_estimated_a = pd.read_parquet('A/X_train_estimated.parquet')
X_train_estimated_b = pd.read_parquet('B/X_train_estimated.parquet')
X_train_estimated_c = pd.read_parquet('C/X_train_estimated.parquet')
X_train_observed_a = pd.read_parquet('A/X_train_observed.parquet')
X_train_observed_b = pd.read_parquet('B/X_train_observed.parquet')
X_train_observed_c = pd.read_parquet('C/X_train_observed.parquet')

#add location to each sample
train_a["location"] = "A"
train_b["location"] = "B"
train_c["location"] = "C"
X_train_estimated_a["location"] = "A"
X_train_estimated_b["location"] = "B"
X_train_estimated_c["location"] = "C"
X_train_observed_a["location"] = "A"
X_train_observed_b["location"] = "B"
X_train_observed_c["location"] = "C"

#remove extra minute 00 sample
X_train_observed_a = X_train_observed_a.iloc[:-1,:]
X_train_observed_b = X_train_observed_b.iloc[:-1,:]
X_train_observed_c = X_train_observed_c.iloc[:-1,:]

#add date_calc column same as date_forecast column to observed data
X_train_observed_a.insert(0, "date_calc", X_train_observed_a["date_forecast"])
X_train_observed_b.insert(0, "date_calc", X_train_observed_b["date_forecast"])
X_train_observed_c.insert(0, "date_calc", X_train_observed_c["date_forecast"])

#concat all the samples and remove date_calc column
X_train_raw = pd.concat([X_train_observed_a,
                     X_train_observed_b,
                     X_train_observed_c,
                     X_train_estimated_a,
                     X_train_estimated_b,
                     X_train_estimated_c])

#feature indicating time between date_calc and date_forecast
X_train_raw["calc_time"] =(X_train_raw["date_forecast"] - X_train_raw["date_calc"]).astype('timedelta64[s]')

#fill nans
X_train_raw["snow_density:kgm3"] = X_train_raw["snow_density:kgm3"].apply(
    lambda a : np.isnan(a)
    ).map({True: 0, False: 1})
X_train_raw["ceiling_height_agl:m"] = X_train_raw["ceiling_height_agl:m"].apply(
    lambda a : -1000 if np.isnan(a) else a
)
X_train_raw["cloud_base_agl:m"] = X_train_raw["ceiling_height_agl:m"].apply(
    lambda a : -1000 if np.isnan(a) else a
)

#create seperate dataframes for measurments at minute 00, 15, 30 and 45
X_train00 = X_train_raw[X_train_raw["date_forecast"].apply(lambda time: time.minute == 0)].reset_index().iloc[:,1:]
X_train15 = X_train_raw[X_train_raw["date_forecast"].apply(lambda time: time.minute == 15)].reset_index().iloc[:,1:]
X_train30 = X_train_raw[X_train_raw["date_forecast"].apply(lambda time: time.minute == 30)].reset_index().iloc[:,1:]
X_train45 = X_train_raw[X_train_raw["date_forecast"].apply(lambda time: time.minute == 45)].reset_index().iloc[:,1:]

#remove redundant data
X_train15 = X_train15.iloc[:,2:-2]
X_train30 = X_train30.iloc[:,2:-2]
X_train45 = X_train45.iloc[:,2:-2]

#join observations into single sample
X_train = X_train00.join(X_train15, lsuffix="_00", rsuffix="_15").join(X_train30.join(X_train45, lsuffix="_30", rsuffix="_45"))

#rename column for merging with targets
X_train = X_train.rename(columns={"date_forecast" : "time"})

#concat target values and drop NaN values
targets = pd.concat([train_a,
                     train_b,
                     train_c]).dropna()

#merge weatherfeatures with corresponding target pv measurement
dataset = pd.merge(X_train, targets, how="right", on=["time", "location"])

#shuffle dataset
dataset = dataset.sample(frac=1, random_state=43).reset_index().iloc[:,1:]

#split into features and targets
datasetX = dataset.iloc[:, :-1]
datasetY = dataset.iloc[:, -1:]

#add day_of_year and hour feature columns
datasetX["day"] = datasetX["time"].dt.day_of_year
datasetX["hour"] = datasetX["time"].dt.hour

#get indexes of samples in the months of the test dataset
indexMayJuneJuly = datasetX["time"].apply(lambda time : time.month in [5, 6, 7])

#OHE encoding for catagorical feature "location"
datasetX["location_A"] = datasetX["location"].apply(lambda a : a == "A").map({True: 1, False: 0})
datasetX["location_B"] = datasetX["location"].apply(lambda a : a == "B").map({True: 1, False: 0})
datasetX["location_C"] = datasetX["location"].apply(lambda a : a == "C").map({True: 1, False: 0})

#drop location column because we have made the OHE encoding
datasetX = datasetX.drop("location", axis=1)

#drop time and date_calc columns
datasetX = datasetX.iloc[:,2:]

#calculate mean and std for normalizing data, values should also be used for normalizing test data
dataMean = datasetX.mean()
dataStd = datasetX.std()

#normalize data
datasetX.iloc[:,:-4] = ((datasetX.iloc[:,:-4]-dataMean[:-4])/dataStd[:-4]).fillna(value=0)

#partition into training and evalset
trainsetX = datasetX.iloc[:85000,:]
trainsetY = datasetY.iloc[:85000,:]
trainsetIndexMayJuneJuly = indexMayJuneJuly[:85000]
evalsetX = datasetX.iloc[85000:,:]
evalsetY = datasetY.iloc[85000:,:]
evalsetIndexMayJuneJuly = indexMayJuneJuly[85000:]

display(datasetX)

  datasetX.iloc[:,:-4] = ((datasetX.iloc[:,:-4]-dataMean[:-4])/dataStd[:-4]).fillna(value=0)


Unnamed: 0,absolute_humidity_2m:gm3_00,air_density_2m:kgm3_00,ceiling_height_agl:m_00,clear_sky_energy_1h:J_00,clear_sky_rad:W_00,cloud_base_agl:m_00,dew_or_rime:idx_00,dew_point_2m:K_00,diffuse_rad:W_00,diffuse_rad_1h:J_00,direct_rad:W_00,direct_rad_1h:J_00,effective_cloud_cover:p_00,elevation:m_00,fresh_snow_12h:cm_00,fresh_snow_1h:cm_00,fresh_snow_24h:cm_00,fresh_snow_3h:cm_00,fresh_snow_6h:cm_00,is_day:idx_00,is_in_shadow:idx_00,msl_pressure:hPa_00,precip_5min:mm_00,precip_type_5min:idx_00,pressure_100m:hPa_00,pressure_50m:hPa_00,prob_rime:p_00,rain_water:kgm2_00,relative_humidity_1000hPa:p_00,sfc_pressure:hPa_00,snow_density:kgm3_00,snow_depth:cm_00,snow_drift:idx_00,snow_melt_10min:mm_00,snow_water:kgm2_00,sun_azimuth:d_00,sun_elevation:d_00,super_cooled_liquid_water:kgm2_00,t_1000hPa:K_00,total_cloud_cover:p_00,visibility:m_00,wind_speed_10m:ms_00,wind_speed_u_10m:ms_00,wind_speed_v_10m:ms_00,wind_speed_w_1000hPa:ms_00,calc_time,absolute_humidity_2m:gm3_15,air_density_2m:kgm3_15,ceiling_height_agl:m_15,clear_sky_energy_1h:J_15,clear_sky_rad:W_15,cloud_base_agl:m_15,dew_or_rime:idx_15,dew_point_2m:K_15,diffuse_rad:W_15,diffuse_rad_1h:J_15,direct_rad:W_15,direct_rad_1h:J_15,effective_cloud_cover:p_15,elevation:m_15,fresh_snow_12h:cm_15,fresh_snow_1h:cm_15,fresh_snow_24h:cm_15,fresh_snow_3h:cm_15,fresh_snow_6h:cm_15,is_day:idx_15,is_in_shadow:idx_15,msl_pressure:hPa_15,precip_5min:mm_15,precip_type_5min:idx_15,pressure_100m:hPa_15,pressure_50m:hPa_15,prob_rime:p_15,rain_water:kgm2_15,relative_humidity_1000hPa:p_15,sfc_pressure:hPa_15,snow_density:kgm3_15,snow_depth:cm_15,snow_drift:idx_15,snow_melt_10min:mm_15,snow_water:kgm2_15,sun_azimuth:d_15,sun_elevation:d_15,super_cooled_liquid_water:kgm2_15,t_1000hPa:K_15,total_cloud_cover:p_15,visibility:m_15,wind_speed_10m:ms_15,wind_speed_u_10m:ms_15,wind_speed_v_10m:ms_15,wind_speed_w_1000hPa:ms_15,absolute_humidity_2m:gm3_30,air_density_2m:kgm3_30,ceiling_height_agl:m_30,clear_sky_energy_1h:J_30,clear_sky_rad:W_30,cloud_base_agl:m_30,dew_or_rime:idx_30,dew_point_2m:K_30,diffuse_rad:W_30,diffuse_rad_1h:J_30,direct_rad:W_30,direct_rad_1h:J_30,effective_cloud_cover:p_30,elevation:m_30,fresh_snow_12h:cm_30,fresh_snow_1h:cm_30,fresh_snow_24h:cm_30,fresh_snow_3h:cm_30,fresh_snow_6h:cm_30,is_day:idx_30,is_in_shadow:idx_30,msl_pressure:hPa_30,precip_5min:mm_30,precip_type_5min:idx_30,pressure_100m:hPa_30,pressure_50m:hPa_30,prob_rime:p_30,rain_water:kgm2_30,relative_humidity_1000hPa:p_30,sfc_pressure:hPa_30,snow_density:kgm3_30,snow_depth:cm_30,snow_drift:idx_30,snow_melt_10min:mm_30,snow_water:kgm2_30,sun_azimuth:d_30,sun_elevation:d_30,super_cooled_liquid_water:kgm2_30,t_1000hPa:K_30,total_cloud_cover:p_30,visibility:m_30,wind_speed_10m:ms_30,wind_speed_u_10m:ms_30,wind_speed_v_10m:ms_30,wind_speed_w_1000hPa:ms_30,absolute_humidity_2m:gm3_45,air_density_2m:kgm3_45,ceiling_height_agl:m_45,clear_sky_energy_1h:J_45,clear_sky_rad:W_45,cloud_base_agl:m_45,dew_or_rime:idx_45,dew_point_2m:K_45,diffuse_rad:W_45,diffuse_rad_1h:J_45,direct_rad:W_45,direct_rad_1h:J_45,effective_cloud_cover:p_45,elevation:m_45,fresh_snow_12h:cm_45,fresh_snow_1h:cm_45,fresh_snow_24h:cm_45,fresh_snow_3h:cm_45,fresh_snow_6h:cm_45,is_day:idx_45,is_in_shadow:idx_45,msl_pressure:hPa_45,precip_5min:mm_45,precip_type_5min:idx_45,pressure_100m:hPa_45,pressure_50m:hPa_45,prob_rime:p_45,rain_water:kgm2_45,relative_humidity_1000hPa:p_45,sfc_pressure:hPa_45,snow_density:kgm3_45,snow_depth:cm_45,snow_drift:idx_45,snow_melt_10min:mm_45,snow_water:kgm2_45,sun_azimuth:d_45,sun_elevation:d_45,super_cooled_liquid_water:kgm2_45,t_1000hPa:K_45,total_cloud_cover:p_45,visibility:m_45,wind_speed_10m:ms_45,wind_speed_u_10m:ms_45,wind_speed_v_10m:ms_45,wind_speed_w_1000hPa:ms_45,day,hour,location_A,location_B,location_C
0,1.761752,-0.585507,-0.372920,-0.627702,-0.626239,-0.372920,-0.028563,1.486860,-0.651303,-0.658624,-0.444557,-0.450014,0.941315,-0.685717,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,-0.967020,0.876658,0.901333,-0.169389,-0.216360,0.974829,0.968097,-0.139264,-0.225272,1.160655,0.959198,-0.216887,-0.15401,-0.004639,-0.063687,-0.359856,1.672428,-0.411882,0.386232,0.915435,0.754384,-0.758076,-0.920997,0.262537,-0.254301,0.002482,-0.352674,1.763205,-0.585987,-0.478451,-0.627706,-0.626169,-0.478451,-0.028321,1.502330,-0.648880,-0.656971,-0.443574,-0.449611,0.955453,-0.685717,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,-0.967512,0.879265,0.909154,-0.167724,-0.217993,0.982744,0.975949,-0.138865,-0.226437,1.174088,0.959366,-0.216288,-0.153972,-0.004639,-0.063272,-0.374745,1.722545,-0.416359,0.397305,0.916001,0.768290,-0.882033,-0.928560,0.262057,-0.310744,0.002208,1.800525,-0.586228,-0.498677,-0.627703,-0.626210,-0.498677,-0.027493,1.502584,-0.643986,-0.654097,-0.440430,-0.448098,0.960375,-0.685717,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,-0.966341,0.879448,0.916899,-0.172423,-0.217262,0.990512,0.983689,-0.139087,-0.226634,1.190342,0.967070,-0.216506,-0.153994,-0.004639,-0.063791,-0.380506,-1.695781,-0.416313,0.406189,0.916195,0.773088,-0.998382,-0.931394,0.261675,-0.365140,0.000171,1.800147,-0.586014,-0.514379,-0.627706,-0.626247,-0.514379,-0.02672,1.516930,-0.649088,-0.657199,-0.443540,-0.449559,0.955638,-0.685717,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,-0.967085,0.879266,0.924501,-0.173264,-0.220413,0.998162,0.983659,-0.138842,-0.227205,1.195160,0.974671,-0.216696,-0.154045,-0.004639,-0.063892,-0.374737,-1.687431,-0.411828,0.399664,0.900705,0.768570,-1.102569,-0.872146,0.297386,-0.416623,0.000172,0.427953,23,1,0,0
1,2.056458,-1.514240,-1.083697,-0.510940,-0.405241,-1.083697,-0.028563,1.691697,-0.204412,-0.371524,-0.170943,-0.311678,-1.900910,1.599270,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,1.033681,-1.140427,0.175525,-0.169389,-0.216360,0.075401,0.057415,-0.139264,-0.225272,-0.444538,0.037487,-0.216887,-0.15401,-0.004639,-0.063687,-0.359856,-1.304262,0.291273,-0.510814,1.988996,-2.080729,0.781245,-1.089687,-0.413889,0.167425,0.002482,-0.352674,2.131875,-1.570204,-1.164946,-0.460206,-0.329038,-1.164946,-0.028321,1.721927,-0.123725,-0.290892,-0.056791,-0.253761,-1.928539,1.599270,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,1.033138,-1.137230,0.175560,-0.167724,-0.217993,0.075444,0.057440,-0.138865,-0.226437,-0.440043,0.037494,-0.216288,-0.153972,-0.004639,-0.063272,-0.374745,-1.255965,0.344830,-0.527352,2.005540,-2.113431,0.802300,-1.098617,-0.414947,0.166647,0.002208,2.169282,-1.625412,-1.170749,-0.397346,-0.240673,-1.170749,-0.027493,1.751506,-0.058177,-0.217762,0.083016,-0.121441,-1.932202,1.599270,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,1.034960,-1.136821,0.175591,-0.172423,-0.217262,0.075463,0.057449,-0.139087,-0.226634,-0.426867,0.037511,-0.216506,-0.153994,-0.004639,-0.063791,-0.380506,-1.183207,0.400797,-0.532750,2.005897,-2.115647,0.814412,-1.101951,-0.451151,0.166478,0.000171,2.242558,-1.652189,-1.165899,-0.321674,-0.140924,-1.165899,-0.02672,1.780434,0.027593,-0.133641,0.232872,-0.013130,-1.911208,1.599270,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,1.033438,-1.137232,0.175584,-0.173264,-0.220413,0.075452,0.057443,-0.138842,-0.227205,-0.419126,0.037510,-0.216696,-0.154045,-0.004639,-0.063892,-0.374737,-1.179275,0.459095,-0.527073,2.020882,-2.081468,0.816761,-1.042220,-0.450837,0.166955,0.000172,0.090168,3,0,0,1
2,-1.737887,3.539171,-0.412686,-0.626432,-0.610047,-0.412686,-0.028563,-2.653791,-0.590288,-0.628091,-0.444557,-0.450014,-0.359945,-0.685717,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,1.033681,0.876658,1.695899,-0.169389,-0.216360,1.682072,1.725727,-0.139264,-0.225272,0.295247,1.759023,-0.216887,-0.15401,-0.004639,-0.063687,-0.359856,-0.359130,0.071243,-0.510814,-2.366592,-0.345937,-0.366080,-0.920997,-0.164679,0.378287,0.002482,-0.352674,-1.739155,3.514916,-0.514123,-0.618333,-0.580221,-0.514123,-0.028321,-2.626094,-0.502364,-0.582875,-0.443574,-0.449611,-0.171424,-0.685717,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,1.033138,0.879265,1.696231,-0.167724,-0.217993,1.682445,1.726064,-0.138865,-0.226437,0.254735,1.759331,-0.216288,-0.153972,-0.004639,-0.063272,-0.374745,-0.310218,0.109259,-0.527352,-2.352610,-0.161109,-0.377004,-0.928560,-0.165524,0.378821,0.002208,-1.739543,3.488468,-0.530736,-0.603500,-0.553129,-0.530736,-0.027493,-2.611918,-0.448716,-0.554584,-0.440430,-0.448098,0.016635,-0.685717,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,1.034960,0.879448,1.696413,-0.172423,-0.217262,1.682563,1.718561,-0.139087,-0.226634,0.212985,1.759477,-0.216506,-0.153994,-0.004639,-0.063791,-0.380506,-0.234473,0.143727,-0.532750,-2.337557,0.016304,-0.384541,-0.874541,-0.130379,0.432286,0.000171,-1.739145,3.459992,-0.542557,-0.581944,-0.526469,-0.542557,-0.02672,-2.582053,-0.374135,-0.517722,-0.379078,-0.416938,0.200440,-0.685717,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,1.033438,-1.137232,1.696343,-0.173264,-0.220413,1.682507,1.718512,-0.138842,-0.227205,0.177458,1.759440,-0.216696,-0.154045,-0.004639,-0.063892,-0.374737,-0.227918,0.174438,-0.527073,-2.337055,0.183947,-0.388193,-0.872146,-0.130170,0.432218,0.000172,-1.470947,9,1,0,0
3,0.693441,-1.049874,-1.083697,0.532734,0.340899,-1.083697,-0.028563,0.828453,0.224339,0.277023,1.045710,1.355522,-1.889496,1.599270,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,1.033681,-1.140427,0.565172,-0.169389,-0.216360,0.444396,0.432400,-0.139264,-0.225272,-1.079636,0.418359,-0.216887,-0.15401,-0.004639,-0.063687,-0.359856,0.418522,0.694974,-0.510814,1.176159,-2.080729,1.168934,1.047052,-1.695538,1.116307,0.002482,-0.352674,0.694064,-1.023414,-1.164946,0.438571,0.235029,-1.164946,-0.028321,0.828899,0.175893,0.244135,0.880142,1.201406,-1.841409,1.599270,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,1.033138,-1.137230,0.565283,-0.167724,-0.217993,0.444515,0.432495,-0.138865,-0.226437,-1.057624,0.418432,-0.216288,-0.153972,-0.004639,-0.063272,-0.374745,0.469448,0.643457,-0.527352,1.161532,-1.967299,1.166098,0.998757,-1.662058,1.068386,0.002208,0.694254,-0.996432,-1.170749,0.338210,0.124733,-1.170749,-0.027493,0.814392,0.114120,0.199226,0.703622,1.035018,-1.768581,1.599270,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,1.034960,-1.136821,0.565353,-0.172423,-0.217262,0.444557,0.432536,-0.139087,-0.226634,-1.031562,0.418477,-0.216506,-0.153994,-0.004639,-0.063791,-0.380506,0.548196,0.589018,-0.532750,1.161762,-1.818234,1.149023,0.944738,-1.662956,0.963903,0.000171,0.694118,-0.968743,-1.165899,0.232737,0.012243,-1.165899,-0.02672,0.814244,0.093450,0.178299,0.515446,0.868379,-1.673031,1.599270,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,1.033438,-1.137232,0.565329,-0.173264,-0.220413,0.444535,0.432520,-0.138842,-0.227205,-1.001672,0.418468,-0.216696,-0.154045,-0.004639,-0.063892,-0.374737,0.556529,0.531904,-0.527073,1.146223,-1.640078,1.117843,0.885287,-1.626616,0.909691,0.000172,0.911808,14,0,0,1
4,-0.043325,-0.039193,-0.325236,-0.627702,-0.626239,-0.325236,-0.028563,0.140785,-0.651303,-0.658624,-0.444557,-0.450014,0.941315,-0.558773,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,-0.967020,0.876658,-0.290518,1.920496,2.382284,-0.239781,-0.241047,-0.139264,-0.225272,0.672118,-0.236738,-0.216887,-0.15401,-0.004639,-0.063687,2.827395,-0.914508,-0.569230,1.283278,-0.127451,0.754384,-1.692682,1.328202,1.686591,-0.096154,0.002482,-0.352674,-0.043275,-0.011860,-0.437688,-0.627706,-0.626169,-0.437688,-0.028321,0.126190,-0.648880,-0.656971,-0.443574,-0.449611,0.955453,-0.558773,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,-0.967512,0.879265,-0.290575,-0.167724,-0.217993,-0.239802,-0.241078,-0.138865,-0.226437,0.710902,-0.236779,-0.216288,-0.153972,-0.004639,-0.063272,2.531421,-0.865346,-0.499241,1.321962,-0.158191,0.768290,-1.713525,1.225500,1.580432,0.007517,0.002208,-0.080136,0.015407,-0.464787,-0.627703,-0.626210,-0.464787,-0.027493,0.111560,-0.643986,-0.654097,-0.440430,-0.448098,0.960375,-0.558773,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,-0.966341,0.879448,-0.282949,-0.172423,-0.217262,-0.239803,-0.233436,-0.139087,-0.226634,0.754399,-0.236783,-0.216506,-0.153994,-0.004639,-0.063791,2.146563,-0.790944,-0.429392,0.406189,-0.188851,0.773088,-1.714313,1.058443,1.473480,0.166478,0.000171,-0.080102,0.042757,-0.488245,-0.627706,-0.626247,-0.488245,-0.02672,0.096923,-0.649088,-0.657199,-0.443540,-0.449559,0.955638,-0.558773,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,-0.967085,0.879266,-0.275293,-0.173264,-0.220413,-0.232118,-0.233434,-0.138842,-0.227205,0.795098,-0.229161,-0.216696,-0.154045,-0.004639,-0.063892,2.116180,-0.785704,-0.359887,0.399664,-0.219468,0.768570,-1.694261,0.941978,1.366276,0.326112,0.000172,-1.178808,5,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93019,1.761752,-0.940612,-0.343415,-0.609688,-0.541341,-0.343415,-0.028563,1.486860,-0.585341,-0.625541,-0.444557,-0.450014,0.941315,-0.558773,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,1.033681,-1.140427,-0.122435,-0.169389,-0.216360,-0.039910,-0.049726,-0.139264,9.086243,1.488673,-0.061538,-0.216887,-0.15401,-0.004639,-0.063687,0.835363,-0.960896,0.162704,1.283278,1.176159,0.754384,-1.758067,-1.089687,0.120131,-0.043438,0.002482,-0.352674,1.800072,-0.941399,-0.382380,-0.581234,-0.473884,-0.382380,-0.028321,1.502330,-0.517180,-0.590138,-0.443574,-0.449611,0.955453,-0.558773,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,1.033138,-1.137230,-0.114820,-0.167724,-0.217993,-0.032199,-0.042065,-0.138865,9.295167,1.496914,-0.053927,-0.216288,-0.153972,-0.004639,-0.063272,1.285921,-0.911782,0.232459,1.321962,1.176879,0.768290,-1.788753,-1.041932,0.119530,0.007517,0.002208,1.800525,-0.969084,-0.334757,-0.531884,-0.385523,-0.334757,-0.027493,1.517227,-0.412616,-0.536663,-0.440430,-0.448098,0.960375,-0.558773,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,1.034960,-1.136821,-0.107177,-0.172423,-0.217262,-0.024499,-0.042064,-0.139087,9.333560,1.492690,-0.053918,-0.216506,-0.153994,-0.004639,-0.063791,1.304206,-0.837272,0.302417,2.284067,1.161762,0.773088,-1.798708,-0.988246,0.154751,0.060154,0.000171,1.837015,-0.968743,-0.283846,-0.461829,-0.280088,-0.283846,-0.02672,1.516930,-0.360964,-0.500652,-0.442657,-0.449047,0.955638,-0.558773,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,1.033438,-1.137232,-0.099527,-0.173264,-0.220413,-0.016820,-0.034414,-0.138842,9.247398,1.482924,-0.046299,-0.216696,-0.154045,-0.004639,-0.063892,1.701027,-0.831637,0.372415,2.253140,1.146223,0.768570,-1.786775,-0.928838,0.154867,0.113902,0.000172,0.692704,5,0,1,0
93020,1.430207,-1.077191,-0.203188,-0.353704,-0.143983,-0.203188,-0.028563,1.296656,-0.326441,-0.456275,-0.443671,-0.449631,0.941315,1.599270,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,1.033681,-1.140427,-0.626684,-0.169389,-0.216360,-0.739463,-0.746131,-0.139264,4.430485,1.349091,-0.754722,-0.216887,-0.15401,-0.004639,-0.063687,3.624207,-1.014514,0.455119,0.386232,0.869427,0.754384,-1.754443,0.316062,1.045767,-0.676026,0.002482,-0.352674,1.468269,-1.078096,-0.256492,-0.252329,-0.017904,-0.256492,-0.028321,1.297374,-0.232378,-0.386460,-0.442691,-0.449087,0.955453,1.599270,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,1.033138,-1.137230,-0.626806,-0.167724,-0.217993,-0.731899,-0.746256,-0.138865,4.534365,1.314447,-0.754854,-0.216288,-0.153972,-0.004639,-0.063272,4.192087,-0.965828,0.524090,0.397305,0.869967,0.768290,-1.785936,0.261842,1.010324,-0.682048,0.002208,1.468644,-1.105818,-0.221342,-0.137741,0.115981,-0.221342,-0.027493,1.312235,-0.130377,-0.306636,-0.439555,-0.447479,0.960375,1.599270,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,1.034960,-1.136821,-0.619209,-0.172423,-0.217262,-0.731931,-0.746314,-0.139087,4.553463,1.267687,-0.754897,-0.216506,-0.153994,-0.004639,-0.063791,4.252453,-0.891795,0.593683,0.406189,0.885502,0.773088,-1.796723,0.205656,0.938860,-0.684110,0.000171,1.468338,-1.105432,-0.183983,-0.012907,0.254686,-0.183983,-0.02672,1.311981,-0.036618,-0.224820,-0.440891,-0.448023,0.952733,1.599270,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,1.033438,-1.137232,-0.619184,-0.173264,-0.220413,-0.731920,-0.738647,-0.138842,4.510097,1.223234,-0.754881,-0.216696,-0.154045,-0.004639,-0.063892,4.607097,-0.886518,0.663653,0.399664,0.885363,0.768570,-1.785636,0.148299,0.903090,-0.734939,0.000172,0.464471,5,0,0,1
93021,-0.780091,1.381227,-1.083697,-0.627702,-0.626239,-1.083697,-0.028563,-0.722464,-0.651303,-0.658624,-0.444557,-0.450014,-1.912325,-0.558773,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,-0.967020,0.876658,1.329180,-0.169389,-0.216360,1.359199,1.373700,-0.139264,-0.225272,-1.044741,1.385777,-0.216887,-0.15401,-0.004639,-0.063687,-0.359856,1.228697,-0.876348,-0.510814,-0.740914,-2.103592,0.754524,-0.696077,-0.805504,0.009278,0.002482,-0.352674,-0.780614,1.409788,-1.164946,-0.627706,-0.626169,-1.164946,-0.028321,-0.722920,-0.648880,-0.656971,-0.443574,-0.449611,-1.832696,-0.558773,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,-0.967512,0.879265,1.329441,-0.167724,-0.217993,1.359507,1.373970,-0.138865,-0.226437,-1.008498,1.386020,-0.216288,-0.153972,-0.004639,-0.063272,-0.374745,1.280491,-0.929440,-0.527352,-0.756668,-1.920536,0.742947,-0.758503,-0.806896,-0.045527,0.002208,-0.780774,1.437446,-1.170749,-0.627703,-0.626210,-1.170749,-0.027493,-0.737694,-0.643986,-0.654097,-0.440430,-0.448098,-1.727676,-0.558773,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,-0.966341,0.879448,1.329587,-0.172423,-0.217262,1.359605,1.374091,-0.139087,-0.226634,-0.975311,1.386138,-0.216506,-0.153994,-0.004639,-0.063791,-0.380506,1.362904,-0.979352,-0.532750,-0.772074,-1.697502,0.722219,-0.760836,-0.807564,-0.046169,0.000171,-0.780587,1.464328,-1.165899,-0.627706,-0.626247,-1.165899,-0.02672,-0.752150,-0.649088,-0.657199,-0.443540,-0.449559,-1.597511,-0.558773,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,-0.967085,0.879266,1.337171,-0.173264,-0.220413,1.367245,1.381703,-0.138842,-0.227205,-0.931486,1.393725,-0.216696,-0.154045,-0.004639,-0.063892,-0.374737,1.375376,-1.025793,-0.527073,-0.787229,-1.450075,0.692754,-0.758763,-0.807134,-0.045255,0.000172,0.930067,20,0,1,0
93022,0.435573,0.261281,-1.083697,-0.627702,-0.626239,-1.083697,-0.028563,0.565091,-0.651303,-0.658624,-0.444557,-0.450014,-0.474090,-0.685717,-0.148938,-0.085448,-0.188650,-0.103304,-0.120653,-0.967020,0.876658,1.061777,-0.169389,-0.216360,1.136263,1.136459,-0.139264,-0.225272,0.448787,1.134398,-0.216887,-0.15401,-0.004639,-0.063687,-0.359856,1.529396,-0.677975,-0.510814,0.255964,-0.663172,-0.922478,-0.920997,0.226935,-0.201585,0.002482,-0.352674,0.435995,0.261533,-1.164946,-0.627706,-0.626169,-1.164946,-0.028321,0.565384,-0.648880,-0.656971,-0.443574,-0.449611,-0.493804,-0.685717,-0.150101,-0.087546,-0.189689,-0.104350,-0.121908,-0.967512,0.879265,1.061986,-0.167724,-0.217993,1.136524,1.129034,-0.138865,-0.226437,0.458255,1.126980,-0.216288,-0.153972,-0.004639,-0.063272,-0.374745,1.581394,-0.699951,-0.527352,0.256142,-0.690106,-0.856605,-0.985246,0.226425,-0.151614,0.002208,0.436124,0.288876,-1.170749,-0.627703,-0.626210,-1.170749,-0.027493,0.565474,-0.643986,-0.654097,-0.440430,-0.448098,-0.506366,-0.685717,-0.148987,-0.087431,-0.188016,-0.104523,-0.121557,-0.966341,0.879448,1.062105,-0.172423,-0.217262,1.128921,1.129134,-0.139087,-0.226634,0.473145,1.127079,-0.216506,-0.153994,-0.004639,-0.063791,-0.380506,1.664260,-0.717408,-0.532750,0.256237,-0.705143,-0.778915,-0.988246,0.190392,-0.152493,0.000171,0.436045,0.288800,-1.165899,-0.627706,-0.626247,-1.165899,-0.02672,0.550740,-0.649088,-0.657199,-0.443540,-0.449559,-0.511188,-0.685717,-0.149301,-0.08720,-0.188512,-0.104631,-0.121793,-0.967085,0.879266,1.062061,-0.173264,-0.220413,1.128880,1.121442,-0.138842,-0.227205,0.479259,1.119433,-0.216696,-0.154045,-0.004639,-0.063892,-0.374737,1.676506,-0.730224,-0.527073,0.256224,-0.707604,-0.691815,-1.042220,0.190497,-0.151361,0.000172,0.637928,22,1,0,0


### Make model

In [12]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=43, criterion="absolute_error")

model.fit(trainsetX, trainsetY)

preds = model.predict(evalsetX)

mae = mean_absolute_error(evalsetY, preds)

print(mae)

  return fit_method(estimator, *args, **kwargs)


### Preprocess the test-data

In [None]:
#read data
X_test_estimated_a = pd.read_parquet('A/X_test_estimated.parquet')
X_test_estimated_b = pd.read_parquet('B/X_test_estimated.parquet')
X_test_estimated_c = pd.read_parquet('C/X_test_estimated.parquet')

#add location to each sample
X_test_estimated_a["location"] = "A"
X_test_estimated_b["location"] = "B"
X_test_estimated_c["location"] = "C"

#concat all the samples to one dataframe
X_test_raw = pd.concat([
                     X_test_estimated_a,
                     X_test_estimated_b,
                     X_test_estimated_c])

#feature indicating time between date_calc and date_forecast
X_test_raw["calc_time"] =(X_test_raw["date_forecast"] - X_test_raw["date_calc"]).astype('timedelta64[s]')

#fill nans
X_test_raw["snow_density:kgm3"] = X_test_raw["snow_density:kgm3"].apply(
    lambda a : np.isnan(a)
    ).map({True: 0, False: 1})
X_test_raw["ceiling_height_agl:m"] = X_test_raw["ceiling_height_agl:m"].apply(
    lambda a : -1000 if np.isnan(a) else a
)
X_test_raw["cloud_base_agl:m"] = X_test_raw["ceiling_height_agl:m"].apply(
    lambda a : -1000 if np.isnan(a) else a
)

#create seperate dataframes for measurments at minute 00, 15, 30 and 45
X_test00 = X_test_raw[X_test_raw["date_forecast"].apply(lambda time: time.minute == 0)].reset_index().iloc[:,1:]
X_test15 = X_test_raw[X_test_raw["date_forecast"].apply(lambda time: time.minute == 15)].reset_index().iloc[:,1:]
X_test30 = X_test_raw[X_test_raw["date_forecast"].apply(lambda time: time.minute == 30)].reset_index().iloc[:,1:]
X_test45 = X_test_raw[X_test_raw["date_forecast"].apply(lambda time: time.minute == 45)].reset_index().iloc[:,1:]

#remove redundant data
X_test15 = X_test15.iloc[:,2:-2]
X_test30 = X_test30.iloc[:,2:-2]
X_test45 = X_test45.iloc[:,2:-2]

#join observations into single sample
X_test_estimated = X_test00.join(X_test15, lsuffix="_00", rsuffix="_15").join(X_test30.join(X_test45, lsuffix="_30", rsuffix="_45"))

#rename column for merging with targets
X_test_estimated = X_test_estimated.rename(columns={"date_forecast" : "time"})

#parse dates
parse_dates = ['time']
X_test_targets = pd.read_csv("test.csv", parse_dates=parse_dates)

#merge estimate and target dataframes
X_test = pd.merge(X_test_estimated, X_test_targets, on=["time", "location"], how="right").iloc[:,:-2]

#add day and hour feature columns
X_test["day"] = X_test["time"].dt.day_of_year
X_test["hour"] = X_test["time"].dt.hour

#OHE encoding for categorical feature "location"
X_test["location_A"] = X_test["location"].apply(lambda a : a == "A").map({True: 1, False: 0})
X_test["location_B"] = X_test["location"].apply(lambda a : a == "B").map({True: 1, False: 0})
X_test["location_C"] = X_test["location"].apply(lambda a : a == "C").map({True: 1, False: 0})

#drop location column because we have made the OHE encoding
X_test = X_test.drop("location", axis=1)

#drop time and date_calc columns
X_test = X_test.iloc[:,2:]

#normalize data
X_test.iloc[:,:-4] = ((X_test.iloc[:,:-4]-dataMean[:-4])/dataStd[:-4]).fillna(value=0)

display(X_test)