In [386]:
using CSV, DataFrames

In [387]:
# Parse the raw data file and materialise it as a DataFrame (output suppressed).
Input_df = CSV.File("../201215_datafile_all.csv") |> DataFrame;

In [388]:
# Per-column summary statistics of the freshly loaded table.
Input_df |> describe

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Real,Int64,DataType
1,electkwh,0.353611,0.15532,0.350303,1.82909,0,Float64
2,heatingkwh,1.35625,0.0,0.027696,7.5231,0,Float64
3,hotwaterkwh,0.344568,0.0,0.0,5.50031,0,Float64
4,PV_generation,1.89333,0.0,0.007,10.0,0,Float64
5,Temperature,9.77827,-20.413,10.934,30.197,0,Float64
6,month,6.52603,1.0,7.0,12.0,0,Int64
7,day,15.7205,1.0,16.0,31.0,0,Int64
8,hour,11.5,0.0,11.5,23.0,0,Int64


In [389]:
# Add a running row index (1, 2, …, number of rows) as column `nday`.
# NOTE(review): despite the name this counts rows, not days — confirm the intent.
Input_df.nday = 1:size(Input_df, 1)

1:8760

In [390]:
# Residual demand per row: electricity + heating + hot water demand minus PV generation.
Input_df.d_res =
    Input_df.electkwh .+ Input_df.heatingkwh .+ Input_df.hotwaterkwh .-
    Input_df.PV_generation

8760-element Vector{Float64}:
  4.022784
  4.095097
  6.3071720000000004
  2.665645
  1.4635
  0.280277
  5.233008
  4.75857
  2.067222
  0.4182080000000008
 -1.9146470000000004
 -6.041333000000001
 -3.574206000000001
  ⋮
 -2.036517000000001
 -0.5947220000000009
  2.6122039999999993
  6.4191720000000005
  5.433409999999999
  5.916842
  6.879334
  4.671965
  2.963785
  5.282677
  2.812173
  3.1335830000000002

## Add periodical time representation using cos/sin

In [391]:
# Cyclical (cos/sin) encoding of the periodic time columns `hour` and `month`.
# The angle is value / period * 2π, where the divisor is the true period of the cycle:
# 24 for `hour` (values 0–23) and 12 for `month` (values 1–12). Dividing by the largest
# observed hour (23) instead would give hour 0 and hour 23 the same angle (0 and 2π),
# so the two hours could not be told apart. Fixed periods also keep the encoding
# independent of which values happen to be present in the data.
Input_df[!, :hour_cos] = cos.(Input_df[!, :hour] ./ 24 .* 2 .* pi);
Input_df[!, :hour_sin] = sin.(Input_df[!, :hour] ./ 24 .* 2 .* pi);

Input_df[!, :month_cos] = cos.(Input_df[!, :month] ./ 12 .* 2 .* pi);
Input_df[!, :month_sin] = sin.(Input_df[!, :month] ./ 12 .* 2 .* pi);

#Input_df[!, :nday_cos] = cos.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);
#Input_df[!, :nday_sin] = sin.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);

## Add seasons

In [392]:
# Boolean indicator columns, one per season, derived from the month number.
Input_df.spring = (Input_df.month .>= 3) .& (Input_df.month .<= 5);
Input_df.summer = (Input_df.month .>= 6) .& (Input_df.month .<= 8);
Input_df.autumn = (Input_df.month .>= 9) .& (Input_df.month .<= 11);
# Winter wraps around the end of the year: month 12 together with months 1 and 2.
Input_df.winter = (Input_df.month .>= 12) .| (Input_df.month .<= 2);

# Integer season code: 1 = spring, 2 = summer, 3 = autumn, 4 = every remaining row.
Input_df.season = ifelse.(
    Input_df.spring, 1, ifelse.(Input_df.summer, 2, ifelse.(Input_df.autumn, 3, 4))
);

In [393]:
# Summary statistics again, now including the derived columns.
Input_df |> describe

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Real,Int64,DataType
1,electkwh,0.353611,0.15532,0.350303,1.82909,0,Float64
2,heatingkwh,1.35625,0.0,0.027696,7.5231,0,Float64
3,hotwaterkwh,0.344568,0.0,0.0,5.50031,0,Float64
4,PV_generation,1.89333,0.0,0.007,10.0,0,Float64
5,Temperature,9.77827,-20.413,10.934,30.197,0,Float64
6,month,6.52603,1.0,7.0,12.0,0,Int64
7,day,15.7205,1.0,16.0,31.0,0,Int64
8,hour,11.5,0.0,11.5,23.0,0,Int64
9,nday,4380.5,1.0,4380.5,8760.0,0,Int64
10,d_res,0.1611,-9.60851,0.302898,13.0441,0,Float64


## Add dynamic prices based on Ye et al. 2020

In [394]:
"""
    _dynamic_price(month, hour)

Return the time-of-use price (a `Float32`) for one `month`/`hour` combination.

Hours 6–9 and 16–17 form one band, hours 10–15 a second band, and every other hour
costs `0.15f0`. In months 5–10 the first band costs `0.3f0` and the second `0.6f0`;
in all other months the two values are swapped.
"""
function _dynamic_price(month, hour)
    in_morning_or_evening_band = (6 <= hour <= 9) || (16 <= hour <= 17)
    in_midday_band = 10 <= hour <= 15
    in_months_5_to_10 = 5 <= month <= 10

    if in_morning_or_evening_band
        return in_months_5_to_10 ? 0.3f0 : 0.6f0
    elseif in_midday_band
        return in_months_5_to_10 ? 0.6f0 : 0.3f0
    else
        return 0.15f0
    end
end

"""
    set_dynamic_prices(Input_df)

Compute one time-of-use price per row of `Input_df`.

# Arguments
- `Input_df`: a table exposing the columns `month` and `hour` as properties, e.g. a
  `DataFrame` or a `NamedTuple` of vectors.

# Returns
A vector of `Float32` prices, one per row, in row order (see [`_dynamic_price`](@ref)).

The columns are read directly instead of iterating `eachrow`, so the per-row work is
done on concretely typed values and every code path returns a `Float32`.
"""
function set_dynamic_prices(Input_df)
    return map(_dynamic_price, Input_df.month, Input_df.hour)
end

set_dynamic_prices (generic function with 1 method)

In [395]:
# Time-of-use purchase price per row, and a sell price of half the purchase price.
# The factor is a Float32 literal so that `p_sell` stays Float32 like `p_buy`: with the
# Float64 literal 0.5 the product is promoted to Float64 and exposes the Float32
# rounding error (0.5 * 0.3f0 == 0.15000000596046448), which then ends up in the CSVs.
Input_df[!, "p_buy"] = set_dynamic_prices(Input_df);
Input_df[!, "p_sell"] = 0.5f0 .* Input_df[!, "p_buy"];

In [396]:
# Summary statistics once more, after the price columns have been added.
Input_df |> describe

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Abstrac…,Real,Float64,Real,Int64,DataType
1,electkwh,0.353611,0.15532,0.350303,1.82909,0,Float64
2,heatingkwh,1.35625,0.0,0.027696,7.5231,0,Float64
3,hotwaterkwh,0.344568,0.0,0.0,5.50031,0,Float64
4,PV_generation,1.89333,0.0,0.007,10.0,0,Float64
5,Temperature,9.77827,-20.413,10.934,30.197,0,Float64
6,month,6.52603,1.0,7.0,12.0,0,Int64
7,day,15.7205,1.0,16.0,31.0,0,Int64
8,hour,11.5,0.0,11.5,23.0,0,Int64
9,nday,4380.5,1.0,4380.5,8760.0,0,Int64
10,d_res,0.1611,-9.60851,0.302898,13.0441,0,Float64


## Extract training, testing + evaluation data set for summer, winter, both, all

In [397]:
# Keep only the rows whose `summer` flag is set, then show their statistics and size.
Input_data_summer = Input_df[Input_df.summer .!= 0, :]
(describe(Input_data_summer), size(Input_data_summer))

([1m21×7 DataFrame[0m
[1m Row [0m│[1m variable      [0m[1m mean           [0m[1m min          [0m[1m median         [0m[1m max       [0m ⋯
[1m     [0m│[90m Symbol        [0m[90m Abstract…      [0m[90m Real         [0m[90m Float64        [0m[90m Real      [0m ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │ electkwh          0.308262         0.15532      0.310149         1.1867 ⋯
   2 │ heatingkwh        0.00587495       0.0          0.0              1.2395
   3 │ hotwaterkwh       0.257596         0.0          0.0              5.5003
   4 │ PV_generation     2.26376          0.0          0.603            9.154
   5 │ Temperature      20.596            7.075       20.7915          29.626  ⋯
   6 │ month             7.01087          6            7.0              8
   7 │ day              15.837            1           16.0             31
   8 │ hour             11.5              0           11.5             23
   9 │ nday 

In [398]:
"""
    train_eval_test_split(Input_df; train_last_day=15, eval_last_day=20)

Split `Input_df` by day of month into training, evaluation and test subsets.

# Arguments
- `Input_df`: a collection accepted by `filter` whose rows expose a `day` property
  (e.g. a `DataFrame`).

# Keywords
- `train_last_day`: rows with `day <= train_last_day` go to the training set.
- `eval_last_day`: rows with `train_last_day < day <= eval_last_day` go to the
  evaluation set; rows with `day > eval_last_day` go to the test set.

# Returns
The tuple `(train, evaluation, test)`.

# Throws
- `ArgumentError` if `train_last_day > eval_last_day`.
"""
function train_eval_test_split(Input_df; train_last_day=15, eval_last_day=20)
    train_last_day <= eval_last_day || throw(
        ArgumentError(
            "train_last_day ($train_last_day) must not exceed eval_last_day ($eval_last_day)",
        ),
    )
    # Local names avoid `eval`, which would shadow the built-in function of that name.
    train_part = filter(row -> row.day <= train_last_day, Input_df)
    eval_part = filter(row -> train_last_day < row.day <= eval_last_day, Input_df)
    test_part = filter(row -> row.day > eval_last_day, Input_df)
    return train_part, eval_part, test_part
end

train_eval_test_split (generic function with 1 method)

In [399]:
# Split the summer rows and write each part to its own CSV file.
(summer_training, summer_evaluation, summer_testing) =
    train_eval_test_split(Input_data_summer)

for (path, part) in (
    "../summer_train_TOU.csv" => summer_training,
    "../summer_eval_TOU.csv" => summer_evaluation,
    "../summer_test_TOU.csv" => summer_testing,
)
    CSV.write(path, part)
end

In [400]:
# Keep only the rows whose `winter` flag is set, then show their statistics and size.
Input_data_winter = Input_df[Input_df.winter .!= 0, :]
(describe(Input_data_winter), size(Input_data_winter))

([1m21×7 DataFrame[0m
[1m Row [0m│[1m variable      [0m[1m mean           [0m[1m min             [0m[1m median         [0m[1m max    [0m ⋯
[1m     [0m│[90m Symbol        [0m[90m Abstract…      [0m[90m Real            [0m[90m Float64        [0m[90m Real   [0m ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │ electkwh          0.40455          0.200706        0.40841          1.5 ⋯
   2 │ heatingkwh        3.50821          0.01753         3.67684          7.5
   3 │ hotwaterkwh       0.396046         0.0             0.0              5.5
   4 │ PV_generation     1.32633          0.0             0.0             10.0
   5 │ Temperature      -2.88367        -20.413          -2.0965          14.8 ⋯
   6 │ month             5.1              1               2.0             12
   7 │ day              15.5333           1              15.5             31
   8 │ hour             11.5              0              11.5             23
  

In [401]:
# Split the winter rows and write each part to its own CSV file.
(winter_training, winter_evaluation, winter_testing) =
    train_eval_test_split(Input_data_winter)

for (path, part) in (
    "../winter_train_TOU.csv" => winter_training,
    "../winter_eval_TOU.csv" => winter_evaluation,
    "../winter_test_TOU.csv" => winter_testing,
)
    CSV.write(path, part)
end

In [402]:
# Split the complete table and write each part to its own CSV file.
(all_training, all_evaluation, all_testing) = train_eval_test_split(Input_df)

for (path, part) in (
    "../all_train_TOU.csv" => all_training,
    "../all_eval_TOU.csv" => all_evaluation,
    "../all_test_TOU.csv" => all_testing,
)
    CSV.write(path, part)
end

In [403]:
# Statistics and size of the winter rows stacked on top of the summer rows.
let both_seasons = vcat(Input_data_winter, Input_data_summer)
    (describe(both_seasons), size(both_seasons))
end

([1m21×7 DataFrame[0m
[1m Row [0m│[1m variable      [0m[1m mean           [0m[1m min          [0m[1m median         [0m[1m max       [0m ⋯
[1m     [0m│[90m Symbol        [0m[90m Abstract…      [0m[90m Real         [0m[90m Float64        [0m[90m Real      [0m ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │ electkwh          0.355877         0.15532      0.326538         1.5557 ⋯
   2 │ heatingkwh        1.7378           0.0          0.0191135        7.5231
   3 │ hotwaterkwh       0.326061         0.0          0.0              5.5003
   4 │ PV_generation     1.8002           0.0          0.0045          10.0
   5 │ Temperature       8.98519        -20.413       12.6065          29.626  ⋯
   6 │ month             6.06593          1            7.0             12
   7 │ day              15.6868           1           16.0             31
   8 │ hour             11.5              0           11.5             23
   9 │ nday   

In [404]:
# Split the stacked winter + summer rows and write each part to its own CSV file.
(both_training, both_evaluation, both_testing) = train_eval_test_split(
    vcat(Input_data_winter, Input_data_summer)
)

for (path, part) in (
    "../both_train_TOU.csv" => both_training,
    "../both_test_TOU.csv" => both_testing,
    "../both_eval_TOU.csv" => both_evaluation,
)
    CSV.write(path, part)
end

## Dataset for fixed prices

In [405]:
# Overwrite both price columns with constants: 0.3 for buying, 0.1 for selling.
Input_df[!, :p_buy] .= 0.3;
Input_df[!, :p_sell] .= 0.1;

In [406]:
# Re-extract the seasonal subsets so that they carry the fixed price columns.
Input_data_summer = Input_df[Input_df.summer .!= 0, :];
Input_data_winter = Input_df[Input_df.winter .!= 0, :];

In [407]:
# Redo all four train / evaluation / test splits on the fixed-price data.
(all_training, all_evaluation, all_testing) = train_eval_test_split(Input_df);
(summer_training, summer_evaluation, summer_testing) =
    train_eval_test_split(Input_data_summer);
(winter_training, winter_evaluation, winter_testing) =
    train_eval_test_split(Input_data_winter);
(both_training, both_evaluation, both_testing) = train_eval_test_split(
    vcat(Input_data_winter, Input_data_summer)
);

In [408]:
# Write every fixed-price split to its own CSV file.
for (path, part) in (
    "../both_train_fix.csv" => both_training,
    "../both_test_fix.csv" => both_testing,
    "../both_eval_fix.csv" => both_evaluation,
    "../all_train_fix.csv" => all_training,
    "../all_test_fix.csv" => all_testing,
    "../all_eval_fix.csv" => all_evaluation,
    "../summer_train_fix.csv" => summer_training,
    "../summer_test_fix.csv" => summer_testing,
    "../summer_eval_fix.csv" => summer_evaluation,
    "../winter_train_fix.csv" => winter_training,
    "../winter_test_fix.csv" => winter_testing,
    "../winter_eval_fix.csv" => winter_evaluation,
)
    CSV.write(path, part)
end