# Test on several weeks

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm
import sys
import common_functions as cf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet, BayesianRidge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.isotonic import IsotonicRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import RepeatedKFold, GridSearchCV
from sklearn.svm import SVR

In [2]:
weather_path = 'Input/weather_train_set0.csv'
demand_path = 'Input/demand_train_set0.csv'
solar_path = 'Input/pv_train_set0.csv'
dp = cf.DataPreprocesser(weather_path, demand_path, solar_path)
dp.load_df()
dp.remove_nan()
dp.interpolate_df()
dp.get_zenith_angle()
dp.set_df(dp.df[dp.df['week']!=44])
dp.df.head()

Unnamed: 0_level_0,demand_MW,irradiance_Wm-2,pv_power_mw,panel_temp_C,temp_location3,temp_location6,temp_location2,temp_location4,temp_location5,temp_location1,...,solar_location6,solar_location2,solar_location4,solar_location5,solar_location1,week,dow,hour,sp,zenith_angle
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-11-06 00:00:00,2.32,0.0,0.0,1.61,5.14,10.76,6.35,3.58,11.12,7.39,...,0.0,0.0,0.0,0.0,0.0,45,0,0,1.0,122.590257
2017-11-06 00:30:00,2.22,0.0,0.0,2.98,5.05,10.7,6.245,3.58,11.06,7.325,...,0.0,0.0,0.0,0.0,0.0,45,0,0,2.0,115.596177
2017-11-06 01:00:00,2.04,0.0,0.0,2.76,4.96,10.64,6.14,3.58,11.0,7.26,...,0.0,0.0,0.0,0.0,0.0,45,0,1,3.0,108.524339
2017-11-06 01:30:00,1.97,0.0,0.0,3.98,5.03,10.6,6.085,3.57,10.965,7.255,...,0.0,0.0,0.0,0.0,0.0,45,0,1,4.0,101.398356
2017-11-06 02:00:00,1.91,0.0,0.0,2.13,5.1,10.56,6.03,3.56,10.93,7.25,...,0.0,0.0,0.0,0.0,0.0,45,0,2,5.0,94.234878


In [3]:
pred_weeks = np.arange(10,29).tolist()
pred_weeks

[10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]

In [4]:
class MultiScoresComparator:
    def __init__(self,dp, pred_weeks):
        self.dp = dp
        self.pred_weeks = pred_weeks
    def write_B_on_several_weeks_with_one_method(self,B_dir, data_preprocess= None, predict_weeks=None,
                                                pred_demand=False, pred_pv=False,
                                                weather_cols_demand=None, weather_cols_pv=None,
                                                nb_weeks_before_demand=4, nb_weeks_before_pv= 5,
                                                model_demand=RandomForestRegressor(random_state=2019, n_estimators=450),
                                                model_pv=RandomForestRegressor(random_state=2019, n_estimators=300)):
        if data_preprocess is None:
            dp = self.dp
        else:
            dp=data_preprocess
        if predict_weeks is None:
            pred_weeks = self.pred_weeks
        else:
            pred_weeks = predict_weeks
        with tqdm(total=len(pred_weeks), file=sys.stdout) as pbar:
            for pred_week in pred_weeks:
                mp=cf.MLPredictor(dp, pred_week)
                mp.get_demand_previous_week()
                if (pred_demand or pred_pv):
                    mp.get_weather_prediction(dp.weather_path)
                if pred_demand:
                    mp.predict_demand_from_past_and_weather(model_demand, nb_week_before=nb_weeks_before_demand, 
                                                            weather_cols=weather_cols_demand)
                if pred_pv:
                    mp.predict_solar_power_weeks_before(model_pv, nb_week_before=nb_weeks_before_pv, 
                                                        weather_cols=weather_cols_pv)
                else:
                    mp.get_solar_power_previous_week()
                bdp = cf.BatteryPowerDispatcher
                B_total = bdp.get_all_dispatch_in_a_week(bdp,mp.predicted_df, pred_week)
                B = bdp.format_dispatching_for_competition(B_total, mp.predicted_df.index)
                B.to_csv('{}week{}.csv'.format(B_dir, pred_week))
                pbar.update()
        
    def get_scores_on_several_weeks(self,B_dir,predict_weeks=None,data_preprocesser=None):
        if data_preprocesser is None:
            dp = self.dp
        else:
            dp=data_preprocesser
        if predict_weeks is None:
            pred_weeks = self.pred_weeks
        else:
            pred_weeks = predict_weeks
        scores = []
        scores_mean = []
        with tqdm(total=len(pred_weeks), file=sys.stdout) as pbar:
            for pred_week in pred_weeks:
                sc = cf.ScoreComputer('{}week{}.csv'.format(B_dir, pred_week))
                score, score_mean = sc.compute_scores(dp.df, pred_week)
                scores.append(score)
                scores_mean.append(score_mean)
                pbar.update()
        return scores, scores_mean
    
    def compare_scores(self, scores, predict_weeks=None):
        if predict_weeks is None:
            pred_weeks = self.pred_weeks
        else:
            pred_weeks = predict_weeks
        score_cols = ['week', 'dow']
        for name in list(scores.keys()):
            score_cols.append('{}_r_peak'.format(name))
        for name in list(scores.keys()):
            score_cols.append('{}_p_solar'.format(name))
        for name in list(scores.keys()):
            score_cols.append('{}_s'.format(name))
        score_comps = []
        for i in range(len(pred_weeks)):
            score_comp = pd.DataFrame(index=scores[list(scores.keys())[0]][0].index)
            score_comp['dow'] = score_comp.index
            score_comp['dow'] = score_comp['dow'].apply(lambda x: x.replace('dow ',''))
            score_comp['week'] = pred_weeks[i]
            for name in list(scores.keys()):
                scores[name][i] = scores[name][i].rename(columns={'r_peak': '{}_r_peak'.format(name), 'p_solar': '{}_p_solar'.format(name), 
                                                  's': '{}_s'.format(name)})
                score_comp = pd.merge(score_comp,scores[name][i] , how='outer', left_index=True, right_index=True)
            score_comps.append(score_comp)
        score_comps = pd.concat(score_comps)
        score_comps = score_comps[score_cols]
        score_comps.index = range(score_comps.shape[0])
        return score_comps
        
                
                    
    
        

In [4]:
msc = cf.MultiScoresComparator(dp, pred_weeks)

## method naive

In [5]:
naive_dir = 'Output/test_raph/naive/'

In [6]:
msc.write_B_on_several_weeks_with_one_method(naive_dir)

100%|██████████| 19/19 [00:13<00:00,  1.37it/s]


In [46]:
naive_scores, naive_scores_mean = msc.get_scores_on_several_weeks(naive_dir,predict_weeks=pred_weeks,data_preprocesser=dp)

  0%|          | 0/19 [00:00<?, ?it/s]
100%|██████████| 7/7 [00:00<00:00, 81.07it/s]
  5%|▌         | 1/19 [00:00<00:01,  9.98it/s]
100%|██████████| 7/7 [00:00<00:00, 89.73it/s]

100%|██████████| 7/7 [00:00<00:00, 99.47it/s]
 16%|█▌        | 3/19 [00:00<00:01, 10.95it/s]
100%|██████████| 7/7 [00:00<00:00, 93.63it/s]

100%|██████████| 7/7 [00:00<00:00, 97.17it/s]
 26%|██▋       | 5/19 [00:00<00:01, 11.08it/s]
100%|██████████| 7/7 [00:00<00:00, 106.21it/s]

100%|██████████| 7/7 [00:00<00:00, 97.86it/s]
 37%|███▋      | 7/19 [00:00<00:01, 11.41it/s]
100%|██████████| 7/7 [00:00<00:00, 105.39it/s]

100%|██████████| 7/7 [00:00<00:00, 100.30it/s]
 47%|████▋     | 9/19 [00:00<00:00, 11.66it/s]
100%|██████████| 7/7 [00:00<00:00, 102.53it/s]

100%|██████████| 7/7 [00:00<00:00, 106.93it/s]
 58%|█████▊    | 11/19 [00:00<00:00, 11.92it/s]
100%|██████████| 7/7 [00:00<00:00, 95.05it/s]

100%|██████████| 7/7 [00:00<00:00, 99.65it/s]
 68%|██████▊   | 13/19 [00:01<00:00, 11.68it/s]
  0%|          | 0/7 

### Tests for pred demand only

In [47]:
pred_demand_dir = 'Output/test_raph/pred_demand/'
weather_columns_demand = dp.get_columns_of_group_names(['temp'], [1,2,5,6])
weather_columns_demand.append('sp')
weather_columns_demand.append('dow')
msc.write_B_on_several_weeks_with_one_method(pred_demand_dir, pred_demand=True, weather_cols_demand=weather_columns_demand)

100%|██████████| 19/19 [00:58<00:00,  3.08s/it]


In [48]:
pred_demand_dir = 'Output/test_raph/pred_demand/'
pred_demand_scores, pre_demand_scores_mean = msc.get_scores_on_several_weeks(pred_demand_dir,predict_weeks=pred_weeks,data_preprocesser=dp)

  0%|          | 0/19 [00:00<?, ?it/s]
100%|██████████| 7/7 [00:00<00:00, 84.90it/s]

100%|██████████| 7/7 [00:00<00:00, 88.49it/s]
 11%|█         | 2/19 [00:00<00:01, 10.50it/s]
100%|██████████| 7/7 [00:00<00:00, 104.22it/s]

100%|██████████| 7/7 [00:00<00:00, 106.88it/s]
 21%|██        | 4/19 [00:00<00:01, 11.53it/s]
100%|██████████| 7/7 [00:00<00:00, 96.18it/s]

100%|██████████| 7/7 [00:00<00:00, 90.52it/s]
 32%|███▏      | 6/19 [00:00<00:01, 11.39it/s]
100%|██████████| 7/7 [00:00<00:00, 92.73it/s]

100%|██████████| 7/7 [00:00<00:00, 100.43it/s]
 42%|████▏     | 8/19 [00:00<00:00, 11.49it/s]
100%|██████████| 7/7 [00:00<00:00, 91.69it/s]

100%|██████████| 7/7 [00:00<00:00, 96.89it/s]
 53%|█████▎    | 10/19 [00:00<00:00, 11.43it/s]
100%|██████████| 7/7 [00:00<00:00, 92.47it/s]

  0%|          | 0/7 [00:00<?, ?it/s][A
100%|██████████| 7/7 [00:00<00:00, 67.72it/s][A
 63%|██████▎   | 12/19 [00:01<00:00, 10.71it/s]
100%|██████████| 7/7 [00:00<00:00, 71.14it/s]

100%|██████████| 7/7 [00:

### Test with pv power prediction only

In [49]:
pred_pv_power_dir = 'Output/test_raph/pred_pv_power/'
msc.write_B_on_several_weeks_with_one_method(pred_pv_power_dir, pred_pv=True)

100%|██████████| 19/19 [02:02<00:00,  6.43s/it]


In [50]:
pred_pv_power_dir = 'Output/test_raph/pred_pv_power/'
pred_pv_power_scores, pred_pv_power_scores_mean = msc.get_scores_on_several_weeks(pred_pv_power_dir,predict_weeks=pred_weeks,data_preprocesser=dp)

  0%|          | 0/19 [00:00<?, ?it/s]
100%|██████████| 7/7 [00:00<00:00, 92.04it/s]

100%|██████████| 7/7 [00:00<00:00, 83.98it/s]
 11%|█         | 2/19 [00:00<00:01, 10.70it/s]
100%|██████████| 7/7 [00:00<00:00, 91.48it/s]

100%|██████████| 7/7 [00:00<00:00, 105.81it/s]
 21%|██        | 4/19 [00:00<00:01, 11.30it/s]
100%|██████████| 7/7 [00:00<00:00, 92.11it/s]

100%|██████████| 7/7 [00:00<00:00, 108.22it/s]
 32%|███▏      | 6/19 [00:00<00:01, 11.53it/s]
100%|██████████| 7/7 [00:00<00:00, 98.94it/s]

100%|██████████| 7/7 [00:00<00:00, 102.01it/s]
 42%|████▏     | 8/19 [00:00<00:00, 11.71it/s]
100%|██████████| 7/7 [00:00<00:00, 106.29it/s]

100%|██████████| 7/7 [00:00<00:00, 99.39it/s]
 53%|█████▎    | 10/19 [00:00<00:00, 11.89it/s]
100%|██████████| 7/7 [00:00<00:00, 97.34it/s]

100%|██████████| 7/7 [00:00<00:00, 102.50it/s]
 63%|██████▎   | 12/19 [00:01<00:00, 11.84it/s]
100%|██████████| 7/7 [00:00<00:00, 106.54it/s]

100%|██████████| 7/7 [00:00<00:00, 95.76it/s]
 74%|███████▎  | 14/

### Test with prediction on demand and pv

In [79]:
pred_pv_and_demand_dir = 'Output/test_raph/pred_pv_and_demand/'
weather_columns_demand = dp.get_columns_of_group_names(['temp'], [1,2,5,6])
weather_columns_demand.append('sp')
weather_columns_demand.append('dow')
weather_columns_pv = dp.get_columns_of_group_names(['temp'], [1,2])
weather_columns_pv += dp.get_columns_of_group_names(['solar'], [1,2,3,5,6])
weather_columns_pv.append('sp')
msc.write_B_on_several_weeks_with_one_method(pred_pv_and_demand_dir, pred_demand=True, weather_cols_demand=weather_columns_demand,
                                            pred_pv=True, weather_cols_pv = weather_columns_pv)

100%|██████████| 19/19 [02:44<00:00,  8.68s/it]


In [81]:
pred_pv_and_demand_dir = 'Output/test_raph/pred_pv_and_demand/'
pred_pv_demand_scores, pred_pv_and_demand_scores_mean = msc.get_scores_on_several_weeks(pred_pv_and_demand_dir)

  0%|          | 0/19 [00:00<?, ?it/s]
100%|██████████| 7/7 [00:00<00:00, 91.23it/s]

100%|██████████| 7/7 [00:00<00:00, 97.20it/s]
 11%|█         | 2/19 [00:00<00:01, 10.77it/s]
100%|██████████| 7/7 [00:00<00:00, 92.92it/s]

100%|██████████| 7/7 [00:00<00:00, 97.29it/s]
 21%|██        | 4/19 [00:00<00:01, 11.18it/s]
100%|██████████| 7/7 [00:00<00:00, 78.65it/s]

100%|██████████| 7/7 [00:00<00:00, 85.44it/s]
 32%|███▏      | 6/19 [00:00<00:01, 10.58it/s]
100%|██████████| 7/7 [00:00<00:00, 85.57it/s]

100%|██████████| 7/7 [00:00<00:00, 96.76it/s]
 42%|████▏     | 8/19 [00:00<00:01, 10.63it/s]
100%|██████████| 7/7 [00:00<00:00, 100.09it/s]

100%|██████████| 7/7 [00:00<00:00, 95.93it/s]
 53%|█████▎    | 10/19 [00:00<00:00, 10.98it/s]
100%|██████████| 7/7 [00:00<00:00, 92.98it/s]

100%|██████████| 7/7 [00:00<00:00, 106.83it/s]
 63%|██████▎   | 12/19 [00:01<00:00, 11.22it/s]
100%|██████████| 7/7 [00:00<00:00, 91.44it/s]

100%|██████████| 7/7 [00:00<00:00, 107.49it/s]
 74%|███████▎  | 14/19 

### Test with prediction on demand and pv 2

In [82]:
pred_pv_and_demand2_dir = 'Output/test_raph/pred_pv_and_demand2/'
weather_columns_demand = dp.get_columns_of_group_names(['temp'], [1,2,5,6])
weather_columns_demand.append('sp')
weather_columns_demand.append('dow')
weather_columns_pv = dp.get_columns_of_group_names(['temp'], [1,2])
weather_columns_pv += dp.get_columns_of_group_names(['solar'], [1,2,3,5,6])
weather_columns_pv.append('zenith_angle')
weather_columns_pv.append('sp')
msc.write_B_on_several_weeks_with_one_method(pred_pv_and_demand2_dir, pred_demand=True, weather_cols_demand=weather_columns_demand,
                                            pred_pv=True, weather_cols_pv = weather_columns_pv)

100%|██████████| 19/19 [02:52<00:00,  9.07s/it]


In [84]:
pred_pv_demand2_scores, pred_pv_and_demand2_scores_mean = msc.get_scores_on_several_weeks(pred_pv_and_demand2_dir)

  0%|          | 0/19 [00:00<?, ?it/s]
100%|██████████| 7/7 [00:00<00:00, 86.58it/s]
  5%|▌         | 1/19 [00:00<00:01,  9.69it/s]
100%|██████████| 7/7 [00:00<00:00, 97.50it/s]

100%|██████████| 7/7 [00:00<00:00, 84.24it/s]
 16%|█▌        | 3/19 [00:00<00:01, 10.35it/s]
100%|██████████| 7/7 [00:00<00:00, 93.43it/s]

100%|██████████| 7/7 [00:00<00:00, 92.78it/s]
 26%|██▋       | 5/19 [00:00<00:01, 10.67it/s]
100%|██████████| 7/7 [00:00<00:00, 92.96it/s]

100%|██████████| 7/7 [00:00<00:00, 101.99it/s]
 37%|███▋      | 7/19 [00:00<00:01, 10.87it/s]
100%|██████████| 7/7 [00:00<00:00, 93.59it/s]

100%|██████████| 7/7 [00:00<00:00, 90.17it/s]
 47%|████▋     | 9/19 [00:00<00:00, 10.77it/s]
100%|██████████| 7/7 [00:00<00:00, 91.65it/s]

100%|██████████| 7/7 [00:00<00:00, 99.62it/s]
 58%|█████▊    | 11/19 [00:01<00:00, 10.96it/s]
100%|██████████| 7/7 [00:00<00:00, 103.62it/s]

100%|██████████| 7/7 [00:00<00:00, 105.43it/s]
 68%|██████▊   | 13/19 [00:01<00:00, 11.38it/s]
100%|██████████| 7/7 [0

### Scores comparison

In [92]:
scores_dict = {}
scores_dict['naive'] = naive_scores
scores_dict['pred_demand'] = pred_demand_scores
scores_dict['pred_pv_power'] = pred_pv_power_scores
scores_dict['pred_pv_demand'] = pred_pv_demand_scores
scores_dict['pred_pv_demand2'] = pred_pv_demand2_scores
score_comps = msc.compare_scores(scores_dict)

In [93]:
score_comps

Unnamed: 0,week,dow,naive_r_peak,pred_demand_r_peak,pred_pv_power_r_peak,pred_pv_demand_r_peak,pred_pv_demand2_r_peak,naive_p_solar,pred_demand_p_solar,pred_pv_power_p_solar,pred_pv_demand_p_solar,pred_pv_demand2_p_solar,naive_s,pred_demand_s,pred_pv_power_s,pred_pv_demand_s,pred_pv_demand2_s
0,10,0,28.654971,31.032272,28.654971,31.032272,31.032272,0.0025,0.0025,0.0025,0.0025,0.0025,28.798246,31.187433,28.798246,31.187433,31.187433
1,10,1,32.995951,35.072874,32.995951,35.072874,35.072874,1.0,1.0,0.999996,0.999996,1.0,98.987854,105.218623,98.987578,105.21833,105.218623
2,10,2,28.397566,35.091278,28.397566,35.091278,35.091278,0.125,0.125,0.999932,0.999945,0.999941,35.496957,43.864097,85.188834,105.269945,105.269686
3,10,3,24.842105,35.980351,24.842105,35.980351,35.980351,0.3525,0.3525,0.99995,0.999908,0.999496,42.355789,61.346498,74.523852,107.934456,107.904766
4,10,4,28.230616,30.109565,28.230616,30.109565,30.109565,0.020833,0.020833,0.253603,0.253658,0.254027,29.406892,31.36413,42.549342,45.384649,45.406876
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128,28,2,31.767956,41.240025,31.767956,41.240025,41.240025,1.0,1.0,0.999983,0.999982,0.99999,95.303867,123.720074,95.3028,123.718596,123.719238
129,28,3,32.580645,36.858781,32.580645,36.858781,36.858781,0.999765,0.999765,0.999991,0.999993,0.999996,97.726616,110.559013,97.741367,110.57583,110.576022
130,28,4,38.141026,41.666667,38.141026,41.666667,41.666667,0.997597,0.997597,0.999459,0.999456,0.999518,114.239795,124.799776,114.381771,124.954677,124.959795
131,28,5,39.007092,43.066194,39.007092,43.066194,43.066194,0.99976,0.99976,0.99998,0.999983,0.999988,117.00255,129.177906,117.019711,129.19713,129.197544


In [94]:
score_comps.mean()

week                        1.900000e+01
dow                        9.282407e+128
naive_r_peak                3.044237e+01
pred_demand_r_peak          3.531896e+01
pred_pv_power_r_peak        3.044237e+01
pred_pv_demand_r_peak       3.531896e+01
pred_pv_demand2_r_peak      3.531896e+01
naive_p_solar               8.664246e-01
pred_demand_p_solar         8.664246e-01
pred_pv_power_p_solar       9.561432e-01
pred_pv_demand_p_solar      9.561575e-01
pred_pv_demand2_p_solar     9.563254e-01
naive_s                     8.424955e+01
pred_demand_s               9.775917e+01
pred_pv_power_s             8.929453e+01
pred_pv_demand_s            1.036514e+02
pred_pv_demand2_s           1.036619e+02
dtype: float64

In [95]:
score_comps[score_comps['week'] >= 24].mean()

week                       2.600000e+01
dow                        3.527315e+31
naive_r_peak               3.508894e+01
pred_demand_r_peak         3.960519e+01
pred_pv_power_r_peak       3.508894e+01
pred_pv_demand_r_peak      3.960519e+01
pred_pv_demand2_r_peak     3.960519e+01
naive_p_solar              9.702644e-01
pred_demand_p_solar        9.702644e-01
pred_pv_power_p_solar      9.932258e-01
pred_pv_demand_p_solar     9.932408e-01
pred_pv_demand2_p_solar    9.933123e-01
naive_s                    1.032671e+02
pred_demand_s              1.165068e+02
pred_pv_power_s            1.048164e+02
pred_pv_demand_s           1.182903e+02
pred_pv_demand2_s          1.182957e+02
dtype: float64