In [1]:
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet, BayesianRidge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.isotonic import IsotonicRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import RepeatedKFold, GridSearchCV
from sklearn.svm import SVR
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' and
# later releases dropped the old names, so pick whichever name this install provides.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)
from tqdm import tqdm
import sys
from pvlib import solarposition
import common_functions_by_date as cfbd
import datetime

In [8]:
# Task 1 training inputs (CSV): weather data, demand and PV generation.
weather_path = '../task1/Input/weather_train_set1.csv'
demand_path = '../task1/Input/demand_train_set1.csv'
solar_path = '../task1/Input/pv_train_set1.csv'
# DataPreprocesser comes from the project-local module common_functions_by_date (cfbd).
dp = cfbd.DataPreprocesser(weather_path, demand_path, solar_path)
dp.load_df()
# presumably these two steps handle missing samples -- TODO confirm against cfbd
dp.remove_nan()
dp.interpolate_df()
# NOTE(review): these two calls appear to be what adds the zenith_angle, GHI and POA
# columns seen in the preview below (the task 2 frame, built without them, lacks
# those columns) -- confirm in cfbd.
dp.get_zenith_angle()
dp.get_poa_and_ghi_irradiance()
# Preview the first rows of the resulting half-hourly frame.
dp.df.head()

Unnamed: 0_level_0,demand_MW,irradiance_Wm-2,pv_power_mw,panel_temp_C,temp_location3,temp_location6,temp_location2,temp_location4,temp_location5,temp_location1,...,solar_location5,solar_location1,week,dow,date,hour,sp,zenith_angle,GHI,POA
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-11-03 00:00:00,2.19,0.0,0.0,7.05,7.46,13.2,9.64,6.68,13.09,8.56,...,0.0,0.0,44,4,2017-11-03,0,1.0,122.821452,0.0,0.0
2017-11-03 00:30:00,2.14,0.0,0.0,7.38,7.3,13.26,9.675,6.475,13.15,8.625,...,0.0,0.0,44,4,2017-11-03,0,2.0,115.780705,0.0,0.0
2017-11-03 01:00:00,2.01,0.0,0.0,7.7,7.14,13.32,9.71,6.27,13.21,8.69,...,0.0,0.0,44,4,2017-11-03,1,3.0,108.66807,0.0,0.0
2017-11-03 01:30:00,1.87,0.0,0.0,7.48,7.0,13.34,9.72,6.09,13.255,8.715,...,0.0,0.0,44,4,2017-11-03,1,4.0,101.505357,0.0,0.0
2017-11-03 02:00:00,1.86,0.0,0.0,7.2,6.86,13.36,9.73,5.91,13.3,8.74,...,0.0,0.0,44,4,2017-11-03,2,5.0,94.307845,0.0,0.0


In [6]:
# First day of the one-week window that is predicted and scored below.
first_day_pred = datetime.date(2018, 10, 16)

In [4]:
# Echo the forecast start date to confirm the value set in the previous cell.
first_day_pred

datetime.date(2018, 10, 16)

In [5]:
# Predictor for the week starting at first_day_pred, built on the preprocessed task 1 data.
mp = cfbd.MLPredictor(dp, first_day_pred)
mp.get_demand_previous_week()
mp.get_weather_prediction(weather_path)

# Feature columns for the PV model: selected solar and temperature locations,
# plus the 'sp' and 'zenith_angle' columns.
weather_columns_pv = dp.get_columns_of_group_names(['solar'], [1, 2, 3, 5, 6])
weather_columns_pv.extend(dp.get_columns_of_group_names(['temp'], [1, 2]))
weather_columns_pv.extend(['sp', 'zenith_angle'])

# Feature columns for the demand model: selected temperature locations,
# plus the 'sp' and 'dow' columns.
weather_columns_demand = dp.get_columns_of_group_names(['temp'], [1, 2, 5, 6])
weather_columns_demand.extend(['sp', 'dow'])

# Both forests use a fixed seed so repeated runs give the same predictions.
model_pv = RandomForestRegressor(n_estimators=300, random_state=2019)
mp.predict_solar_power_weeks_before(model_pv, weather_cols=weather_columns_pv)

model_demand = RandomForestRegressor(n_estimators=450, random_state=2019)
mp.predict_demand_from_past_and_weather(model_demand, weather_cols=weather_columns_demand)

# BatteryPowerDispatcher is used through the class object itself (never instantiated):
# the class is passed explicitly as the first argument of get_all_dispatch_in_a_week.
bpd = cfbd.BatteryPowerDispatcher
B = bpd.get_all_dispatch_in_a_week(bpd, mp.predicted_df, first_day_pred)
B_final = bpd.format_dispatching_for_competition(B, mp.predicted_df.index)

In [6]:
# Column-wise total of the dispatch: one value per day. In the output below every
# total is of order 1e-15, i.e. zero up to floating-point rounding.
B.sum()

20181016    3.774758e-15
20181017   -3.108624e-15
20181018   -1.998401e-15
20181019   -2.220446e-16
20181020    6.661338e-16
20181021   -1.332268e-15
20181022    8.881784e-16
dtype: float64

In [8]:
# Write the competition-formatted dispatch to disk. A later cell passes this same
# path to cfbd.ScoreComputer, so the two literals must stay in sync.
B_final.to_csv('../task1/Output/BGBattery_set1_2.csv')

In [9]:
# Task 2 training files. The frame shown in the next cell contains 16-22 Oct 2018,
# the week predicted above, so this data is used as the reference for scoring.
weather_task2_path = '../task2/Input/weather_train_set2.csv'
demand_task2_path = '../task2/Input/demand_train_set2.csv'
solar_power_task2_path = '../task2/Input/pv_train_set2.csv'

dp_task2 = cfbd.DataPreprocesser(
    weather_path=weather_task2_path,
    demand_path=demand_task2_path,
    solar_path=solar_power_task2_path,
)
dp_task2.load_df()
dp_task2.remove_nan()
dp_task2.interpolate_df()

In [10]:
# Keep the seven days starting at first_day_pred; the end bound is exclusive.
task2_dates = dp_task2.df.index.date
scoring_week_end = first_day_pred + datetime.timedelta(days=7)
in_scoring_week = (task2_dates >= first_day_pred) & (task2_dates < scoring_week_end)
demand_and_solar_power = dp_task2.df.loc[in_scoring_week, :]
demand_and_solar_power

Unnamed: 0_level_0,demand_MW,irradiance_Wm-2,pv_power_mw,panel_temp_C,temp_location3,temp_location6,temp_location2,temp_location4,temp_location5,temp_location1,...,solar_location6,solar_location2,solar_location4,solar_location5,solar_location1,week,dow,date,hour,sp
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-10-16 00:00:00,1.95,0.0,0.0,12.41,11.650,14.870,12.670,11.450,14.190,11.740,...,0.0,0.0,0.0,0.0,0.0,42,1,2018-10-16,0,1.0
2018-10-16 00:30:00,1.87,0.0,0.0,12.38,11.810,14.850,12.650,11.610,14.190,11.720,...,0.0,0.0,0.0,0.0,0.0,42,1,2018-10-16,0,2.0
2018-10-16 01:00:00,1.80,0.0,0.0,12.40,11.970,14.830,12.630,11.770,14.190,11.700,...,0.0,0.0,0.0,0.0,0.0,42,1,2018-10-16,1,3.0
2018-10-16 01:30:00,1.76,0.0,0.0,12.63,11.930,14.825,12.600,11.750,14.155,11.645,...,0.0,0.0,0.0,0.0,0.0,42,1,2018-10-16,1,4.0
2018-10-16 02:00:00,1.76,0.0,0.0,12.91,11.890,14.820,12.570,11.730,14.120,11.590,...,0.0,0.0,0.0,0.0,0.0,42,1,2018-10-16,2,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-10-22 21:30:00,2.37,0.0,0.0,3.30,7.985,12.885,8.845,6.710,12.530,8.720,...,0.0,0.0,0.0,0.0,0.0,43,0,2018-10-22,21,44.0
2018-10-22 22:00:00,2.11,0.0,0.0,3.15,8.010,12.760,8.980,6.220,12.400,8.890,...,0.0,0.0,0.0,0.0,0.0,43,0,2018-10-22,22,45.0
2018-10-22 22:30:00,1.90,0.0,0.0,3.32,7.705,12.690,8.615,5.925,12.360,8.695,...,0.0,0.0,0.0,0.0,0.0,43,0,2018-10-22,22,46.0
2018-10-22 23:00:00,1.81,0.0,0.0,2.54,7.400,12.620,8.250,5.630,12.320,8.500,...,0.0,0.0,0.0,0.0,0.0,43,0,2018-10-22,23,47.0


In [11]:
# Location of the task 1 battery dispatch CSV that is scored in the next cell.
BGBattery_path = "../task1/Output/BGBattery_set1_2.csv"

In [12]:
# Score the dispatch file at BGBattery_path against the task 2 data selected above
# for the week starting at first_day_pred.
BGBattery_sc = cfbd.ScoreComputer(BGBattery_path)
# The displayed result is a pair: a per-day table with columns r_peak, p_solar and s,
# followed by the column-wise mean of that table over the seven days.
BGBattery_sc.compute_scores(demand_and_solar_power, first_day_pred)

100%|██████████| 7/7 [00:00<00:00, 102.87it/s]


(             r_peak   p_solar           s
 20181016  32.499739  0.683837   76.948807
 20181017  32.167428  0.626352   72.463679
 20181018  34.204007  0.995123  102.278411
 20181019  32.553062  0.923736   92.693934
 20181020   32.97789       1.0   98.933669
 20181021  30.672629       1.0   92.017886
 20181022   29.89071  0.999876   89.664737,
 r_peak     32.137924
 p_solar     0.889846
 s          89.285875
 dtype: float64)

## Prediction using the rectify forecast (with smoothing) for demand and an ML prediction for solar power

In [9]:
# Fresh predictor for the same week; this rebinds `mp`, replacing the random-forest
# predictor built earlier in the notebook.
mp = cfbd.MLPredictor(dp, first_day_pred)
mp.get_demand_previous_week()
mp.get_weather_prediction(weather_path)

# Slow step: with compute_forecast=True the log below shows five week-long
# forecasts of 336 steps each, every one taking more than a minute.
mp.pred_demand_with_forecast_method_and_average_with_previous_weeks(
    dp.df,
    '../task1/rectify_forecast',
    first_day_pred,
    'demand_MW',
    compute_forecast=True,
)
mp.predict_pv_power_smooth_and_square_irr()

week prediction with start day :  2018-10-16
  0%|          | 0/336 [00:00<?, ?it/s]2018-10-16 00:00:00
KNeighborsRegressor(n_neighbors=24)
100%|██████████| 336/336 [01:12<00:00,  4.66it/s]
week prediction with start day :  2018-10-09
  0%|          | 0/336 [00:00<?, ?it/s]2018-10-09 00:00:00
KNeighborsRegressor(n_neighbors=24)
100%|██████████| 336/336 [01:17<00:00,  4.36it/s]
week prediction with start day :  2018-10-02
  0%|          | 0/336 [00:00<?, ?it/s]2018-10-02 00:00:00
KNeighborsRegressor(n_neighbors=32)
100%|██████████| 336/336 [01:17<00:00,  4.32it/s]
week prediction with start day :  2018-09-25
  0%|          | 0/336 [00:00<?, ?it/s]2018-09-25 00:00:00
KNeighborsRegressor(n_neighbors=52)
100%|██████████| 336/336 [01:15<00:00,  4.48it/s]
week prediction with start day :  2018-09-18
  0%|          | 0/336 [00:00<?, ?it/s]2018-09-18 00:00:00
KNeighborsRegressor(n_neighbors=59)
100%|██████████| 336/336 [01:19<00:00,  4.24it/s]
smooth_phaze : -3
weather dephasage : 0
smooth_pha