In [1]:
import random
import pandas as pd 
from copy import deepcopy
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, median_absolute_error,mean_squared_error
import matplotlib.pyplot as plt
import numpy as np
import pickle
# from skopt import BayesSearchCV
import datetime as dt
import itertools

import warnings
warnings.filterwarnings('ignore')

In [2]:
data_path = "/Users/jurajkapasny/Data/energy_hack/"
df = pd.read_csv(data_path+"spotreba_prepared.csv",sep = ";")

In [3]:
df["timestamp"] = df["Dátum a čas"] + " " + df["Unnamed: 1"]
df = df[["spotreba","om","timestamp"]]
df.head()

Unnamed: 0,spotreba,om,timestamp
0,0.211,1,2016-01-01 00:15:00
1,0.21,1,2016-01-01 00:30:00
2,0.21,1,2016-01-01 00:45:00
3,0.206,1,2016-01-01 01:00:00
4,0.205,1,2016-01-01 01:15:00


In [4]:
om_info = pd.read_csv(data_path+"om_info_prepared.csv",sep = ";")
om_info.drop("Unnamed: 0",axis = 1, inplace=True)

In [51]:
om_info.head()

Unnamed: 0,Číslo OM,Zapojenie,Druh tarify,Inštalovaný výkon FVE (kWp)
0,1,A,D2,2.0
1,2,A,D2,7.2
2,3,A,D2,2.0
3,4,A,D1,0.0
4,5,C,D4,9.2


#### Adding tarif to main_df 

In [5]:
df_final = df.merge(om_info[["Číslo OM",
                             "Druh tarify",
                             "Inštalovaný výkon FVE (kWp)"]], 
                    how = "left", 
                    left_on = "om", 
                    right_on = "Číslo OM")

In [6]:
df_final.head()

Unnamed: 0,spotreba,om,timestamp,Číslo OM,Druh tarify,Inštalovaný výkon FVE (kWp)
0,0.211,1,2016-01-01 00:15:00,1,D2,2.0
1,0.21,1,2016-01-01 00:30:00,1,D2,2.0
2,0.21,1,2016-01-01 00:45:00,1,D2,2.0
3,0.206,1,2016-01-01 01:00:00,1,D2,2.0
4,0.205,1,2016-01-01 01:15:00,1,D2,2.0


#### Adding vyroba

In [7]:
vyroba = pd.read_csv(data_path+"vyroba_prepared.csv",sep = ";")
# vyroba.drop("Unnamed: 0",axis = 1, inplace=True)

In [8]:
vyroba_tr = pd.DataFrame(vyroba.set_index("time").stack()).reset_index()
vyroba_tr.tail()

Unnamed: 0,time,level_1,0
245947,2016-12-31 23:44:59.993,3.15,0.0
245948,2016-12-31 23:44:59.993,4.0,0.0
245949,2016-12-31 23:44:59.993,5.0,0.0
245950,2016-12-31 23:44:59.993,7.2,0.0
245951,2016-12-31 23:44:59.993,9.2,0.0


In [9]:
vyroba_tr.time = pd.to_datetime(vyroba_tr.time).dt.round('1s')
vyroba_tr.level_1 = vyroba_tr.level_1.astype(float)
df_final.timestamp = pd.to_datetime(df_final.timestamp)

In [10]:
vyroba_tr.head()

Unnamed: 0,time,level_1,0
0,2016-01-01,2.0,0.0
1,2016-01-01,2.5,0.0
2,2016-01-01,3.15,0.0
3,2016-01-01,4.0,0.0
4,2016-01-01,5.0,0.0


In [11]:
df_final = df_final.merge(vyroba_tr[["time","level_1",0]], 
                          how="left",
                          left_on = ["timestamp","Inštalovaný výkon FVE (kWp)"],
                          right_on = ["time","level_1"])

In [12]:
df_final.drop(["time","level_1","Číslo OM"],axis = 1, inplace=True)

In [13]:
df_final.count()

spotreba                       5245996
om                             5245996
timestamp                      5245996
Druh tarify                    5245996
Inštalovaný výkon FVE (kWp)    5245996
0                              3489396
dtype: int64

In [14]:
df_final.fillna(0,inplace = True)

In [15]:
# df_final.head()
df_final = df_final.rename({0: "vyroba", 
                            "Inštalovaný výkon FVE (kWp)":"vykon",
                           "Druh tarify":"tarifa"}, axis=1)

In [16]:
len(df_final.om.unique())

150

#### getting the diff for everyone everytime

In [17]:
df_final["diff"] = df_final.vyroba - df_final.spotreba

# total_spotreba_per_day = df_with_day[["day","spotreba","vyroba"]].groupby("day")[["spotreba","vyroba"]].sum()

In [18]:
df_final.count()

spotreba     5245996
om           5245996
timestamp    5245996
tarifa       5245996
vykon        5245996
vyroba       5245996
diff         5245996
dtype: int64

In [19]:
df_final.head()

Unnamed: 0,spotreba,om,timestamp,tarifa,vykon,vyroba,diff
0,0.211,1,2016-01-01 00:15:00,D2,2.0,0.0,-0.211
1,0.21,1,2016-01-01 00:30:00,D2,2.0,0.0,-0.21
2,0.21,1,2016-01-01 00:45:00,D2,2.0,0.0,-0.21
3,0.206,1,2016-01-01 01:00:00,D2,2.0,0.0,-0.206
4,0.205,1,2016-01-01 01:15:00,D2,2.0,0.0,-0.205


#### percentages

In [20]:
all_times = list(df_final.timestamp.unique())
df_final["viac_spotreby"] = 0
df_final.loc[df_final["diff"] <= 0, "viac_spotreby"] = 1

In [21]:
temp_gr = df_final.groupby(["timestamp","viac_spotreby","om"])[["diff"]].sum()

In [22]:
temp_gr_pcts = temp_gr.groupby(level=[0,1]).apply(lambda x:
                                                 100 * x / float(x.sum()))

In [23]:
temp_gr_pcts = temp_gr_pcts.rename({"diff": "percentage"}, axis=1)
temp_gr_pcts = temp_gr_pcts.reset_index()

In [24]:
df_final = df_final.merge(temp_gr_pcts.reset_index(), how = "left", on = ["timestamp","om"])

In [25]:
df_final.head()

Unnamed: 0,spotreba,om,timestamp,tarifa,vykon,vyroba,diff,viac_spotreby_x,index,viac_spotreby_y,percentage
0,0.211,1,2016-01-01 00:15:00,D2,2.0,0.0,-0.211,1,147,1,0.697163
1,0.21,1,2016-01-01 00:30:00,D2,2.0,0.0,-0.21,1,294,1,0.696575
2,0.21,1,2016-01-01 00:45:00,D2,2.0,0.0,-0.21,1,441,1,0.696899
3,0.206,1,2016-01-01 01:00:00,D2,2.0,0.0,-0.206,1,588,1,0.66894
4,0.205,1,2016-01-01 01:15:00,D2,2.0,0.0,-0.205,1,735,1,0.692088


In [26]:
df_final = df_final.rename({"viac_spotreby_x": "viac_spotreby"}, axis=1)
df_final.drop("viac_spotreby_y", axis = 1, inplace=True)

In [27]:
# total_energy_left = df_final[["timestamp","diff"]].groupby("timestamp").sum("diff")

In [34]:
# only those with profit 
# df_with_day_profit = temp[temp["diff"] > 0]
# df_with_day_loss = temp[temp["diff"] <= 0]
# df_with_day_loss["percentage"] = df_with_day_loss["diff"] / df_with_day_loss["diff"].sum()
# df_with_day_profit["percentage"] = df_with_day_profit["diff"] / df_with_day_profit["diff"].sum()

# total_energy_left = temp[temp["diff"] > 0]["diff"].sum()
# total_energy_needed = -temp[temp["diff"] <= 0]["diff"].sum()
total_energy_left = df_final[["timestamp","viac_spotreby","diff"]].groupby(["timestamp","viac_spotreby"]).sum()
index = pd.MultiIndex.from_product([all_times, [0,1]], names = ["timestamp", "viac_spotreby"])
all_comb = pd.DataFrame(index = index).reset_index()

total_energy_left = all_comb.merge(total_energy_left.reset_index(), how = "left", on = ["timestamp","viac_spotreby"])

total_energy_left.fillna(0, inplace=True)

# bad_guys = list(temp[temp["diff"] < 0].om.unique())

In [35]:
total_energy_left.head()

Unnamed: 0,timestamp,viac_spotreby,diff
0,2016-01-01 00:15:00,0,0.0
1,2016-01-01 00:15:00,1,-30.2655
2,2016-01-01 00:30:00,0,0.0
3,2016-01-01 00:30:00,1,-30.1475
4,2016-01-01 00:45:00,0,0.0


In [36]:
total_energy_left["diff_abs"] = total_energy_left["diff"].abs()
temp_max = total_energy_left.groupby(["timestamp"])[["diff_abs"]].max()
temp_max["ind"] = "value"
temp_max = temp_max.reset_index()

temp_max = total_energy_left.merge(temp_max,
                       how = "left",
                                  on = ["timestamp","diff_abs"])

temp_max = temp_max[temp_max.ind.notnull()][["timestamp","viac_spotreby"]]
temp_max.rename({"viac_spotreby":"viac_spotreby_total"},axis = 1, inplace=True)
temp_max.head()

Unnamed: 0,timestamp,viac_spotreby_total
1,2016-01-01 00:15:00,1
3,2016-01-01 00:30:00,1
5,2016-01-01 00:45:00,1
7,2016-01-01 01:00:00,1
9,2016-01-01 01:15:00,1


In [37]:
total_energy_left.rename({"diff": "diff_total"},
                                            axis =1,
                                            inplace = True)

In [39]:
df_final = df_final.merge(total_energy_left[["timestamp","viac_spotreby","diff_total"]], how = "left", on = ["timestamp","viac_spotreby"])

In [40]:
df_final.head()

Unnamed: 0,spotreba,om,timestamp,tarifa,vykon,vyroba,diff,viac_spotreby,index,percentage,diff_total
0,0.211,1,2016-01-01 00:15:00,D2,2.0,0.0,-0.211,1,147,0.697163,-30.2655
1,0.21,1,2016-01-01 00:30:00,D2,2.0,0.0,-0.21,1,294,0.696575,-30.1475
2,0.21,1,2016-01-01 00:45:00,D2,2.0,0.0,-0.21,1,441,0.696899,-30.1335
3,0.206,1,2016-01-01 01:00:00,D2,2.0,0.0,-0.206,1,588,0.66894,-30.795
4,0.205,1,2016-01-01 01:15:00,D2,2.0,0.0,-0.205,1,735,0.692088,-29.6205


In [41]:
res = df_final.copy(deep=True)

In [42]:
res = res.merge(temp_max, on = "timestamp")

#### what to buy and sell to community

In [45]:
res["nakup_from_community"] = 0
res.loc[(res.viac_spotreby == 1) & (res.viac_spotreby_total == 1), "nakup_from_community"] = \
        (res.loc[(res.viac_spotreby == 1) & (res.viac_spotreby_total == 1)].percentage / 100 ) * res.loc[res.viac_spotreby == 0].diff_total.values[0]

res.loc[(res.viac_spotreby == 1) & (res.viac_spotreby_total == 0), "nakup_from_community"] = \
        -res.loc[(res.viac_spotreby == 1) & (res.viac_spotreby_total == 0)]["diff"]


res["nakup_from_network"] = 0
res.loc[res.viac_spotreby == 1, "nakup_from_network"] = \
- res.loc[res.viac_spotreby == 1]["diff"] - res.loc[res.viac_spotreby == 1]["nakup_from_community"]

    
res["predaj_to_community"] = 0
res.loc[(res.viac_spotreby == 0) & (res.viac_spotreby_total == 0), "predaj_to_community"] = \
        (res.loc[(res.viac_spotreby == 0) & (res.viac_spotreby_total == 0)].percentage / 100 ) * res.loc[res.viac_spotreby == 1].diff_total.values[0]

res.loc[(res.viac_spotreby == 0) & (res.viac_spotreby_total == 1), "predaj_to_community"] = \
        res.loc[(res.viac_spotreby == 0) & (res.viac_spotreby_total == 1)]["diff"]

    
res["predaj_to_network"] = 0
res.loc[res.viac_spotreby == 0, "predaj_to_network"] = \
         res.loc[res.viac_spotreby == 0]["diff"] - res.loc[res.viac_spotreby == 0]["predaj_to_community"]

In [47]:
res = res.drop("index", axis = 1)

In [49]:
res.head()

Unnamed: 0,spotreba,om,timestamp,tarifa,vykon,vyroba,diff,viac_spotreby,percentage,diff_total,viac_spotreby_total,nakup_from_community,nakup_from_network,predaj_to_community,predaj_to_network
0,0.211,1,2016-01-01 00:15:00,D2,2.0,0.0,-0.211,1,0.697163,-30.2655,1,2.9e-05,0.210971,0.0,0.0
1,0.06,2,2016-01-01 00:15:00,D2,7.2,0.0,-0.06,1,0.198246,-30.2655,1,8e-06,0.059992,0.0,0.0
2,0.292,3,2016-01-01 00:15:00,D2,2.0,0.0,-0.292,1,0.964795,-30.2655,1,4.1e-05,0.291959,0.0,0.0
3,1.535,4,2016-01-01 00:15:00,D1,0.0,0.0,-1.535,1,5.071781,-30.2655,1,0.000215,1.534785,0.0,0.0
4,0.46,5,2016-01-01 00:15:00,D4,9.2,0.0,-0.46,1,1.519882,-30.2655,1,6.4e-05,0.459936,0.0,0.0


In [53]:
res.to_csv(data_path + "data_prepared_for_dashboard.csv", sep = ";", index = False)