In [None]:
import pandas as pd
from src.data import ReadData,PrepareData
from src.visualization import visualize
from src.models import Models

# Column names shared by the cells below.
INDEX_COLUMN, TARGET_COLUMN = 'TIMESTAMP', 'POWER'

# Time spans expressed as multiples of ONE_DAY.
# NOTE(review): 24 per day suggests hourly samples — confirm against the data.
ONE_DAY = 24
ONE_WEEK, ONE_MONTH, ONE_YEAR = 7 * ONE_DAY, 30 * ONE_DAY, 365 * ONE_DAY

# Load the two raw frames via the project reader and join them into a single
# frame. df_original is only ever copied later on (see `df_original.copy()`),
# so it stays available as the untouched starting point.
df_power,df_features = ReadData.read()
df_original = PrepareData.join(df_power,df_features)

In [None]:
# Readable names for the raw VARnnn feature codes.
# NOTE(review): "TERMAL_RAD" and "TOTAL_PRECIPATION" look misspelt, but later
# cells (and possibly project helpers) refer to these exact names, so they
# must only be changed everywhere at once.
column_mapping = dict(
    VAR78="LIQUID_WATER",
    VAR79="ICE_WATER",
    VAR134="SURFACE_PRESSURE",
    VAR157="RELATIVE_HUMIDITY",
    VAR164="TOTAL_CLOUD_COVER",
    VAR165="WIND_U",
    VAR166="WIND_V",
    VAR167="TEMPERATURE",
    VAR169="SOLAR_RAD",
    VAR175="TERMAL_RAD",
    VAR178="TOP_NET_SOLAR_RAD",
    VAR228="TOTAL_PRECIPATION",
)

# Work on a copy so df_original is never modified.
df = df_original.copy()
dfs = PrepareData.split_by_zone(df)

# Run the same three preparation steps on every frame, writing the result
# back into its slot in the list.
for zone_index, zone_df in enumerate(dfs):
    zone_df = PrepareData.set_timestamp_as_index(zone_df)
    zone_df = PrepareData.extract_data_from_timestamp(zone_df)
    dfs[zone_index] = PrepareData.rename_columns(zone_df, column_mapping)

# One frame holding all prepared zones, taken before any later cleaning.
df_all = pd.concat(dfs)

In [None]:
# Plot over df_all, i.e. all prepared zones concatenated, before the
# dissipate/outlier steps applied in a later cell.
visualize.plot_boxwhiskers(df_all)

In [None]:
# Hungarian display labels for plotting. Keys must match the column names as
# they exist at this point, i.e. after the VARnnn codes were renamed earlier
# in the notebook. The thermal-radiation entry was previously keyed by the
# raw code "VAR175", which no longer exists as a column, so rename() silently
# skipped it; it is now keyed by the renamed column "TERMAL_RAD".
# NOTE(review): "Teljesítémény" looks like a typo for "Teljesítmény"; left
# unchanged in case a plotting helper looks the label up — confirm and fix.
column_mapping = {
    "ZONEID": "Zóna",
    "POWER": "Teljesítémény",
    "LIQUID_WATER": "Folyékony csapadék",
    "ICE_WATER": "Jeges csapadék",
    "SURFACE_PRESSURE": "Felületi nyomás",
    "RELATIVE_HUMIDITY": "Relatív páratartalom",
    "TOTAL_CLOUD_COVER": "Teljes felhőtakaró",
    "WIND_U": "Szél(U)",
    "WIND_V": "Szél(V)",
    "TEMPERATURE": "Hőmérséklet",
    "SOLAR_RAD": "Napsugárzás",
    "TERMAL_RAD": "Hősugárzás",
    "TOP_NET_SOLAR_RAD": "Napsugárzás atm. tetején",
    "TOTAL_PRECIPATION": "Össz. csapadék",
    "HOUR": 'Óra',
    'MONTH': 'Hónap',
    'YEAR': 'Év',
    'DAY': 'Nap',
}

# Box plots of the two solar-radiation columns of the first frame in dfs,
# before the cleaning cell runs. index=str converts every index label to a
# string; columns= applies the display labels.
solar_columns = dfs[0][["TOP_NET_SOLAR_RAD", "SOLAR_RAD"]]
visualize.plot_boxplots(solar_columns.rename(index=str, columns=column_mapping))

In [None]:
# Apply dissipate_features, then transform_outliers, to every frame and store
# the result back in dfs.
# NOTE(review): this overwrites dfs in place, so re-running the cell applies
# both steps a second time to already-processed frames.
for zone_index, zone_df in enumerate(dfs):
    dfs[zone_index] = PrepareData.transform_outliers(
        PrepareData.dissipate_features(zone_df)
    )

In [None]:
# Hungarian display labels for plotting. Keys must match the column names as
# they exist at this point, i.e. after the VARnnn codes were renamed earlier
# in the notebook. The thermal-radiation entry was previously keyed by the
# raw code "VAR175", which no longer exists as a column, so rename() silently
# skipped it; it is now keyed by the renamed column "TERMAL_RAD".
# NOTE(review): "Teljesítémény" looks like a typo for "Teljesítmény"; left
# unchanged in case a plotting helper looks the label up — confirm and fix.
column_mapping = {
    "ZONEID": "Zóna",
    "POWER": "Teljesítémény",
    "LIQUID_WATER": "Folyékony csapadék",
    "ICE_WATER": "Jeges csapadék",
    "SURFACE_PRESSURE": "Felületi nyomás",
    "RELATIVE_HUMIDITY": "Relatív páratartalom",
    "TOTAL_CLOUD_COVER": "Teljes felhőtakaró",
    "WIND_U": "Szél(U)",
    "WIND_V": "Szél(V)",
    "TEMPERATURE": "Hőmérséklet",
    "SOLAR_RAD": "Napsugárzás",
    "TERMAL_RAD": "Hősugárzás",
    "TOP_NET_SOLAR_RAD": "Napsugárzás atm. tetején",
    "TOTAL_PRECIPATION": "Össz. csapadék",
    "HOUR": 'Óra',
    'MONTH': 'Hónap',
    'YEAR': 'Év',
    'DAY': 'Nap',
}

# Same box plots as before the cleaning cell, now on the processed first
# frame, so the two figures can be compared. index=str converts every index
# label to a string; columns= applies the display labels.
solar_columns = dfs[0][["TOP_NET_SOLAR_RAD", "SOLAR_RAD"]]
visualize.plot_boxplots(solar_columns.rename(index=str, columns=column_mapping))

In [None]:
# One set of scatter plots per frame in dfs, with "POWER" passed as the
# second argument. The loop variable is deliberately not called `df`, so the
# notebook-level `df` is not overwritten by the last frame as a side effect.
for zone_df in dfs:
    visualize.plot_scatters(zone_df, "POWER")

In [None]:
# All processed frames stacked into one.
df = pd.concat(dfs)

# Hungarian display labels for plotting. Keys must match the column names as
# they exist at this point, i.e. after the VARnnn codes were renamed earlier
# in the notebook. The thermal-radiation entry was previously keyed by the
# raw code "VAR175", which no longer exists as a column, so rename() silently
# skipped it; it is now keyed by the renamed column "TERMAL_RAD".
# NOTE(review): "Teljesítémény" looks like a typo for "Teljesítmény"; left
# unchanged because df_to_plot is handed to plotting helpers that may look
# the label up — confirm and fix.
column_mapping = {
    "ZONEID": "Zóna",
    "POWER": "Teljesítémény",
    "LIQUID_WATER": "Folyékony csapadék",
    "ICE_WATER": "Jeges csapadék",
    "SURFACE_PRESSURE": "Felületi nyomás",
    "RELATIVE_HUMIDITY": "Relatív páratartalom",
    "TOTAL_CLOUD_COVER": "Teljes felhőtakaró",
    "WIND_U": "Szél(U)",
    "WIND_V": "Szél(V)",
    "TEMPERATURE": "Hőmérséklet",
    "SOLAR_RAD": "Napsugárzás",
    "TERMAL_RAD": "Hősugárzás",
    "TOP_NET_SOLAR_RAD": "Napsugárzás atm. tetején",
    "TOTAL_PRECIPATION": "Össz. csapadék",
    "HOUR": 'Óra',
    'MONTH': 'Hónap',
    'YEAR': 'Év',
    'DAY': 'Nap',
}

# Subset of columns shown in the heatmap, relabelled for display. index=str
# converts every index label to a string. df_to_plot is reused by a later
# cell.
heatmap_columns = ['POWER', 'TOP_NET_SOLAR_RAD', 'SOLAR_RAD', 'TEMPERATURE', 'RELATIVE_HUMIDITY', 'HOUR']
df_to_plot = df[heatmap_columns].rename(index=str, columns=column_mapping)
visualize.plot_heatmap(df_to_plot)

In [None]:
# Reuses df_to_plot (the relabelled column subset built in the heatmap cell),
# so that cell must have been run first.
visualize.plot_lags_and_auto(df_to_plot)

In [None]:
# Only the POWER column of the first frame in dfs is passed; the double
# brackets keep it a one-column DataFrame rather than a Series.
visualize.plot_moving_average_heatmap(dfs[0][['POWER']])

In [None]:
# Runs on every frame in dfs with ONE_MONTH (30 * ONE_DAY) as second argument.
# NOTE(review): presumably the forecast horizon — confirm in Models.naive.
Models.naive(dfs,ONE_MONTH)

In [None]:
# Runs on the first frame in dfs only, with ONE_MONTH and ONE_WEEK as the
# second and third arguments.
# NOTE(review): the meaning of the two spans is defined in Models.fft — confirm there.
Models.fft(dfs[0],ONE_MONTH,ONE_WEEK)

In [None]:
import numpy as np
from sklearn.ensemble import AdaBoostRegressor,BaggingRegressor,RandomForestRegressor
from sklearn.linear_model import LinearRegression,Lasso,ElasticNet,Ridge,SGDRegressor
from xgboost import XGBRegressor
# Regressors to compare, with the display name of each at the same position.
# NOTE(review): none of the estimators is given a seed, so results may differ
# between runs — consider fixing random_state.
names = ["XGBoost", "RandomForestRegressor", "AdaBoostRegressor", "BaggingRegressor"]
models = [XGBRegressor(nthread=8), RandomForestRegressor(), AdaBoostRegressor(), BaggingRegressor()]
windows = np.arange(1, 7)  # 1, 2, ..., 6

# Column sets handed to Models.do_all as its second argument.
base_columns = ["POWER", "HOUR", "ZONEID"]
feature_columns = base_columns + ['TOP_NET_SOLAR_RAD', 'SOLAR_RAD', 'TEMPERATURE', 'RELATIVE_HUMIDITY']

# Each entry: (printed label, second argument, fifth argument, tenth argument).
# NOTE(review): judging by the labels, the fifth argument lists the columns
# that get rolling-window features and the boolean switches them on or off —
# confirm against Models.do_all.
experiments = [
    ("\nJust Power", base_columns, [TARGET_COLUMN], True),
    ("\nJust Features", feature_columns, [TARGET_COLUMN], False),
    ("\nFeatures + Rolling", feature_columns, [TARGET_COLUMN, 'TOP_NET_SOLAR_RAD', 'SOLAR_RAD'], True),
]

# Each entry: (printed label, ninth argument of Models.do_all).
horizons = [
    ("One week", ONE_WEEK),
    ("One month", ONE_MONTH),
    ("One year", ONE_YEAR),
]

# The one-year "Just Power" run is the only one that passes an additional
# trailing positional False. Kept exactly as before.
# NOTE(review): the purpose of this extra argument is not visible here —
# confirm it is intentional.
extra_positional_args = {("One year", "\nJust Power"): (False,)}

for horizon_label, horizon in horizons:
    print(horizon_label)
    for experiment_label, columns, rolling_columns, flag in experiments:
        print(experiment_label)
        extra = extra_positional_args.get((horizon_label, experiment_label), ())
        # Fresh list copies per call, as before, in case do_all mutates its
        # arguments.
        Models.do_all(
            dfs,
            list(columns),
            models,
            names,
            list(rolling_columns),
            windows,
            ONE_DAY,
            ONE_YEAR,
            horizon,
            flag,
            *extra,
        )