In [76]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.model_selection import train_test_split

In [119]:
#loading initial dataframe
df = pd.read_csv('../datasets/pipeline_df.csv', delimiter=',')
df['datetime_converted'] = pd.to_datetime(df['datetime_converted'], utc=True)
df = df.set_index(pd.DatetimeIndex(df['datetime_converted'],dayfirst=True))
df = df.drop(columns='datetime_converted.1')

In [121]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['Value (kWh)']), df['Value (kWh)'], test_size = 0.2, random_state=42)

In [128]:
X_train.columns

Index(['datetime_converted', 'Cloud amount (1/8)', 'Pressure (msl) (hPa)',
       'Relative humidity (%)', 'Precipitation intensity (mm/h)',
       'Snow depth (cm)', 'Air temperature (degC)',
       'Dew-point temperature (degC)', 'Horizontal visibility (m)',
       'Wind direction (deg)', 'Gust speed (m/s)', 'Wind speed (m/s)',
       'Global radiation (W/m2)'],
      dtype='object')

In [125]:
class WindDiscretizer(BaseEstimator, TransformerMixin):
    '''Discretization of Wind'''
    def __init__(self,  variables=None):
        if not isinstance(variables, list):
            self.variables = [variables]
        else:
            self.variables = variables

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X = X.copy()
        buckets = np.arange(11.25, 372, 22.5)
        labels = np.arange(17)
        for feature in self.variables:
            X[feature][(X[feature]>=0)&(X[feature]<11.25)] = X[feature].apply(lambda x:x+360)
            X[feature]=labels[np.digitize(X[feature], buckets)]
        return X

In [171]:
class DiscretizerNumericalIntoBinary(BaseEstimator, TransformerMixin):
    '''Discretization of cloud coverage and precipitation intensity into binary'''
    def __init__(self, boundaries, variables=None):
        if not isinstance(variables, list):
            self.variables = [variables]
        else:
            self.variables = variables
        self.boundaries=boundaries

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X = X.copy()
        for feature in self.variables:
            X[feature] = np.where(X[feature]>self.boundaries,1,0)
        return X

In [185]:
class DropUnnecessaryFeatures(BaseEstimator, TransformerMixin):
    def __init__(self, variables_to_drop=None):
        self.variables = variables_to_drop

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X = X.copy()
        X = X.drop(self.variables, axis=1)

        return X

In [None]:
# additional features

In [221]:
class TemporalFeatureHour(BaseEstimator, TransformerMixin):
        #remove for loop in the transform
        def __init__(self, variables=None):
            if not isinstance(variables, list):
                self.variables =  [variables]
            else:
                self.variables = variables

        def fit(self, X, y=None):
            return self
        
        def transform(self, X):
            X = X.copy()
            for feature in self.variables:
                X[feature] = X.index.hour
            
            return X

class TemporalFeatureDayofYear(BaseEstimator, TransformerMixin):
        #remove for loop in the transform
        def __init__(self, variables=None):
            if not isinstance(variables, list):
                self.variables =  [variables]
            else:
                self.variables = variables

        def fit(self, X, y=None):
            return self
        
        def transform(self, X):
            X = X.copy()
            for feature in self.variables:
                X[feature] = X.index.dayofyear
            
            return X     

In [240]:
from azimuth_one_script import *
longtitude = 24.9693
latitude = 60.1867
delta_GMT = 3

def calc_sun_azimuth_for_df(N, time, lon=longtitude, delta_GMT=3, phi=latitude):
    delta = calc_delta(N)
    omega = calc_omega(N, lon, delta_GMT, time)
    alpha_s = calc_alpha_s(phi, delta, omega)
    gamma_s = calc_gamma_s(alpha_s, phi, delta, omega)
    return gamma_s

def calc_alpha_s_for_df(N, time, lon=longtitude, delta_GMT=3, phi=latitude):
    delta = calc_delta(N)
    omega = calc_omega(N, lon, delta_GMT, time)
    alpha_s = calc_alpha_s(phi, delta, omega)
    return alpha_s

class SolarElevAngle(BaseEstimator, TransformerMixin):
    
        def __init__(self, var_name=None, day=None, hour=None): 
            self.var_name = var_name
            self.day = day
            self.hour = hour

        def fit(self, X, y=None):
            return self
        
        def transform(self, X):
            X = X.copy()
            X[self.var_name] = X.apply(lambda x: calc_alpha_s_for_df(N=x[self.day],time=x[self.day]),axis=1)
            
            return X 
        
class SunAzimuth(BaseEstimator, TransformerMixin):
    
        def __init__(self, var_name=None, day=None, hour=None): 
            self.var_name = var_name
            self.day = day
            self.hour = hour

        def fit(self, X, y=None):
            return self
        
        def transform(self, X):
            X = X.copy()
            X[self.var_name] = X.apply(lambda x: calc_sun_azimuth_for_df(N=x[self.day],time=x[self.day]),axis=1)
            
            return X 

In [224]:
a = SolarElevAngle(var_name=['Solar angle'], day='DayofYear', hour='hour')

In [225]:
a.fit_transform(X_train)

Unnamed: 0_level_0,datetime_converted,Cloud amount (1/8),Pressure (msl) (hPa),Relative humidity (%),Precipitation intensity (mm/h),Snow depth (cm),Air temperature (degC),Dew-point temperature (degC),Horizontal visibility (m),Wind direction (deg),Gust speed (m/s),Wind speed (m/s),Global radiation (W/m2),day_of_the_year
datetime_converted,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-10-17 19:00:00+00:00,2015-10-17 19:00:00+00:00,0.0,1020.3,94.0,0.0,-1.0,5.6,4.7,14520.0,251.0,5.4,4.2,-1.9,290
2016-06-05 15:00:00+00:00,2016-06-05 15:00:00+00:00,7.0,1017.6,33.0,0.0,-1.0,12.9,-2.9,40230.0,357.0,11.7,5.8,404.7,157
2016-06-15 07:00:00+00:00,2016-06-15 07:00:00+00:00,0.0,1011.2,41.0,0.0,-1.0,18.9,5.4,42910.0,89.0,5.3,3.4,653.8,167
2015-10-05 18:00:00+00:00,2015-10-05 18:00:00+00:00,0.0,1022.7,61.0,0.0,-1.0,4.6,-2.3,50000.0,336.0,5.2,3.5,-2.3,278
2015-08-12 03:00:00+00:00,2015-08-12 03:00:00+00:00,1.0,1019.4,94.0,0.0,-1.0,16.1,15.1,14820.0,287.0,2.9,2.5,19.2,224
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-12-23 07:00:00+00:00,2015-12-23 07:00:00+00:00,5.0,993.3,85.0,0.0,0.0,7.0,4.7,50000.0,241.0,10.7,7.3,0.1,357
2016-07-21 22:00:00+00:00,2016-07-21 22:00:00+00:00,8.0,1015.9,79.0,0.0,-1.0,17.9,14.2,33560.0,354.0,5.9,3.5,-1.0,203
2019-12-04 18:00:00+00:00,2019-12-04 18:00:00+00:00,0.0,1005.2,92.0,0.0,0.0,2.9,1.7,50000.0,241.0,6.7,4.5,-1.6,338
2015-05-06 21:00:00+00:00,2015-05-06 21:00:00+00:00,7.0,1011.2,100.0,0.0,-1.0,7.8,7.8,2090.0,139.0,1.9,1.4,-0.8,126


In [241]:
pipe = Pipeline([
    ('wind_disc', WindDiscretizer(variables=['Wind direction (deg)'])),
    ('precipitation_binary', DiscretizerNumericalIntoBinary(boundaries=0, variables=['Precipitation intensity (mm/h)'])),
    ('cloud_binary', DiscretizerNumericalIntoBinary(boundaries=5, variables=['Cloud amount (1/8)'])),
    ('temporal_dayofyear', TemporalFeatureDayofYear(variables=['DayofYear'])),
    ('temporal_hour', TemporalFeatureHour(variables=['hour'])),
    ('solar_angle', SolarElevAngle(var_name='Solar angle', day='DayofYear', hour='hour')),
    ('sun_azimuth', SunAzimuth(var_name='Sun azimuth', day='DayofYear', hour='hour')),
    ('drop_features', DropUnnecessaryFeatures(variables_to_drop=['datetime_converted'])),
    ('rf_regressor', RandomForestRegressor(n_estimators=2, max_depth=2))
])

In [242]:
a = pipe.fit(X_train, y_train)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature][(X[feature]>=0)&(X[feature]<11.25)] = X[feature].apply(lambda x:x+360)


In [243]:
a.predict(X_train)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature][(X[feature]>=0)&(X[feature]<11.25)] = X[feature].apply(lambda x:x+360)


array([  2.82528612, 104.08590984, 187.40087556, ...,   2.82528612,
         2.82528612,   2.82528612])

In [244]:
a



Pipeline(memory=None,
         steps=[('wind_disc',
                 WindDiscretizer(variables=['Wind direction (deg)'])),
                ('precipitation_binary',
                 DiscretizerNumericalIntoBinary(boundaries=0,
                                                variables=['Precipitation '
                                                           'intensity '
                                                           '(mm/h)'])),
                ('cloud_binary',
                 DiscretizerNumericalIntoBinary(boundaries=5,
                                                variables=['Cloud amount '
                                                           '(1/8)'])),
                ('temporal_dayofyear',
                 TemporalFeatureDayofYear...
                 RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                       criterion='mse', max_depth=2,
                                       max_features='auto', max_leaf_nodes=None,
         