## Importamos las librerías

In [1]:
import pandas as pd
import numpy as np
from modelo_energetico.csv_gen import reduce_columns_sum, reduce_columns_avg, reduce_columns_period_avg, reduce_columns_period_sum, total_q_hour
from modelo_energetico.scaler import MultiScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor, Lasso, Ridge
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import OrdinalEncoder

## Cargamos los datasets

In [2]:
# Read the raw feature and target tables; the first CSV column becomes the row index.
X = pd.read_csv('../raw_data/X_train.csv', index_col=0)
y = pd.read_csv('../raw_data/y_train.csv', index_col=0)

# Show (rows, columns) for each table, one per line.
print(X.shape, y.shape, sep='\n')

(7500, 12115)
(7500, 5376)


In [3]:
# Split the last 672 columns off y and append them to X as extra features.
# NOTE(review): this cell rebinds X and y, so re-running it moves another
# 672 columns each time; run it exactly once after loading.
cols_t = y.iloc[:, -672:]
y = y.iloc[:, :-672]
X = pd.concat(
    [X, cols_t],
    axis=1,
)

In [None]:
# Names of the seven Q_* series; the same list is passed to reduce_columns_sum further down.
# NOTE(review): bare expression that only echoes the tuple -- nothing is assigned here.
'Q_AC_OFFICE', 'Q_HEAT_OFFICE', 'Q_PEOPLE', 'Q_EQP', 'Q_LIGHT', 'Q_AHU_C', 'Q_AHU_H'

In [21]:
# Peek at the first rows of column Q_AHU_H_671 (presumably the last time step of that series).
y['Q_AHU_H_671'].head()

index
0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: Q_AHU_H_671, dtype: float64

In [22]:
# Build the Q_0..Q_671 table with total_q_hour for inspection only.
# The result gets its own name: rebinding y here would discard the named Q_*
# series that the target-reduction cells further down still read from y.
y_q_hour = total_q_hour(y)
y_q_hour.head()

  new_dataframe[f'Q_{i}'] = np.zeros(dataframe.shape[0])


Unnamed: 0,Q_0,Q_1,Q_2,Q_3,Q_4,Q_5,Q_6,Q_7,Q_8,Q_9,...,Q_662,Q_663,Q_664,Q_665,Q_666,Q_667,Q_668,Q_669,Q_670,Q_671
0,0.0,1.6818,1.6818,1.6818,1.6818,1.6818,1.6818,35.424,38.7781,38.7781,...,88.8217,95.9126,102.8931,102.8037,48.4459,12.9279,1.6818,5.1005,11.4541,8.684
1,0.0,2.9261,2.9261,2.9261,2.9261,2.9261,2.9261,44.874,44.874,44.874,...,23.4454,21.4724,22.6562,23.84,2.9261,2.9261,2.9261,2.9261,2.9261,2.9261
2,0.0,3.6095,3.6095,3.6095,3.6095,3.6095,3.6095,95.0924,92.1329,87.3977,...,78.1176,77.4426,79.144,82.4168,84.3485,13.3174,3.6095,3.6095,3.6095,3.6095
3,0.0,7.2196,7.2196,7.2196,7.2196,7.2196,7.2196,50.6268,49.739,48.2592,...,52.7971,52.4025,52.0079,52.4025,52.7971,7.2196,7.2196,7.2196,7.2196,7.2196
4,0.0,6.4811,6.4811,6.4811,6.4811,6.4811,6.4811,6.4811,6.4811,10.2298,...,70.2297,74.8921,78.2945,78.5638,6.4811,6.4811,13.5212,30.9471,25.7187,12.1803


### Reducimos los features

In [None]:
# Base name of every time series in X, derived from its '_671' column.
# Matching on the suffix (rather than any '671' substring) keeps unrelated columns
# out, and the slice length is tied to the suffix instead of a magic 4.
columns_s = [name[:-len('_671')] for name in X.columns if name.endswith('_671')]
columns_s

In [None]:
# Reduce the series listed in columns_s with reduce_columns_period_avg (third argument: 28).
# NOTE(review): X_red_1 is not referenced again in the cells shown here.
X_red_1 = reduce_columns_period_avg(X, columns_s, 28)
X_red_1.head()

In [None]:
# Reduced feature table; X_red is what gets concatenated into X further down.
# NOTE(review): the meaning of the third argument (3) is defined by reduce_columns_avg -- confirm.
X_red = reduce_columns_avg(X, columns_s, 3)
X_red.head()

In [None]:
# Inspect the labels of the first 19 columns of X.
# NOTE(review): columns_c is reassigned to the first 16 columns in the next cell.
columns_c = X.columns[0:19]
columns_c

In [None]:
# Keep the labels of the first 16 columns of X
# (presumably the static, non time-series features -- confirm).
columns_c = X.columns[0:16]

In [None]:
# Distinct values per selected column (NaN counts as a value), displayed in ascending order.
unique_values = X[columns_c].nunique(dropna=False)
unique_values.sort_values()

In [None]:
# Re-derive columns_c ordered by cardinality, dropping the first (lowest-cardinality) label.
columns_c = unique_values.sort_values().index[1:]

In [None]:
# Fit an OrdinalEncoder on the selected columns.
# NOTE(review): only fit() is called; no transform() with this encoder appears in
# the cells shown here, so X[columns_c] is used un-encoded below -- confirm intent.
ordinal_encoder = OrdinalEncoder()
ordinal_encoder.fit(X[columns_c])

In [None]:
# Replace X with the selected columns followed by the reduced series columns.
X = pd.concat(
    [X[columns_c], X_red],
    axis=1,
)

In [None]:
# Preview the first rows of the rebuilt feature table.
X.head()

### Reducimos los targets

In [None]:
# Base name of every time series in y, derived from its '_671' column.
# Matching on the suffix (rather than any '671' substring) keeps unrelated columns
# out, and the slice length is tied to the suffix instead of a magic 4.
columns_s = [name[:-len('_671')] for name in y.columns if name.endswith('_671')]
columns_s

In [None]:
# Reduce each Q_* series with reduce_columns_sum, then add the row-wise total as Q_TOTAL.
y_red = reduce_columns_sum(
    y,
    ['Q_AC_OFFICE', 'Q_HEAT_OFFICE', 'Q_PEOPLE', 'Q_EQP', 'Q_LIGHT', 'Q_AHU_C', 'Q_AHU_H'],
    1,
)
y_red['Q_TOTAL'] = y_red.sum(axis=1)
y_red.head()

In [None]:
# Reduce the T_INT_OFFICE series with reduce_columns_avg (third argument: 1).
# NOTE(review): an earlier cell moves the last 672 columns of y into X as cols_t;
# confirm the T_INT_OFFICE_* columns are still present in y when this runs.
y_red_t = reduce_columns_avg(y, ['T_INT_OFFICE'], 1)
y_red_t.head()

In [None]:
# Final target table: Q_TOTAL alongside the reduced T_INT_OFFICE column(s).
y = pd.concat(
    [y_red['Q_TOTAL'], y_red_t],
    axis=1,
)
y.head()

### Corroboramos los nuevos shapes

In [None]:
# (rows, columns) of the reduced feature and target tables, one per line.
print(X.shape, y.shape, sep='\n')

## Dividimos los datasets en train y test

In [None]:
# Hold out 30% of the rows for testing; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Shapes of the four split tables, in the order X_train, y_train, X_test, y_test.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

## Buscamos modelo óptimo para predecir temperatura y consumo

### Modelo para temperatura

In [None]:
# Shared pipeline: a MultiScaler step followed by an SGD linear regressor.
# random_state pins SGDRegressor's data shuffling and the validation split used by
# early_stopping, so repeated runs of the searches and importance cells are comparable.
pipe = Pipeline([
    ("scaling", MultiScaler(scaler="RobustScaler")),
    ("model", SGDRegressor(
        max_iter=50000,
        early_stopping=True,
        n_iter_no_change=3,
        tol=1e-3,
        random_state=42,
    )),
])

In [None]:
# Print every parameter name exposed by the pipeline, one per line
# (these are the keys usable in a search grid).
for param_name in pipe.get_params():
    print(param_name)

In [None]:
# Training data for the temperature model (aliases of the split, not copies).
X_train_t = X_train
y_train_t = y_train

# Search space.
# l1_ratio is only used by SGDRegressor when penalty='elasticnet'; without setting
# the penalty, the five l1_ratio values would be fitted as five equivalent candidates.
# NOTE(review): the pipeline builds MultiScaler with a string ("RobustScaler") while
# this grid passes scaler instances -- confirm MultiScaler accepts both.
params = {
    'scaling__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
    'model__penalty': ['elasticnet'],
    'model__loss': ['huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
    'model__alpha': [1, 0.1, 0.01, 0.001, 0.0001],
    'model__l1_ratio': [1, 0.1, 0.01, 0.001, 0.0001],
}

# Exhaustive 10-fold search scored by R^2, using all available cores.
grid = GridSearchCV(
    pipe,
    param_grid=params,
    cv=10,
    n_jobs=-1,
    verbose=1,
    scoring='r2',
)

grid.fit(X_train_t, y_train_t['T_INT_OFFICE_0'])

In [None]:
# Best pipeline found by the temperature grid search.
# NOTE(review): the name best_model is reused by the consumption section further down.
best_model = grid.best_estimator_
best_model

### Buscamos los features con importancia negativa para el score de temperatura

In [None]:
# Fit the selected pipeline on the temperature target.
# (GridSearchCV with its default refit=True has already fitted best_estimator_ on
# this same data; the explicit fit is kept from the original flow.)
best_model.fit(X_train_t, y_train_t['T_INT_OFFICE_0'])

# Permutation importance measured on the training data.
# random_state makes the shuffles, and therefore the scores, repeatable.
permutation_score = permutation_importance(
    best_model,
    X_train_t,
    y_train_t['T_INT_OFFICE_0'],
    n_repeats=10,
    random_state=42,
)

# One row per feature. Building the frame from a dict keeps 'score decrease' as a
# float column; stacking names and scores into a single array made both columns
# object dtype. Feature names are taken from the frame that was actually scored.
importance_df = pd.DataFrame({
    'feature': X_train_t.columns.to_numpy(),
    'score decrease': permutation_score.importances_mean,
})

In [None]:
# Keep only the features whose mean permutation score is negative,
# then display them from most negative to least negative.
importance_df = importance_df.loc[importance_df['score decrease'] < 0]
importance_df.sort_values("score decrease")

In [None]:
# Names of the features left in importance_df (negative score decrease) for the temperature model.
neg_score_t = importance_df['feature']
neg_score_t

### Modelo para consumo

In [None]:
# Training data for the consumption model (aliases of the split, not copies).
X_train_q = X_train
y_train_q = y_train

# Search space.
# l1_ratio is only used by SGDRegressor when penalty='elasticnet'; without setting
# the penalty, the five l1_ratio values would be fitted as five equivalent candidates.
# NOTE(review): the pipeline builds MultiScaler with a string ("RobustScaler") while
# this grid passes scaler instances -- confirm MultiScaler accepts both.
params = {
    'scaling__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
    'model__penalty': ['elasticnet'],
    'model__loss': ['huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
    'model__alpha': [1, 0.1, 0.01, 0.001, 0.0001],
    'model__l1_ratio': [1, 0.1, 0.01, 0.001, 0.0001],
}

# Exhaustive 10-fold search scored by R^2, using all available cores.
grid = GridSearchCV(
    pipe,
    param_grid=params,
    cv=10,
    n_jobs=-1,
    verbose=1,
    scoring='r2',
)

grid.fit(X_train_q, y_train_q['Q_TOTAL'])

In [None]:
# Best pipeline found by the consumption grid search.
# NOTE(review): this rebinds best_model, replacing the temperature model stored under the same name.
best_model = grid.best_estimator_
best_model

### Buscamos los features con importancia negativa para el score de consumo

In [None]:
# Fit the selected pipeline on the consumption target.
# (GridSearchCV with its default refit=True has already fitted best_estimator_ on
# this same data; the explicit fit is kept from the original flow.)
best_model.fit(X_train_q, y_train_q['Q_TOTAL'])

# Permutation importance measured on the training data.
# random_state makes the shuffles, and therefore the scores, repeatable.
permutation_score = permutation_importance(
    best_model,
    X_train_q,
    y_train_q['Q_TOTAL'],
    n_repeats=10,
    random_state=42,
)

# One row per feature. Building the frame from a dict keeps 'score decrease' as a
# float column; stacking names and scores into a single array made both columns
# object dtype. Feature names are taken from the frame that was actually scored.
importance_df = pd.DataFrame({
    'feature': X_train_q.columns.to_numpy(),
    'score decrease': permutation_score.importances_mean,
})

In [None]:
# Keep only the features whose mean permutation score is negative,
# then display them from most negative to least negative.
importance_df = importance_df.loc[importance_df['score decrease'] < 0]
importance_df.sort_values("score decrease")

In [None]:
# Names of the features left in importance_df (negative score decrease) for the
# consumption model. Stored under its own name: reusing neg_score_t here overwrote
# the temperature result collected earlier in the notebook.
neg_score_q = importance_df['feature']
neg_score_q