# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_curve
from sklearn.metrics import f1_score
from sklearn.metrics import fbeta_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout
from tensorflow.python.keras.losses import SparseCategoricalCrossentropy
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split

%matplotlib inline

#print(tf.__version__)

# Carregamento de dados

In [2]:
%run "datasets/parse.ipynb"
train_df = parse_train("")
test_df = parse_test("")

# SEED utilizada

In [3]:
SEED = 2022

# Preparação dos dados

In [4]:
categorical_to_numerical = {
    'Injecao na rede (kWh)': {
        'None': 0,
        'Low': 1,
        'Medium': 2,
        'High':3,
        'Very High':4
    },
    'weather_description': {
        'overcast clouds': 0,
        'scattered clouds': 1,
        'few clouds': 2,
        'sky is clear': 3,
        'broken clouds': 4,
        'light rain': 5,
        'moderate rain': 6,
        'heavy intensity rain':7
    }
}

numeric_columns = ['Hora', 'Normal (kWh)', 'Horario Economico (kWh)', 'Autoconsumo (kWh)', 'temp','feels_like','temp_min','temp_max','pressure','humidity', 'wind_speed','clouds_all','rain_1h']


In [5]:
def neural_network_data_preparation(df: pd.DataFrame,test_or_train) -> pd.DataFrame:
    prep_df= df
    prep_df['rain_1h'] = prep_df['rain_1h'].replace('', '0')
    prep_df['rain_1h'] = pd.to_numeric(prep_df['rain_1h'])
            
    ### Converter as features categoricas em numericas
    prep_df.replace(categorical_to_numerical, inplace=True)

    ### Extrair a hora e dia da semana da feature 'record_date'
    record_date = pd.DatetimeIndex(prep_df['Data'])

    prep_df['Month'] = record_date.month
    prep_df['Day'] = record_date.day
    prep_df['Year'] = record_date.year

    dropped_columns = ['grnd_level','sea_level','dt','Data','city_name']
    prep_df = prep_df.drop(dropped_columns, axis=1)

    prep_df.loc[(prep_df['Hora'] < 6) & (prep_df['Hora'] > 21), 'Injecao na rede (kWh)'] = 0

    

   
    ### Limites superior e inferior (sem outliers) dos diagramas de caixa
    whiskers = []
    for num in numeric_columns:
        Q1 = prep_df[num].quantile(0.25)
        Q3 = prep_df[num].quantile(0.75)
        IQR = Q3 - Q1

        lower = Q1 - 1.5 * IQR
        upper = Q3 + 1.5 * IQR
        whiskers.append((num,lower,upper))

    ### Tratar os outliers das features numéricas:
    ### Substituir os valores dos outliers pela mediana
    if(train_test_split == "train"):
        for num, lower, upper in whiskers:
            prep_df = prep_df[(prep_df[num] >= lower) & (prep_df[num] <= upper)]

    return prep_df

In [6]:
X = neural_network_data_preparation(train_df,"train")

In [7]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2256 entries, 0 to 2255
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Hora                     2256 non-null   int64  
 1   Normal (kWh)             2256 non-null   float64
 2   Horario Economico (kWh)  2256 non-null   float64
 3   Autoconsumo (kWh)        2256 non-null   float64
 4   Injecao na rede (kWh)    2256 non-null   int64  
 5   temp                     2256 non-null   float64
 6   feels_like               2256 non-null   float64
 7   temp_min                 2256 non-null   float64
 8   temp_max                 2256 non-null   float64
 9   pressure                 2256 non-null   int64  
 10  humidity                 2256 non-null   int64  
 11  wind_speed               2256 non-null   float64
 12  rain_1h                  2256 non-null   float64
 13  clouds_all               2256 non-null   int64  
 14  weather_description     

In [8]:
y = X['Injecao na rede (kWh)']

X.drop(columns=['Injecao na rede (kWh)'], inplace=True)

In [9]:
scaler_X = MinMaxScaler(feature_range=(0, 1)).fit(X)
X_scaled = pd.DataFrame(scaler_X.transform(X[X.columns]), columns=X.columns)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=SEED)

# Construção da estrutura da rede neuronal

In [11]:
from keras import regularizers
from tensorflow import keras

optimizer = keras.optimizers.Adam(learning_rate=0.001)


def build_model():
    model = keras.Sequential()

    model.add(Dense(16, input_dim=12, activation='relu',
                    kernel_regularizer=regularizers.l2(0.001),
                    ))  # Input Layer

    model.add(Dense(8, activation="relu",
                    kernel_regularizer=regularizers.l2(0.001),
                    ))

    model.add(Dense(5, activation='softmax',
                    kernel_regularizer=regularizers.l2(0.001),
                    ))  # Output Layer

    # Model compilation
    model.compile(loss=SparseCategoricalCrossentropy(), optimizer=optimizer,
                  #keras.optimizers.Adam(learning_rate=0.001),
                  metrics=['accuracy'])

    return model


In [12]:
model = build_model()

In [13]:
EPOCHS = 500
BATCH_SIZE = 100

In [14]:
history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

-------
-------
-------

In [15]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)



In [16]:
predicts = model.predict(X_test)

categories_predicted = [np.argmax(pred) for pred in predicts]



In [17]:
predicts[:5]

array([[5.90461865e-02, 1.14894345e-01, 7.22196817e-01, 1.02987461e-01,
        8.75178375e-04],
       [9.99823332e-01, 9.97160751e-05, 7.70194965e-05, 3.27787504e-08,
        1.73860611e-12],
       [5.83537680e-04, 6.13085646e-03, 8.73678327e-02, 2.98190773e-01,
        6.07726932e-01],
       [9.99954522e-01, 3.41030136e-05, 1.13257775e-05, 1.60547042e-09,
        1.28701041e-14],
       [9.99999821e-01, 7.47716555e-08, 2.72261840e-08, 2.53925548e-12,
        8.90032469e-16]], dtype=float32)

In [18]:
categories_predicted

[2,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 2,
 0,
 2,
 2,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 2,
 0,
 2,
 0,
 0,
 0,
 2,
 4,
 0,
 2,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 3,
 0,
 0,
 0,
 0,
 2,
 4,
 2,
 0,
 2,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 3,
 2,
 0,
 0,
 1,
 0,
 0,
 2,
 0,
 0,
 0,
 3,
 0,
 2,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 2,
 0,
 2,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 3,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 3,
 2,
 2,
 0,
 0,
 1,
 0,
 3,
 0,
 0,
 4,
 0,
 2,
 0,
 3,
 0,
 0,
 3,
 1,
 2,
 0,
 4,
 0,
 2,
 2,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 3,
 3,
 3,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 3,
 2,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,


In [19]:
y_test

1089    2
49      0
2174    4
453     0
887     0
       ..
637     3
1029    0
256     0
987     0
2090    0
Name: Injecao na rede (kWh), Length: 452, dtype: int64

# Obter as previsões no dataset de submissão

In [20]:
#test_df.info()
X = neural_network_data_preparation(test_df,"test")
# X.drop(columns=['Injecao na rede (kWh)'], inplace=True)

scaler_X = MinMaxScaler(feature_range=(0, 1)).fit(X)
X_scaled = pd.DataFrame(scaler_X.transform(X[X.columns]), columns=X.columns)

X_scaled

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0,Hora,Normal (kWh),Horario Economico (kWh),Autoconsumo (kWh),temp,feels_like,temp_min,temp_max,pressure,humidity,wind_speed,rain_1h,clouds_all,weather_description,Month,Day,Year,Injecao na rede (kWh)
0,0.000000,0.000000,0.168531,0.000000,0.040369,0.048576,0.045798,0.034201,0.433333,0.9625,0.186275,0.583072,1.0,0.857143,0.0,0.0,0.0,
1,0.043478,0.000000,0.208228,0.000000,0.042253,0.050646,0.048194,0.035794,0.400000,0.9625,0.201961,0.510972,1.0,0.857143,0.0,0.0,0.0,
2,0.086957,0.000000,0.124865,0.000000,0.042454,0.050846,0.048363,0.038913,0.366667,0.9625,0.270588,0.382445,1.0,0.857143,0.0,0.0,0.0,
3,0.130435,0.000000,0.097438,0.000000,0.042656,0.051080,0.043402,0.038913,0.333333,0.9625,0.344118,0.272727,1.0,0.857143,0.0,0.0,0.0,
4,0.173913,0.000000,0.090942,0.000000,0.042589,0.051013,0.043402,0.038913,0.300000,0.9625,0.437255,0.177116,1.0,0.857143,0.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2251,0.826087,0.281869,0.000000,0.010336,0.983819,0.982773,0.979514,0.990374,0.300000,0.3125,0.187255,0.000000,0.0,0.428571,1.0,0.1,0.0,
2252,0.869565,0.270630,0.000000,0.000000,0.976250,0.975295,0.972764,0.982408,0.333333,0.4250,0.189216,0.000000,0.0,0.428571,1.0,0.1,0.0,
2253,0.913043,0.141674,0.000000,0.000000,0.962760,0.961373,0.964428,0.973493,0.366667,0.5375,0.203922,0.000000,0.0,0.428571,1.0,0.1,0.0,
2254,0.956522,0.000000,0.179358,0.000000,0.953711,0.952092,0.958657,0.963019,0.366667,0.6250,0.241176,0.000000,0.0,0.428571,1.0,0.1,0.0,


In [21]:
categories_prob_predictions = model.predict(X_scaled)

categories_prob_predictions

InvalidArgumentError: Graph execution error:

Detected at node 'sequential/module_wrapper/dense/Relu' defined at (most recent call last):
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
      app.start()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\kernelapp.py", line 736, in start
      self.io_loop.start()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\asyncio\base_events.py", line 595, in run_forever
      self._run_once()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\asyncio\base_events.py", line 1881, in _run_once
      handle._run()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell
      await result
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\ipykernel\zmqshell.py", line 546, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell
      result = self._run_cell(
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell
      result = runner(coro)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Henrique Fernandes\AppData\Local\Temp\ipykernel_6696\431979401.py", line 1, in <module>
      categories_prob_predictions = model.predict(X_scaled)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\training.py", line 2253, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\training.py", line 2041, in predict_function
      return step_function(self, iterator)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\training.py", line 2027, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\training.py", line 2015, in run_step
      outputs = model.predict_step(data)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
      return self(x, training=False)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\keras\engine\functional.py", line 1665, in call
      return getattr(self._module, self._method_name)(*args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1044, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\tensorflow\python\keras\layers\core.py", line 1253, in call
      outputs = self.activation(outputs)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\tensorflow\python\keras\activations.py", line 312, in relu
      return backend.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
    File "c:\Users\Henrique Fernandes\.conda\envs\envNAME\lib\site-packages\tensorflow\python\keras\backend.py", line 4747, in relu
      x = nn.relu(x)
Node: 'sequential/module_wrapper/dense/Relu'
Matrix size-incompatible: In[0]: [32,18], In[1]: [17,16]
	 [[{{node sequential/module_wrapper/dense/Relu}}]] [Op:__inference_predict_function_41322]

In [None]:
numerical_predictions = [np.argmax(pred) for pred in categories_prob_predictions]

numerical_predictions[:10]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

In [None]:
numerical_predictions_df = pd.DataFrame(numerical_predictions)

numerical_to_categorical = {
    0: {
        0 : 'None',
        1 : 'Low',
        2 : 'Medium',
        3 : 'High',
        4 : 'Very High'
    }
}

predictions_df.replace(numerical_to_categorical, inplace=True)

predictions_df = numerical_predictions_df.replace(incidents_categories)

In [None]:
predictions_df.index += 1

predictions_df.to_csv("submission.csv", header=['Result'], index_label='RowId')