In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
data_inmet = pd.read_csv('../../model/additional_data/datasets/inmet_2019_2024.csv')
data_modis = pd.read_csv('../../model/data/modisTodos.csv')

In [16]:
# Verificar os tipos de dados das colunas de latitude e longitude
print("Tipos de dados em data_inmet:")
print(data_inmet.dtypes)
print("\nTipos de dados em data_modis:")
print(data_modis.dtypes)

Tipos de dados em data_inmet:
data                                                      object
latitude                                                 float64
longitude                                                float64
PRECIPITACAO TOTAL, HORARIO (mm)                         float64
PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)    float64
RADIACAO GLOBAL (KJ/m2)                                  float64
TEMPERATURA DO AR - BULBO SECO, HORARIA (0C)             float64
UMIDADE RELATIVA DO AR, HORARIA (%)                      float64
VENTO, RAJADA MAXIMA (m/s)                               float64
dtype: object

Tipos de dados em data_modis:
latitude      float64
longitude     float64
confidence    float64
dtype: object


In [17]:
# Verificar se há valores nulos
print("\nValores nulos em data_inmet:")
print(data_inmet[['latitude', 'longitude']].isnull().sum())
print("\nValores nulos em data_modis:")
print(data_modis[['latitude', 'longitude']].isnull().sum())


Valores nulos em data_inmet:
latitude     0
longitude    0
dtype: int64

Valores nulos em data_modis:
latitude     0
longitude    0
dtype: int64


In [18]:
# Converter as colunas para string, se necessário
data_inmet['latitude'] = data_inmet['latitude'].astype(str).str.replace(',', '.')
data_inmet['longitude'] = data_inmet['longitude'].astype(str).str.replace(',', '.')

data_modis['latitude'] = data_modis['latitude'].astype(str).str.replace(',', '.')
data_modis['longitude'] = data_modis['longitude'].astype(str).str.replace(',', '.')

In [19]:
# Ajustar os tipos de dados para latitude e longitude
data_inmet['latitude'] = data_inmet['latitude'].astype(float)
data_inmet['longitude'] = data_inmet['longitude'].astype(float)
data_modis['latitude'] = data_modis['latitude'].astype(float)
data_modis['longitude'] = data_modis['longitude'].astype(float)

In [20]:
# Arredondar coordenadas para combinar dados com tolerância
data_inmet['latitude'] = data_inmet['latitude'].round(4)
data_inmet['longitude'] = data_inmet['longitude'].round(4)
data_modis['latitude'] = data_modis['latitude'].round(4)
data_modis['longitude'] = data_modis['longitude'].round(4)

In [21]:
# Preparar os dados para Nearest Neighbors
inmet_coords = data_inmet[['latitude', 'longitude']].values
modis_coords = data_modis[['latitude', 'longitude']].values
nbrs = NearestNeighbors(n_neighbors=1, radius=0.01).fit(modis_coords)
distances, indices = nbrs.kneighbors(inmet_coords)

In [22]:
# Adicionar índice de correspondência e distância ao DataFrame do INMET
data_inmet['modis_index'] = indices.flatten()
data_inmet['distance'] = distances.flatten()

In [23]:
# Filtrar correspondências dentro da tolerância
tolerance = 0.1
filtered_inmet = data_inmet[data_inmet['distance'] < tolerance]

In [24]:
# Fazer o merge
combined_data_tolerance = pd.merge(filtered_inmet, data_modis, left_on='modis_index', right_index=True, how='inner')

# Verifique o DataFrame combinado com tolerância
print("DataFrame Combinado com Tolerância:")
print(combined_data_tolerance.head())

DataFrame Combinado com Tolerância:
         data  latitude_x  longitude_x  PRECIPITACAO TOTAL, HORARIO (mm)  \
0  2019-01-01    -15.7893     -47.9258                               1.4   
1  2019-01-02    -15.7893     -47.9258                               0.0   
2  2019-01-03    -15.7893     -47.9258                               0.0   
3  2019-01-04    -15.7893     -47.9258                               0.0   
4  2019-01-05    -15.7893     -47.9258                               1.0   

   PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)  \
0                                             888.02       
1                                             888.80       
2                                             887.62       
3                                             887.54       
4                                             887.96       

   RADIACAO GLOBAL (KJ/m2)  TEMPERATURA DO AR - BULBO SECO, HORARIA (0C)  \
0                  1192.79                                         20.

In [30]:
# Divida os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
# Criar o modelo Feedforward
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [32]:
# Compilar o modelo
model.compile(loss='mean_squared_error', optimizer='adam')

In [33]:
# Treinar o modelo
model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=1)

Epoch 1/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 315us/step - loss: 1718.4664
Epoch 2/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 304us/step - loss: 868.7367
Epoch 3/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 323us/step - loss: 834.3995
Epoch 4/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 300us/step - loss: 819.9442
Epoch 5/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 300us/step - loss: 811.8189
Epoch 6/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 298us/step - loss: 814.2697
Epoch 7/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 299us/step - loss: 808.2418
Epoch 8/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 301us/step - loss: 798.2357
Epoch 9/100
[1m3739/3739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 300us/step - loss: 809.6699
Epoch 10/100
[1m3739/3739

<keras.src.callbacks.history.History at 0x106c1de80>

In [34]:
# Fazer previsões
predictions = model.predict(X_test)

[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 520us/step


In [45]:
# Adicionar as previsões ao DataFrame
X_test = X_test.copy()
X_test['predicted_confidence'] = predictions

In [49]:
print(combined_data_tolerance)

             data  latitude_x  longitude_x  PRECIPITACAO TOTAL, HORARIO (mm)  \
0      2019-01-01    -15.7893     -47.9258                               1.4   
1      2019-01-02    -15.7893     -47.9258                               0.0   
2      2019-01-03    -15.7893     -47.9258                               0.0   
3      2019-01-04    -15.7893     -47.9258                               0.0   
4      2019-01-05    -15.7893     -47.9258                               1.0   
...           ...         ...          ...                               ...   
48803  2024-08-27    -27.6025     -48.6200                               0.0   
48804  2024-08-28    -27.6025     -48.6200                               0.0   
48805  2024-08-29    -27.6025     -48.6200                               0.0   
48806  2024-08-30    -27.6025     -48.6200                               0.0   
48807  2024-08-31    -27.6025     -48.6200                               0.0   

       PRESSAO ATMOSFERICA AO NIVEL DA 

In [50]:
# Verifique se o DataFrame combinado não está vazio
if not combined_data_tolerance.empty:
    # Obter as previsões (supondo que você já tenha feito a previsão e adicionado a coluna 'predicted_confidence')
    final_df = combined_data_tolerance[['latitude_x', 'longitude_x']].copy()  # Copiar as latitudes e longitudes

    # Adicionar a coluna de previsões de confiança
    final_df['predicted_confidence'] = X_test['predicted_confidence'].reset_index(drop=True)

    # Resetar o índice do DataFrame final
    final_df.reset_index(drop=True, inplace=True)

    # Exibir o DataFrame final
    print("DataFrame final com latitude, longitude e predicted_confidence:")
    print(final_df.head())
else:
    print("O DataFrame combinado está vazio. Não há dados para criar o DataFrame final.")

DataFrame final com latitude, longitude e predicted_confidence:
   latitude_x  longitude_x  predicted_confidence
0    -15.7893     -47.9258             29.565857
1    -15.7893     -47.9258             45.698273
2    -15.7893     -47.9258             34.051170
3    -15.7893     -47.9258             27.683376
4    -15.7893     -47.9258             38.915058


In [None]:
# Salvar o DataFrame em um arquivo CSV
final_df.to_csv('previsao_confidence_feedforward.csv', index=False)