<a href="https://colab.research.google.com/github/sergioopereira/AI/blob/main/mod4_redes_neurais/Code/NovoDataPrep/.ipynb_checkpoints/01_ConciliarDados-checkpoint.v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
import pandas as pd
import numpy as np

### **A) Importar Dados de Direção do Vento, Pressão, Temperatura e Magnitude do Vento**

In [41]:
# Locations of the input CSV files ("processed/<file name>.csv"),
# listed in the order in which they are merged further down.
output_vento = 'processed/teste_03_patio_ventoverif.csv'
output_pressao = 'processed/teste_03_patio_pressao.csv'
output_temp = 'processed/teste_03_patio_temperatura.csv'
output_direcao = 'processed/teste_03_patio_direcao.csv'

In [42]:
# Load every CSV into its own DataFrame. The files are ';'-separated and
# their first column is used as the row index (index_col=0).
_csv_opts = dict(sep=";", index_col=0)
df_direcao = pd.read_csv(output_direcao, **_csv_opts)
df_pressao = pd.read_csv(output_pressao, **_csv_opts)
df_temp = pd.read_csv(output_temp, **_csv_opts)
df_vento = pd.read_csv(output_vento, **_csv_opts)

# NOTE: if index_col=0 is not used, the DataFrame gets an extra "Unnamed" column
# representing the index; that column can be removed with:
# df.drop(df.columns[0], axis=1, inplace=True)

### **B) Sobre os 4 conjuntos de dados**

#### <span style="color:#DC143C">**B1. Formato dos Dados**</span>

In [43]:
# Print the (rows, columns) shape of each source DataFrame
print(f"Direcao:{df_direcao.shape}")
print(f"Pressao:{df_pressao.shape}")
print(f"Temperatura:{df_temp.shape}")
print(f"Vento:{df_vento.shape}")

Direcao:(8758, 4)
Pressao:(8451, 4)
Temperatura:(8454, 4)
Vento:(8760, 4)


#### <span style="color:#DC143C">**B2. Visão dos Dados**</span>

In [44]:
# Preview the first rows of the wind-direction DataFrame (head() defaults to 5 rows)
df_direcao.head()

Unnamed: 0,Longitude,Latitude,Data,Direcao_Verif
1,-42.75,-14.25,2017-01-01 01:00:00,119.2
2,-42.75,-14.25,2017-01-01 02:00:00,120.98
3,-42.75,-14.25,2017-01-01 03:00:00,119.72
4,-42.75,-14.25,2017-01-01 04:00:00,116.72
5,-42.75,-14.25,2017-01-01 05:00:00,117.15


In [45]:
# Preview the first rows of the temperature DataFrame (head() defaults to 5 rows)
df_temp.head()

Unnamed: 0,Longitude,Latitude,Data,Temperatura_Verif
1,-42.75,-14.25,2017-01-01 01:00:00,18.52
2,-42.75,-14.25,2017-01-01 02:00:00,17.63
3,-42.75,-14.25,2017-01-01 03:00:00,16.92
4,-42.75,-14.25,2017-01-01 04:00:00,16.48
5,-42.75,-14.25,2017-01-01 05:00:00,15.98


In [46]:
# Preview the first rows of the wind-magnitude DataFrame (head() defaults to 5 rows)
df_vento.head()

Unnamed: 0,Longitude,Latitude,Data,Magnitude_Verif
0,-42.75,-14.25,2017-01-01 00:00:00,9.82
1,-42.75,-14.25,2017-01-01 01:00:00,8.74
2,-42.75,-14.25,2017-01-01 02:00:00,8.45
3,-42.75,-14.25,2017-01-01 03:00:00,9.18
4,-42.75,-14.25,2017-01-01 04:00:00,9.19


In [47]:
# Preview the first rows of the atmospheric-pressure DataFrame (head() defaults to 5 rows)
df_pressao.head()

Unnamed: 0,Longitude,Latitude,Data,pressao_Verif
1,-42.75,-14.25,2017-01-01 01:00:00,882.33
2,-42.75,-14.25,2017-01-01 02:00:00,882.0
3,-42.75,-14.25,2017-01-01 03:00:00,882.0
4,-42.75,-14.25,2017-01-01 04:00:00,882.67
5,-42.75,-14.25,2017-01-01 05:00:00,883.0


### **C) Merge dos dados**

In [48]:
# Combine the four DataFrames into a single one, matching rows on the shared
# Longitude / Latitude / Data columns. Outer joins keep rows whose key is
# missing from some of the sources; the absent measurements show up as NaN.
merge_keys = ['Longitude', 'Latitude', 'Data']
df_energia = (
    df_vento
    .merge(df_pressao, how='outer', on=merge_keys)
    .merge(df_temp, how='outer', on=merge_keys)
    .merge(df_direcao, how='outer', on=merge_keys)
)


In [49]:
# Display the contents of the DataFrame produced by the merge
df_energia

Unnamed: 0,Longitude,Latitude,Data,Magnitude_Verif,pressao_Verif,Temperatura_Verif,Direcao_Verif
0,-42.75,-14.25,2017-01-01 00:00:00,9.82,,,
1,-42.75,-14.25,2017-01-01 01:00:00,8.74,882.33,18.52,119.20
2,-42.75,-14.25,2017-01-01 02:00:00,8.45,882.00,17.63,120.98
3,-42.75,-14.25,2017-01-01 03:00:00,9.18,882.00,16.92,119.72
4,-42.75,-14.25,2017-01-01 04:00:00,9.19,882.67,16.48,116.72
...,...,...,...,...,...,...,...
8755,-42.75,-14.25,2017-12-31 19:00:00,7.22,882.50,24.88,87.67
8756,-42.75,-14.25,2017-12-31 20:00:00,6.14,883.33,24.88,79.83
8757,-42.75,-14.25,2017-12-31 21:00:00,6.04,884.00,24.52,82.50
8758,-42.75,-14.25,2017-12-31 22:00:00,6.72,884.00,24.33,85.17


#### **D.1) Colunas com valores nulos?**

In [52]:
# Count the missing values in each column of the merged DataFrame
df_energia.isna().sum()

Longitude              0
Latitude               0
Data                   0
Magnitude_Verif        0
pressao_Verif        309
Temperatura_Verif    306
Direcao_Verif          2
dtype: int64

In [54]:
# Drop every row that has at least one missing value.
# Only `how` is passed: pandas >= 1.5 raises
# "TypeError: You cannot set both the how and thresh arguments at the same time"
# when both are supplied explicitly, even as thresh=None.
# The result is reassigned instead of using inplace=True; dropna is idempotent,
# so re-running this cell leaves df_energia unchanged.
df_energia = df_energia.dropna(axis=0, how='any')

In [55]:
# Re-count the missing values per column to confirm that none are left
df_energia.isna().sum()

Longitude            0
Latitude             0
Data                 0
Magnitude_Verif      0
pressao_Verif        0
Temperatura_Verif    0
Direcao_Verif        0
dtype: int64

### **E) Exportar Dados**

In [56]:
# Write the reconciled DataFrame to a ';'-separated, UTF-8 encoded CSV file.
# The row index is written as the first column (to_csv default).
output_file = 'processed/teste_03_concilia_patio1.csv'
df_energia.to_csv(
    path_or_buf=output_file,
    sep=';',
    encoding='utf-8',
)

In [57]:
# Display the DataFrame that was written to output_file
df_energia


Unnamed: 0,Longitude,Latitude,Data,Magnitude_Verif,pressao_Verif,Temperatura_Verif,Direcao_Verif
1,-42.75,-14.25,2017-01-01 01:00:00,8.74,882.33,18.52,119.20
2,-42.75,-14.25,2017-01-01 02:00:00,8.45,882.00,17.63,120.98
3,-42.75,-14.25,2017-01-01 03:00:00,9.18,882.00,16.92,119.72
4,-42.75,-14.25,2017-01-01 04:00:00,9.19,882.67,16.48,116.72
5,-42.75,-14.25,2017-01-01 05:00:00,9.36,883.00,15.98,117.15
...,...,...,...,...,...,...,...
8754,-42.75,-14.25,2017-12-31 18:00:00,6.84,882.00,25.28,80.00
8755,-42.75,-14.25,2017-12-31 19:00:00,7.22,882.50,24.88,87.67
8756,-42.75,-14.25,2017-12-31 20:00:00,6.14,883.33,24.88,79.83
8757,-42.75,-14.25,2017-12-31 21:00:00,6.04,884.00,24.52,82.50
