In [9]:
import numpy as np
import pandas as pd
from datetime import datetime,date,time
from dateutil.relativedelta import relativedelta

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by pandas.
warnings.filterwarnings("ignore")

### **A) Coleta dos Dados de Temperatura**

In [10]:
# Relative path of the raw input file (resolved against the kernel's working directory).
# Presumably temperature ("TEM") verification readings — confirm against the data owner.
source_file = '../../../../PrevisaoVento/Data/raw/Variaveis_EPE/BAUET2_CAETITÉ_2_TEM_Verif_EPE_edit.txt'

In [11]:
# Semicolon-separated file with no header row, so columns are labelled 0, 1, 2, ...
dfColeta = pd.read_csv(source_file, sep=';', header=None)

### **B) Sobre os Dados no Patio 1**

#### <span style="color:#DC143C">**B1.Formato dos Dados**

In [12]:
dfColeta.shape # (rows, columns) of the loaded frame

(1622, 49)

#### <span style="color:#DC143C">**B2.Visao do Dataframe**

In [13]:
dfColeta.head(2)  # preview of the first 2 rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,20170101,22.267,22.1165,18.733,18.3,17.867,17.4,17.0,16.833,16.633,...,25.367,25.467,25.733,25.7,25.767,25.667,25.5,23.8,22.1,21.833
1,20170102,21.3,20.667,20.267,19.833,19.0,18.6,18.333,17.9,17.567,...,27.267,27.267,27.3,27.267,27.233,26.533,26.1,25.0,22.933,22.9


### **C) Extração dos Dados de 2017**

#### <span style="color:#DC143C"> **C1. Converter a primeira coluna em Data**

In [14]:
# Column 0 holds the day written as YYYYMMDD; parse it into datetime values.
dfColeta[0]= pd.to_datetime(dfColeta[0], format='%Y%m%d')

#### <span style="color:#DC143C"> **C2.Remove registros fora do Intervalo**

In [15]:
# Keep only rows dated from 2017-01-01 through 2018-01-02 (both bounds inclusive):
# flag everything outside that range and drop the flagged row labels in one pass.
fora_do_intervalo = (dfColeta[0] > '2018-01-02') | (dfColeta[0] < '2017-01-01')
dfColeta = dfColeta.drop(dfColeta.index[fora_do_intervalo])

In [16]:
# Append constant longitude / latitude columns (same value on every row)
dfColeta = dfColeta.assign(longitude=-42.75, latitude=-14.25)

In [17]:
# Move the last two columns to the front of the frame, keeping their relative order
cols = list(dfColeta)
cols = cols[-2:] + cols[:-2]
dfColeta = dfColeta[cols]

In [18]:
# Reset the index so rows are labelled 0..n-1 again; the previous row labels
# are kept in a new 'index' column.
dfColeta = dfColeta.reset_index()

### **D) Formatação dos Dados**

#### <span style="color:#DC143C"> **D1. Migrar os dados para o Formato**

In [19]:
frente = ['Longitude', 'Latitude', 'Data', 'Valor30_1', 'Valor30_2']  # output header

# Columns 1..48 of dfColeta hold one day's readings in consecutive pairs:
# (1, 2) feed hour 0, (3, 4) feed hour 1, ..., (47, 48) feed hour 23.
ULTIMA_COLUNA_LEITURA = 48

# Collect the output rows in a plain list and build the frame once at the end;
# growing a DataFrame one row at a time with .loc gets slower with every insert.
registros = []

for lin2 in range(dfColeta.shape[0]):
    # Midnight of the row's day. Timestamps are built with pandas only, so this
    # cell does not depend on which `datetime` name is currently bound in the kernel.
    Data = dfColeta.loc[lin2, 0].normalize()
    longitude = dfColeta.loc[lin2, 'longitude']
    latitude = dfColeta.loc[lin2, 'latitude']

    # refhora counts the pairs (0..23); col is the first column of each pair.
    for refhora, col in enumerate(range(1, ULTIMA_COLUNA_LEITURA + 1, 2)):
        dataCompleta = Data + pd.Timedelta(hours=refhora)
        registros.append([
            longitude,
            latitude,
            dataCompleta,
            dfColeta.loc[lin2, col],
            dfColeta.loc[lin2, col + 1],
        ])

# One row per (day, hour) carrying the two readings of that hour.
dfTemperatura = pd.DataFrame(registros, columns=frente)
                


#### <span style="color:#DC143C"> **D2. Informações sobre os Tipos de Dados**

In [20]:
# Print column dtypes, non-null counts and memory usage of the reshaped frame.
dfTemperatura.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8808 entries, 0 to 8807
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Longitude  8808 non-null   float64       
 1   Latitude   8808 non-null   float64       
 2   Data       8808 non-null   datetime64[ns]
 3   Valor30_1  8808 non-null   float64       
 4   Valor30_2  8808 non-null   float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 412.9 KB


#### <span style="color:#DC143C"> **D3. Remover ruidos**

In [21]:
# Per-month midrange ((max + min) / 2) of both readings, with 99.0 counted as 0.
# NOTE(review): only a filtered copy is modified below; dfTemperatura itself keeps
# any 99.0 values, and dfMedias is not read by the other cells visible in this
# notebook — confirm whether the clean-up was meant to reach the exported data.
medias = ['Mes', 'Ano', 'Valor1', 'Valor2']  # header of dfMedias

anos = [2017,2018,2019]
meses = list(range(1, 13))

# Rows are gathered in a list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0.
registros_medias = []

for ano in anos:
    for mes in meses:
        # Window bounds are pandas Timestamps, so no `import datetime` is needed
        # here and the `datetime` class imported at the top is not rebound.
        d1  = pd.Timestamp(year=ano, month=mes, day=1)
        d28 = pd.Timestamp(year=ano, month=mes, day=28)

        # NOTE(review): both comparisons are strict, so midnight of day 1 and
        # everything from day 28 onwards fall outside the window — confirm intent.
        na_janela = (dfTemperatura['Data'] > d1) & (dfTemperatura['Data'] < d28)

        # replace() returns a new frame, so dfTemperatura is never written to.
        dfresult = dfTemperatura.loc[na_janela, ['Valor30_1', 'Valor30_2']].replace(99.0, 0)

        # max()/min() of an empty window are NaN, which propagates to the midrange.
        maximos = dfresult.max()
        minimos = dfresult.min()

        registros_medias.append({
            'Mes': mes,
            'Ano': ano,
            'Valor1': (maximos['Valor30_1'] + minimos['Valor30_1']) / 2,
            'Valor2': (maximos['Valor30_2'] + minimos['Valor30_2']) / 2,
        })

dfMedias = pd.DataFrame(registros_medias, columns=medias)

# Optional export of the monthly table (disabled):
# output_file = '../../Data/process/patio1_17a19_exportaMedias.csv'
# dfMedias.to_csv(output_file, sep=';', encoding='utf-8')
       

In [22]:
# Copy the current row labels into a new 'index' column and renumber the rows.
# NOTE(review): running this cell a second time raises ValueError because the
# 'index' column already exists; the column is also carried into the exported CSV.
dfTemperatura = dfTemperatura.reset_index()

#### <span style="color:#DC143C"> **D4. Agregar Magnitude para 1H**

In [23]:
# Hourly value = arithmetic mean of the two readings stored for that hour.
dfTemperatura['Temperatura_Verif'] = dfTemperatura['Valor30_1'].add(dfTemperatura['Valor30_2']).div(2)

In [24]:
# Round the hourly value to 3 decimal places
dfTemperatura['Temperatura_Verif'] = dfTemperatura['Temperatura_Verif'].round(3)

In [25]:
# Drop the two source readings now that their hourly mean is stored.
# The columns are named via the `columns=` keyword: the positional axis form
# drop(label, 1) was deprecated in pandas 1.3 and removed in pandas 2.0.
dfTemperatura = dfTemperatura.drop(columns=['Valor30_1', 'Valor30_2'])

In [26]:
# A negative n makes head() return every row except the last 10.
dfTemperatura.head(-10)

Unnamed: 0,index,Longitude,Latitude,Data,Temperatura_Verif
0,0,-42.75,-14.25,2017-01-01 00:00:00,22.192
1,1,-42.75,-14.25,2017-01-01 01:00:00,18.516
2,2,-42.75,-14.25,2017-01-01 02:00:00,17.633
3,3,-42.75,-14.25,2017-01-01 03:00:00,16.916
4,4,-42.75,-14.25,2017-01-01 04:00:00,16.483
...,...,...,...,...,...
8793,8793,-42.75,-14.25,2018-01-02 09:00:00,23.534
8794,8794,-42.75,-14.25,2018-01-02 10:00:00,24.666
8795,8795,-42.75,-14.25,2018-01-02 11:00:00,24.734
8796,8796,-42.75,-14.25,2018-01-02 12:00:00,24.550


### **E. Exporta Resultados**

In [27]:
# Write the result as a semicolon-separated, UTF-8 encoded CSV file.
# to_csv writes the DataFrame index by default, so it appears as the first (unnamed) column.
output_file = '../../../../PrevisaoVento/Data/process/cenario2/temperatura_ajustada.csv'
dfTemperatura.to_csv(output_file, sep=';', encoding='utf-8')