# Programa para validar temperatura e umidade relativa da simulacao numerica
## Indices estatisticos calculados:
### RMSE, bias, correlacao de Pearson, RMSE bias, MAPE (erro percentual médio absoluto), MAE (Erro Absoluto Médio), Dpielke (indice de Pielke)
#### Referencias:Metodologia para analise de desempenho de simulações de sistemas convectivos na RMSP com o modelo ARPS (Hallak e Pereira Filho, 2011)

In [7]:
import glob
import os
import numpy as np
from scipy import stats
import pandas as pd
import xarray as xr
import datetime

In [None]:
obs_path = '...'

In [None]:
obsvs = list(glob.glob(os.path.join(obs_path, 'Observacao*.txt'))) # Identifica todos os dados de observação com formato Observacao_estacao(nome).txt
stations = ["SBMT","SBGR", "SBSP", "A701", "IAG", "A755", "Capao Redondo","Interlagos","Ponte dos Remedios", 
            "Parque D Pedro II","Pinheiros"]


In [None]:
def make_date (ano, mes, dia, hora):
    """
    Função para combinar ano, mes, dia e hora e tranformar em data
    """
    somedates = ano*10000000000 + mes*100000000 + dia*1000000 + hora*10000
    dates = [datetime.datetime.strptime(str(int(date)),'%Y%m%d%H%M%S') for date in somedates]
    return dates

In [None]:
def indices_estat (var_obs, var_model, var, estacao):
    f= open("Validacao_" + var + ".txt", "a")
    mean_modelo = np.mean(var_model)
    mean_obs = np.mean(var_obs)
    std_modelo = np.std(var_model)
    std_obs = np.std(var_obs)
    corr_pearson = stats.pearsonr(var_model, var_obs)[0]
    ME = mean_modelo - mean_obs
    RMSE = (np.square(np.subtract(var_model,var_obs)).mean())**(1/2)
    MAE = np.mean(np.abs(np.subtract(var_model,var_obs)))
    MAPE = np.mean((np.abs(np.subtract(var_model,var_obs)))/var_obs)
    RMSE_ub = (np.mean(np.square(np.subtract(var_model-np.mean(var_model), var_obs-np.mean(var_obs)))))**(1/2)
    Dpielke = (1-std_modelo/std_obs) + (RMSE/std_obs) + (RMSE_ub/std_obs)
    f.write ("Std_model_" + estacao + ":" + " " + str(std_modelo) + "\n")
    f.write ("Std_obs_" + estacao + ":" + " " + str(std_obs) + "\n")
    f.write ("Corr_Pearson_" + estacao + ":" + " " + str(corr_pearson) + "\n")
    f.write ("ME_" + estacao + ":" + " " + str(ME) + "\n")
    f.write ("RMSE_" + estacao + ":" + " " + str(RMSE) + "\n")
    f.write ("MAE_" + estacao + ":" + " " + str(MAE) + "\n")
    f.write ("MAPE_" + estacao + ":" + " " +str(MAPE*100) + "\n")
    f.write ("RMSE_ub_" + estacao + ":" + " " + str(RMSE_ub) + "\n")
    f.write ("Dpielke_" + estacao + ":" + " " + str(Dpielke) + "\n")
    f.write ("\n")
    return corr_pearson, ME, RMSE, MAE, MAPE*100, RMSE_ub, Dpielke

In [6]:
dfs = []
for obsv in obsvs:
    dados_obs = pd.read_table(obsv) # Ler dados de observacao
    ano_obs = dados_obs['ANO']
#    print(ano_obs)
    mes_obs = dados_obs['MES']
    dia_obs = dados_obs['DIA']
    hora_obs = dados_obs['HORA']
    dates = make_date(ano_obs, mes_obs, dia_obs, hora_obs)
#    print(dados_obs['ESTACAO'])
    dfs.append(pd.concat(axis=0, ignore_index=True, objs=[
                         pd.DataFrame.from_dict({'ESTACAO': dados_obs['ESTACAO'],
                                                 'DATA': dates,
                                                 'TEMP': dados_obs['TEMP(C)'],
                                                 'UR': dados_obs['UR(%)']
                                                 })
                            ]))
df = pd.concat(dfs, ignore_index=True)
df

Unnamed: 0,ESTACAO,DATA,TEMP,UR
0,A701,2016-12-18 00:00:00,20.2,75.0
1,A701,2016-12-18 01:00:00,20.0,76.0
2,A701,2016-12-18 02:00:00,19.8,77.0
3,A701,2016-12-18 03:00:00,19.7,77.0
4,A701,2016-12-18 04:00:00,19.2,79.0
...,...,...,...,...
314,SBSP,2016-12-18 20:00:00,24.0,69.1
315,SBSP,2016-12-18 21:00:00,23.0,69.0
316,SBSP,2016-12-18 22:00:00,23.0,69.0
317,SBSP,2016-12-18 23:00:00,22.0,73.3


In [7]:
latlon = np.array([[-23.44,-46.47],[-23.50,-46.63], #Guarulhos, Campo de Marte
                   [-23.49, -46.62], [-23.65,-46.62], #Mirante de Santana, IAG
            [-23.62,-46.65],[-23.5389,-46.86945],[-23.6655,-46.78207], #Congonhas, Barueri (INMET), Capao Redondo (CETESB)
            [-23.68097,-46.67577],[-23.51864,-46.74302], # Interlagos, Pontes dos remedios (CETESB)
            [-23.54602,-46.62717],[-23.56159,-46.70206]]) #Parque D.Pedro II, Pinheiros (CETESB) 


In [8]:
modelFile = '...'


In [9]:
dfs = []
for i in range(len(latlon)):
     # Leitura dos dados do modelo entre 00 UTC dia do evento e 00 UTC do dia seguinte com intervalo de 1 hora
    ds = xr.open_dataset(modelFile).sel(lat=latlon[i,0], lon=latlon[i,1], method="nearest").isel(time=slice(288,577,12))
    dfs.append(pd.concat(axis=0, ignore_index=True, objs=[
                         pd.DataFrame.from_dict({'ESTACAO': stations[i],
                                                 'DATA': ds['time'],
                                                 'TEMP': ds['tempc2m'],
                                                 'UR': ds['rh'].isel(lev=0)
                                                 })
                            ]))
df_model = pd.concat(dfs, ignore_index=True)

In [10]:
df_model

Unnamed: 0,ESTACAO,DATA,TEMP,UR
0,SBMT,2016-12-18 00:00:00,18.998474,80.603447
1,SBMT,2016-12-18 01:00:00,18.350403,84.725494
2,SBMT,2016-12-18 02:00:00,17.999634,85.732948
3,SBMT,2016-12-18 03:00:00,17.779968,86.403862
4,SBMT,2016-12-18 04:00:00,17.611115,87.358620
...,...,...,...,...
270,Pinheiros,2016-12-18 20:00:00,20.379578,76.148590
271,Pinheiros,2016-12-18 21:00:00,21.412262,75.123772
272,Pinheiros,2016-12-18 22:00:00,21.171295,80.736176
273,Pinheiros,2016-12-18 23:00:00,20.866974,84.069496


In [17]:
for station in stations:
    indices_estat(df.loc[df['ESTACAO']==station].reset_index()['TEMP'],df_model.loc[df_model['ESTACAO']==station].reset_index()['TEMP'],
                  'TEMP',station)
    indices_estat(df.loc[df['ESTACAO']==station].reset_index()['UR'],df_model.loc[df_model['ESTACAO']==station].reset_index()['UR'],
                  'UR',station)