# Programa para plotar gráfico de dispersão para temperatura e UR (estações x simulação)
## Utiliza todas as estações disponíveis
## Calcula e escreve no gráfico a correlação de Pearson e o hit rate

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
import pandas as pd
import xarray as xr
from scipy import stats
import verif.metric
import seaborn as sns
import datetime

In [2]:
# Directory that is searched for the station observation files (Observacao*.txt).
obs_path = '...'

In [26]:
# Collect every observation file named Observacao_<station>.txt.
# glob.glob returns matches in arbitrary, file-system-dependent order; sorting
# makes the order of the observation rows reproducible, which matters because
# the model rows are sorted by station name and then paired with the
# observations by row position further down in this file.
obsvs = sorted(glob.glob(os.path.join(obs_path, 'Observacao*.txt')))

# Station identifiers, in the same order as the rows of the coordinate array.
stations = [
    "SBGR",
    "SBMT",
    "A701",
    "IAG",
    "SBSP",
    "A755",
    "Capao Redondo",
    "Interlagos",
    "Ponte dos Remedios",
    "Parque D Pedro II",
    "Pinheiros",
]

In [68]:
def plot_var(data_model, data_obs, **kwargs):
    """Save a scatter/regression plot of simulated versus observed values.

    The correlation coefficient and the hit rate are written in the top-left
    corner of the axes. The hit rate is the percentage of valid
    (non-NaN) model/observation pairs whose difference lies within a
    variable-dependent tolerance.

    Parameters
    ----------
    data_model : array-like
        Simulated values, paired element by element with ``data_obs``.
    data_obs : array-like
        Observed values; must have the same length as ``data_model``.
    **kwargs
        var : str
            Variable identifier. ``"Temp"`` uses a tolerance of 2.0 and
            ``"UR"`` a tolerance of 10.0; any other value gives a hit rate
            of 0. It is also passed to ``savefig`` as the output file name.
        label_x, label_y : str
            Labels of the x (observed) and y (simulated) axes.

    Raises
    ------
    ValueError
        If ``data_model`` and ``data_obs`` do not have the same shape.
    """
    # Work on plain NumPy arrays so that pairing is strictly positional and
    # never depends on the index labels of a pandas Series.
    model = np.asarray(data_model, dtype=float)
    obs = np.asarray(data_obs, dtype=float)
    if model.shape != obs.shape:
        raise ValueError(
            "data_model and data_obs must have the same shape, got "
            "{} and {}".format(model.shape, obs.shape)
        )

    r = verif.metric.Corr().compute_from_obs_fcst(obs, model)

    # Only pairs where both values are present take part in the hit rate.
    valid = ~(np.isnan(obs) | np.isnan(model))
    n = int(valid.sum())

    # Maximum absolute model-minus-observation difference counted as a hit.
    tolerances = {"Temp": 2.0, "UR": 10.0}
    var = kwargs.get('var')
    tolerance = tolerances.get(var)

    if n == 0:
        # No valid pairs: the rate is undefined rather than a division error.
        h = float('nan')
    elif tolerance is None:
        # Unknown variable: no pair is ever counted as a hit.
        h = 0.0
    else:
        dif = model[valid] - obs[valid]
        hits = int(np.count_nonzero(np.abs(dif) <= tolerance))
        h = (hits / n) * 100

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        fig.subplots_adjust(bottom=0.3)
        sns.regplot(x=obs, y=model, ax=ax)
        ax.annotate('r={:.2f}'.format(r), xy=(0, 1), xycoords="axes fraction",
                    xytext=(5, -5), textcoords="offset points",
                    ha="left", va="top")
        ax.annotate('hit rate={:.1f} %'.format(h), xy=(0, 1),
                    xycoords="axes fraction",
                    xytext=(5, -20), textcoords="offset points",
                    ha="left", va="top")
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.set_ylabel(kwargs.get('label_y'), size=12)
        ax.set_xlabel(kwargs.get('label_x'), size=12)
        fig.savefig(var, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

In [65]:
# Read every observation file and keep only the station name, temperature and
# relative humidity columns, renamed to the names used in the rest of the file.
obs_columns = {'ESTACAO': 'ESTACAO', 'TEMP(C)': 'TEMP_OBS', 'UR(%)': 'UR_OBS'}
dfs = []
for obsv in obsvs:
    dados_obs = pd.read_table(obsv)  # Read the observation data
    dfs.append(dados_obs[list(obs_columns)].rename(columns=obs_columns))
# Stack all stations into one table with a fresh 0..N-1 index.
df_obs = pd.concat(dfs, ignore_index=True)
df_obs

Unnamed: 0,ESTACAO,TEMP_OBS,UR_OBS
0,A701,20.2,75.0
1,A701,20.0,76.0
2,A701,19.8,77.0
3,A701,19.7,77.0
4,A701,19.2,79.0
...,...,...,...
270,SBSP,24.0,69.1
271,SBSP,23.0,69.0
272,SBSP,23.0,69.0
273,SBSP,22.0,73.3


In [36]:
# One (latitude, longitude) row per station.
latlon = np.array([
    (-23.44, -46.47),        # Guarulhos
    (-23.50, -46.63),        # Campo de Marte
    (-23.49, -46.62),        # Mirante de Santana
    (-23.65, -46.62),        # IAG
    (-23.62, -46.65),        # Congonhas
    (-23.5389, -46.86945),   # Barueri (INMET)
    (-23.6655, -46.78207),   # Capao Redondo (CETESB)
    (-23.68097, -46.67577),  # Interlagos (CETESB)
    (-23.51864, -46.74302),  # Ponte dos Remedios (CETESB)
    (-23.54602, -46.62717),  # Parque D. Pedro II (CETESB)
    (-23.56159, -46.70206),  # Pinheiros (CETESB)
])


In [37]:
# Path of the model output dataset that is opened with xarray below.
dataFile = '...'


In [38]:
dfs = []
# Open the model output once and close it when done, instead of reopening the
# file for every station.
with xr.open_dataset(dataFile) as ds_all:
    for station, (lat, lon) in zip(stations, latlon):
        # Read the data between 00h of the event day and 00h of the next day
        # at 1-hour intervals, at the grid point nearest to the station.
        ds = ds_all.sel(lat=lat, lon=lon, method="nearest").isel(time=slice(288, 577, 12))
        # The table is built inside the `with` block so the values are read
        # while the file is still open.
        dfs.append(pd.DataFrame.from_dict({'ESTACAO': station,
                                           'DATA': ds['time'],
                                           'TEMP': ds['tempc2m'],
                                           'UR': ds['rh'].isel(lev=0)
                                           }))
# The rows are sorted alphabetically by station name (then by time) so that
# they match the order of the rows in the observation dataframe.
df_model = pd.concat(dfs, ignore_index=True).sort_values(by=['ESTACAO', 'DATA'], ascending=True).reset_index()
df_model

Unnamed: 0,index,ESTACAO,DATA,TEMP,UR
0,50,A701,2016-12-18 00:00:00,19.874084,78.862732
1,51,A701,2016-12-18 01:00:00,19.136444,82.680962
2,52,A701,2016-12-18 02:00:00,18.684021,86.189758
3,53,A701,2016-12-18 03:00:00,18.396271,86.774231
4,54,A701,2016-12-18 04:00:00,18.252411,87.307869
...,...,...,...,...,...
270,120,SBSP,2016-12-18 20:00:00,23.567169,74.148224
271,121,SBSP,2016-12-18 21:00:00,22.950806,79.208008
272,122,SBSP,2016-12-18 22:00:00,22.195557,81.942039
273,123,SBSP,2016-12-18 23:00:00,21.474121,87.238869


In [60]:
# Pair observed and simulated series side by side. Rows are matched by index
# label; rows with a missing value in any column are dropped and the previous
# index is kept as an 'index' column.
column_sources = (
    ('TempObs', df_obs, 'TEMP_OBS'),
    ('TempModel', df_model, 'TEMP'),
    ('URObs', df_obs, 'UR_OBS'),
    ('URModel', df_model, 'UR'),
)
data = {name: frame[column] for name, frame, column in column_sources}
df = pd.DataFrame(data)
df = df.dropna()
df = df.reset_index()
df

Unnamed: 0,index,TempObs,TempModel,URObs,URModel
0,0,20.2,19.874084,75.0,78.862732
1,1,20.0,19.136444,76.0,82.680962
2,2,19.8,18.684021,77.0,86.189758
3,3,19.7,18.396271,77.0,86.774231
4,4,19.2,18.252411,79.0,87.307869
...,...,...,...,...,...
264,270,24.0,23.567169,69.1,74.148224
265,271,23.0,22.950806,69.0,79.208008
266,272,23.0,22.195557,69.0,81.942039
267,273,22.0,21.474121,73.3,87.238869


In [69]:
# Raw strings keep the backslash of the LaTeX "\circ" command literal without
# relying on Python passing an unrecognised escape sequence through.
plot_var(df['TempModel'], df['TempObs'], var="Temp",
         label_x=r'Temperatura observada ($^\circ$C)',
         label_y=r'Temperatura simulada ($^\circ$C)')
plot_var(df['URModel'], df['URObs'], var="UR",
         label_x='Umidade relativa observada (%)',
         label_y='Umidade relativa simulada (%)')