### Packaged

In [31]:
import xarray as xr
import numpy as np
import pandas as pd

from scipy import stats

import sys
import os
import warnings
warnings.filterwarnings("ignore")

sys.path.append(os.path.abspath("d:/Users/julia/Desktop/Academia/01_Mestrado/Work/Utils"))
from uv2veldire import *

In [6]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

In [53]:
def compute_histogram(variable, bin_max, bin_min, dbin, pdf=False):
    """ Computes 1D histogram or probability density for a given variable.
        
    Keyword arguments:
    variable -- 1D array.
    bin_max -- maximum value for bins
    bin_min -- minimum value for bins
    dbin -- bin size
    pdf -- (default False)
    
    Returns:
    bins -- histogram bins
    counts -- either counts or probability density
        
    """
    bins = np.arange(bin_min, bin_max, dbin)
    count = []
    for i in range(len(bins)):
        ind = (variable>bins[i] - dbin/2) & (variable<=bins[i]+dbin/2)
        count.append(ind.sum())
    count = np.array(count)
    if pdf:
        norm_hist = count/count.sum()/dbin
        assert np.allclose(norm_hist.sum()*dbin, 1.0), "PDF doesn't sum to 1"
    
        return bins, norm_hist
    else:
        return bins, count

### Metar Database

#### Importing Data

In [2]:
Metar = pd.DataFrame({'label':['SBNT','SBJP','SBRF','SBMO','SBAR','SBSV'],
                      'Latitude':[-5.91,-7.15,-8.13,-9.51,-10.98,-12.91],
                      'Longitude': [-35.25,-34.97,-34.92,-35.79,-37.07,-38.33]})

i = [0,1,2,3,4,5]
Names = ['SBNT','SBJP','SBRF','SBMO','SBAR','SBSV']

df_list =[]
for n in range(len(Names)):
    df = pd.read_csv("D:/Users/julia/Desktop/AcaDemia/01_MestraDo/Dados/METAR/"+ Names[n] +".csv",sep=',',
    index_col='times', parse_dates=True)
    df_list.append(df)

### ERA5 Database

#### Import and Setting the Data

In [3]:
dsmerged = xr.open_dataset('D:/Users/julia//Desktop/Academia/01_Mestrado/Dados/ERA5.nc',)

In [4]:
era_SBNT = dsmerged.sel(latitude=-5.8, longitude=-35.1, method='nearest') # 2º point-5.8 -34.85
u_SBNT = era_SBNT['u10']
v_SBNT = era_SBNT['v10']

era_SBJP = dsmerged.sel(latitude=-7.05, longitude=-34.6, method='nearest') # 2º point-7.05 -34.35
u_SBJP = era_SBJP['u10']
v_SBJP = era_SBJP['v10']

era_SBRF = dsmerged.sel(latitude=-8.05, longitude=-34.6, method='nearest') # 2º point-8.05 -34.35
u_SBRF = era_SBRF['u10']
v_SBRF = era_SBRF['v10']

era_SBMO = dsmerged.sel(latitude=-9.55, longitude=-35.35, method='nearest') # 2º point-9.55 -35.1
u_SBMO = era_SBMO['u10']
v_SBMO = era_SBMO['v10']

era_SBAR = dsmerged.sel(latitude=-11.05, longitude=-36.85, method='nearest') # 2º point -11.05 -36.6
u_SBAR = era_SBAR['u10']
v_SBAR = era_SBAR['v10']

era_SBSV = dsmerged.sel(latitude=-13.05, longitude=-38.35, method='nearest') # 2º point -12.8 -37.85/-38.1
u_SBSV = era_SBSV['u10']
v_SBSV = era_SBSV['v10']

In [5]:
era = [era_SBNT,era_SBJP,era_SBRF,era_SBMO,era_SBAR,era_SBSV]
names = ['SBNT','SBJP','SBRF','SBMO','SBAR','SBSV']     

# Teste 1

In [None]:
era_SBSV['vel'],era_SBSV['dire'] = uv2veldire(u_SBSV,v_SBSV,corr=True)
e1 = era_SBSV.drop(['latitude','longitude'])
e1 = e1.to_dataframe()
e1 = e1['2002-06':]

In [72]:
m1 = df_list[-1]

## Removing outliers
m1.dropna(inplace=True)
m1 = m1[(np.abs(stats.zscore(m1)) < 2).all(axis=1)]

## Creating a regular Time
                    ### Intervalo Fixo de tempo
resample_index = pd.date_range(start='2002-06-01 00:00',end='2019-12-31 23:00', freq='H') 
dummy_frame = pd.DataFrame(np.NaN, index=resample_index, columns=m1.columns)
df=dummy_frame.combine_first(m1[:'2019-12-31 23:00'])
                    ### Intervalo da Serie
# resample_index = pd.date_range(start=dft.index[0],end=dft.index[-1], freq='1H')
# dummy_frame = pd.DataFrame(np.NaN, index=resample_index, columns=dft.columns)
# df=dummy_frame.combine_first(dft)


## Interpolating
df.interpolate(method='linear', limit=12, limit_direction='both', inplace=True)


In [63]:
df2=dummy_frame.combine_first(e1[:'2019-12-31 23:00'])
df2.interpolate(method='linear', limit=12, limit_direction='both', inplace=True)


In [71]:
m1.drop_duplicates()

Unnamed: 0_level_0,speed,direction,temperature,dewpoint,pressure
times,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003-01-01 15:00:00,5.144440,60.0,30.0,24.0,1015.0
2003-01-01 17:00:00,5.144440,40.0,30.0,24.0,1013.0
2003-01-02 11:00:00,5.144440,40.0,27.0,23.0,1015.0
2003-01-03 02:00:00,1.028888,60.0,25.0,22.0,1013.0
2003-01-03 05:00:00,4.115552,60.0,24.0,23.0,1012.0
...,...,...,...,...,...
2021-01-25 19:00:00,5.144440,100.0,29.0,22.0,1012.0
2021-01-25 20:00:00,4.629996,100.0,29.0,22.0,1012.0
2021-01-25 21:00:00,3.601108,90.0,28.0,22.0,1012.0
2021-01-25 22:00:00,1.543332,140.0,26.0,22.0,1012.0


In [52]:
mean_sst = []
std_sst = []
mean_sub = []
std_sub = []

coef = []

for i,j in zip(merc_s,mur_s):
    m,s = np.mean(j), np.std(j)
    n, d = np.mean(i), np.std(i)

    mean_sst.append(m[0])
    std_sst.append(s[0])
    mean_sub.append(n[0])
    std_sub.append(d[0])
    coef.append(np.round(np.corrcoef(j.analysed_sst,i.thetao)[1][0],4))

stat = pd.DataFrame({'mean_MUR' : mean_sst, 'std_MUR' : std_sst,'mean_Merc' : mean_sub,'std_Merc' : std_sub, 'r2' : coef}, columns=['mean_MUR','std_MUR', 'mean_Merc','std_Merc','r2'],index=st)
stat

In [57]:
rmse(e1.dire,df.direction)
np.corrcoef(e1.dire,df.direction)


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 154152 and the array at index 1 has size 154662

In [59]:
e1.dire


time
2002-06-01 00:00:00    141.232147
2002-06-01 01:00:00    146.014252
2002-06-01 02:00:00    145.705170
2002-06-01 03:00:00    143.241699
2002-06-01 04:00:00    140.967346
                          ...    
2019-12-31 19:00:00     88.171646
2019-12-31 20:00:00     76.455589
2019-12-31 21:00:00     64.640846
2019-12-31 22:00:00     64.005737
2019-12-31 23:00:00     58.535439
Name: dire, Length: 154152, dtype: float32

In [60]:
df.direction

2002-06-01 00:00:00     NaN
2002-06-01 01:00:00     NaN
2002-06-01 02:00:00     NaN
2002-06-01 03:00:00     NaN
2002-06-01 04:00:00     NaN
                       ... 
2019-12-31 19:00:00    45.0
2019-12-31 20:00:00    20.0
2019-12-31 21:00:00    70.0
2019-12-31 22:00:00    70.0
2019-12-31 23:00:00    70.0
Name: direction, Length: 154662, dtype: float64

In [50]:
pd.date_range(start = '2002-06-01 00:00:00', end = '2019-12-31 23:00:00' ).difference(df.index)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

In [40]:
iii= e1.reindex(pd.date_range('2002-01-01 00:00:00', '2019-12-31 23:00:00')).isnull().all(1)

In [44]:
np.where(iii)

(array([], dtype=int64),)

In [49]:
e1['2002-06':]

Unnamed: 0_level_0,u10,v10,t2m,msl,sst,sp,vel,dire
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2002-06-01 00:00:00,-2.980964,3.847490,26.057373,101721.101562,300.189667,101642.8750,4.867168,141.232147
2002-06-01 01:00:00,-2.905829,4.477021,26.161743,101746.890625,300.189667,101668.8750,5.337374,146.014252
2002-06-01 02:00:00,-3.098213,4.717507,26.189575,101762.437500,300.189667,101684.1250,5.643917,145.705170
2002-06-01 03:00:00,-3.474721,4.825216,25.944702,101728.562500,300.189667,101650.2500,5.946125,143.241699
2002-06-01 04:00:00,-3.708346,4.740896,25.878906,101683.406250,300.189667,101604.8125,6.018964,140.967346
...,...,...,...,...,...,...,...,...
2019-12-31 19:00:00,-4.254151,-0.061509,26.487061,100983.875000,300.971741,100906.1250,4.254596,88.171646
2019-12-31 20:00:00,-5.589502,-1.243710,26.620117,101006.195312,300.971741,100928.6250,5.726199,76.455589
2019-12-31 21:00:00,-5.030678,-2.277695,26.343994,101063.640625,300.971741,100985.8750,5.522283,64.640846
2019-12-31 22:00:00,-3.591888,-1.674487,26.546631,101103.187500,300.816895,101025.7500,3.963025,64.005737
