In [1]:
# Gets the precipitation data for the Valley of Mexico Basin
# from CHIRPS and averages it spatially.

import os

import rioxarray

import pandas as pd
import numpy as np

import geopandas as gpd
import xarray as xr

from scipy.stats import linregress

# Data locations: inputs are read from path_d, figures go to path_r.
path_d = "../results/onset/"
path_r = "../results/onset/graficas/"

# Variables, pressure levels and region.
# NOTE: `vars` shadows the Python builtin of the same name; the name is kept
# because other cells index this list.
vars = [ "u", "v", "gp", "sst", "olr", "sp", #"vidmf",
    "vivfu", "vivfv" ]
levels = [ "925", "200" ]
region = [ "mexico" ]

# Number of days that were averaged (part of the GRIB file names).
d = 20

# Precipitation-based onset dates, one row per year.
date_pre = pd.read_csv( path_d + "onset_CHIRPS.csv", index_col = "Año" )
# Rebuild the index as a contiguous range of years; this raises if the
# file skips a year, because the lengths would no longer match.
date_pre.index = pd.RangeIndex( date_pre.index[0], date_pre.index[-1] + 1,
    name = "Año" )
# Parse the first two columns as dates. Assigning by column label replaces
# the column (and its dtype); writing through .iloc sets values into the
# existing column instead, so the dtype is not reliably converted.
date_pre[date_pre.columns[0]] = pd.to_datetime( date_pre.iloc[:, 0] )
date_pre[date_pre.columns[1]] = pd.to_datetime( date_pre.iloc[:, 1] )

# Convert the onset dates to day of year.
date_pre["Cuenca - Onset"] = pd.to_datetime(
    date_pre["Cuenca - Onset"] ).dt.dayofyear

# Daily series, indexed by date.
df = pd.read_csv( path_d + "pre_daily_CHIRPS.csv", index_col = "time" )
df.index = pd.to_datetime( df.index )

In [2]:
def pre_onset( umbral = [2.25, 2], n = [10, 10], m_i = 4 ):
    """Build the yearly onset / withdrawal table from the daily "Cuenca" series.

    Reads the module-level DataFrame ``df`` (daily values, DatetimeIndex,
    column "Cuenca"). A day gets index 1 when it and the previous
    ``n - 1`` days all have a value >= the threshold and the day falls
    between May and November; otherwise the index is 0.

    Parameters
    ----------
    umbral : sequence of two numbers
        Thresholds for the onset index and the withdrawal index.
    n : sequence of two ints
        Number of consecutive days required for onset and for withdrawal.
    m_i : int
        First month of the yearly search window (the window ends in
        November). The index itself is only non-zero from May onwards.

    Returns
    -------
    date : pandas.DataFrame
        Indexed by year (1981-2020, index name "Año") with columns
        "Cuenca - Onset", "Cuenca - Withdrawal" and "Cuenca - Length".
    shift, shift_2 : pandas.Series
        Day of year of the onset column.

    Notes
    -----
    If no day of a year meets the criterion, ``idxmax`` falls back to the
    first day of the window for onset and to the last day for withdrawal.
    A year with no rows in ``df`` raises ``ValueError``.
    """
    c = "Cuenca"
    name = "Cuenca"
    years = range(1981, 2021)

    pre = df[c]
    # The index is only allowed to be 1 from May to November...
    index_months = pre.index.month.isin( range(5, 12) )
    # ...while the yearly search window starts at month m_i.
    window = pre.index.month.isin( range(m_i, 12) )

    def _streak_index( threshold, days ):
        # 1.0 on the last day of every run of `days` consecutive values
        # >= threshold, restricted to the search window.
        meets = ( pre >= threshold ).astype(int)
        count = pd.Series( 0, index = pre.index )
        for lag in range(days):
            # Days before the start of the record count as not meeting it.
            count = count + meets.shift( lag, fill_value = 0 )
        index = ( ( count == days ) & index_months ).astype(float)
        return index[window]

    onset_index = _streak_index( umbral[0], n[0] )
    withdrawal_index = _streak_index( umbral[1], n[1] )

    rows = []
    for j in years:
        # Onset: first day of the year whose index is 1.
        first = onset_index[ onset_index.index.year == j ].idxmax()
        # Withdrawal: last day of the year whose index is 1.
        last = withdrawal_index[
            withdrawal_index.index.year == j ].iloc[::-1].idxmax()
        # Move the onset back from the day the streak completes.
        # NOTE(review): the streak starts n[0] - 1 days before it completes,
        # so subtracting n[0] lands on the day before the first streak day;
        # confirm this offset is intended.
        # The withdrawal date is left on the last day of the last streak.
        rows.append( [ first - pd.Timedelta( n[0], unit = "D" ), last ] )

    # Build the table in one go instead of appending row by row.
    date = pd.DataFrame( rows,
        columns = [ name + " - Onset", name + " - Withdrawal" ],
        index = pd.RangeIndex( years.start, years.stop, name = "Año" ) )

    # Length of the season.
    date[name + " - Length"] = date.iloc[:, 1] - date.iloc[:, 0]

    # Day of year needed to align the dates.
    shift = date.iloc[:, 0].dt.dayofyear
    # NOTE(review): shift_2 is also taken from the onset column; presumably
    # the withdrawal column was meant - confirm before relying on it.
    shift_2 = date.iloc[:, 0].dt.dayofyear

    return date, shift, shift_2

In [4]:
def onset( df, vars = [-12.5, 7] , m_i = 5 ):
    """Correlate a threshold-based onset date with the precipitation onset.

    A day qualifies when the "Cuenca" value meets the threshold: ``>=`` for
    a threshold that is zero or positive, ``<=`` for a negative one. The
    index is 1 on a day when it and the previous ``n - 1`` days all qualify
    and the month is between ``m_i`` and November. For every year from 1981
    to 2020 the first day with index 1 is taken, moved back ``n`` days and
    converted to day of year. That series is regressed against the
    module-level ``date_pre["Cuenca - Onset"]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Daily data with a DatetimeIndex and a "Cuenca" column. The column
        "Index_Cuenca" is added to (or overwritten in) this frame.
    vars : sequence of two numbers
        ``[threshold, n]``; ``n`` is truncated with ``int``.
    m_i : int
        First month of the search window (the window ends in November).

    Returns
    -------
    float
        ``rvalue`` of ``scipy.stats.linregress`` between both series.

    Notes
    -----
    If no day of a year qualifies, ``idxmax`` falls back to the first day
    of the window. A year with no rows in ``df`` raises ``ValueError``.
    """
    umbral = vars[0]
    n = int( vars[1] )

    c = "Cuenca"
    ind = "Index_" + c

    # Mark the days that meet the threshold. Comparing directly (instead of
    # clipping a column in place) also handles a threshold of exactly 0.
    if umbral >= 0:
        meets = df[c] >= umbral
    else:
        meets = df[c] <= umbral
    meets = meets.astype(int)

    # Count how many of the last n days meet the threshold.
    count = pd.Series( 0, index = df.index )
    for lag in range(n):
        # Days before the start of the record count as not meeting it.
        count = count + meets.shift( lag, fill_value = 0 )

    # The index is 1 only when all n days qualify, inside the window.
    in_window = df.index.month.isin( range(m_i, 12) )
    df[ind] = ( ( count == n ) & in_window ).astype(float)

    # First day with index 1 in each year's window.
    years = range(1981, 2021)
    season = df.loc[in_window, ind]
    first_hit = [ season[ season.index.year == j ].idxmax() for j in years ]

    # Move back from the day the streak completes and convert to day of
    # year. NOTE(review): subtracting n (not n - 1) lands on the day before
    # the first streak day; a constant offset does not change the
    # correlation.
    onset_day = [ ( t - pd.Timedelta( days = n ) ).dayofyear
        for t in first_hit ]

    # Correlation between this index and the precipitation onset.
    linreg = linregress( onset_day, date_pre["Cuenca - Onset"] )
    return linreg.rvalue

def ind_max( ds, zone = (-80, -70, 12.5, 17.5), n = (1, 20), num = 10 ):
    """Grid-search the threshold and streak length that maximise ``onset``.

    The dataset is averaged over the box ``zone`` and the resulting daily
    series is passed to ``onset`` for every combination of threshold and
    streak length.

    Parameters
    ----------
    ds : xarray.Dataset
        Must have ``longitude`` and ``latitude`` coordinates and produce a
        single data column once averaged.
    zone : sequence of four numbers
        ``(lon_min, lon_max, lat_min, lat_max)``. Latitude is sliced from
        ``lat_max`` to ``lat_min`` - assumes the latitude coordinate is
        stored in descending order; TODO confirm.
    n : tuple
        Arguments forwarded to ``np.linspace`` for the streak length. With
        two values, ``linspace`` produces 50 samples; ``onset`` truncates
        them with ``int``, so several grid points share a length.
    num : int
        Number of thresholds between the rounded minimum and maximum.

    Returns
    -------
    tuple
        ``(best value, array([threshold, n]) at the best value, u)`` where
        ``u = [lowest threshold, highest threshold, num]``.
    """
    # Daily mean over the whole box.
    data = ( ds.sel( longitude = slice( zone[0], zone[1] ),
        latitude = slice( zone[3], zone[2] ) )
        .mean( dim = ["latitude", "longitude"] ).to_dataframe() )
    data.columns = ["Cuenca"]

    # Threshold range: series extremes rounded towards zero to the nearest
    # 0.5. The column is selected by name; positional `[0]` on a
    # label-indexed Series is deprecated.
    series = data["Cuenca"]
    u = [ np.fix( series.min() * 2 ) / 2,
        np.fix( series.max() * 2 ) / 2, num ]

    # c[i, j] = [threshold, n] for every point of the search grid.
    c = np.stack( np.meshgrid( np.linspace( *u ), np.linspace( *n ) ),
        axis = 2 )
    res = np.empty( c.shape[0:2] )

    for i, j in np.ndindex( *res.shape ):
        res[i, j] = onset( data, c[i, j] )

    # Position of the maximum in the 2-D grid.
    best = np.unravel_index( np.argmax(res), res.shape )

    return ( res.max(), c[best], u )

def eval(v = 0, n = 0, step = 20, num = 5):
    """Scan a grid of boxes for the one where ``ind_max`` is largest.

    For the variable ``vars[v]`` (and the level ``levels[n]`` when
    ``v <= 2``), opens both the plain and the "_anom" GRIB file in
    ``path_d``, runs ``ind_max`` on every ``step``-degree box between
    120W-40W (for the default step) and 0-40N, and prints the bounds and
    result of the best box for each file.

    NOTE: this function shadows the builtin ``eval``; the name is kept
    because other cells call it.

    Parameters
    ----------
    v : int
        Index into ``vars``.
    n : int
        Index into ``levels``; only used in the file name when ``v <= 2``.
    step : number
        Box size in degrees.
    num : int
        Number of thresholds, forwarded to ``ind_max``.
    """
    print(f"var: {v}, lev: {n}")

    # Only the first three variables carry a level suffix in the file name.
    if v > 2: lev = ""
    else: lev = "_" + levels[n]

    # Box edges.
    lon = np.arange(-120, -50 + step, step)
    lat = np.arange(0 , 40 + step, step)

    # Pair consecutive edges into (min, max) rows.
    lon = np.stack( [ lon[0:-1], lon[1:] ], axis = 1)
    lat = np.stack( [ lat[0:-1], lat[1:] ], axis = 1)

    # Every longitude pair combined with every latitude pair:
    # rows are [lon_min, lon_max, lat_min, lat_max].
    lon_rows = np.repeat(lon, lat.shape[0], axis = 0)
    lat_rows = np.tile(lat, [lon.shape[0], 1])
    bounds = np.concatenate([lon_rows, lat_rows], axis = 1)

    # Name of the vertical coordinate to drop for this variable.
    if v < 3: vertical = "isobaricInhPa"
    elif v > 5: vertical = "entireAtmosphere"
    else: vertical = "surface"

    for suffix in [ ".", "_anom." ]:
        fname = ( "onset_" + vars[v] + lev + "_mean_"
            + str(d) + "_dias" + suffix + "grib" )

        # The context manager closes the GRIB file once every box has been
        # evaluated.
        with xr.open_dataset( path_d + fname, engine = "cfgrib" ) as raw:
            ds = raw.drop_vars( ["step", vertical, "valid_time"] )
            x = [ ind_max( ds, zone = i, num = num ) for i in bounds ]

        # First element of each result is the value being maximised.
        y = [ z[0] for z in x ]
        best = np.argmax(y)

        print( f"{bounds[best]} {x[best]}" )

def check():
    """Run ``eval`` for every variable in ``vars``.

    The first three variables are evaluated once per entry of ``levels``;
    the remaining ones are evaluated once, without a level. The loop is
    bounded by ``len(vars)``, so it never asks ``eval`` for an index past
    the end of the list.
    """
    # Number of leading variables that are defined on pressure levels.
    n_level_vars = 3

    for v in range( min( n_level_vars, len(vars) ) ):
        for n in range( len(levels) ):
            eval(v = v, n = n)
    for v in range( n_level_vars, len(vars) ):
        eval(v = v)

In [None]:
# Run the scan defined in check(); results are printed per variable and level.
check()

var: 0, lev: 0
[-100  -80    0   20] (0.4323711902099529, array([ 1.25      , 14.18367347]), [-5.5, 3.5, 5])
[-100  -80    0   20] (0.44131827037106475, array([ 1.5       , 16.12244898]), [-3.0, 3.0, 5])
var: 0, lev: 1


Ignoring index file '/Users/rodrigo/Documents/Posgrado/Maestria/Tesis/sequia/results/onset/onset_u_200_mean_20_dias_anom.grib.90c91.idx' older than GRIB file


[-100  -80    0   20] (0.37100315013914775, array([ 5.5      , 19.2244898]), [-13.0, 24.0, 5])
[-80 -60  20  40] (0.4274629003313584, array([-0.25      , 11.08163265]), [-18.5, 18.0, 5])
var: 1, lev: 0
[-60 -40  20  40] (0.42560621728614406, array([2.875, 1.   ]), [-5.0, 5.5, 5])
[-60 -40   0  20] (0.41261979624831235, array([-0.875     ,  5.26530612]), [-2.0, 2.5, 5])
var: 1, lev: 1
[-100  -80    0   20] (0.38423673673260006, array([-2., 20.]), [-7.5, 14.5, 5])
[-80 -60   0  20] (0.380395239929708, array([-3.75      ,  2.16326531]), [-8.5, 10.5, 5])
var: 2, lev: 0
[-100  -80   20   40] (0.3911382730680459, array([8.0615e+03, 1.0000e+00]), [7163.0, 8361.0, 5])
[-120 -100    0   20] (0.375546628332246, array([67.625     ,  4.10204082]), [-235.0, 168.5, 5])
var: 2, lev: 1
[-80 -60  20  40] (0.2500541045072527, array([1.22179e+05, 1.00000e+00]), [117133.0, 122179.0, 5])
[-60 -40   0  20] (0.2613452437173187, array([541.375     ,  10.30612245]), [-816.5, 994.0, 5])
var: 3, lev: 0
[-80 -60 