# HydroSOS - Persistence and Hydrological Analogues method

In this notebook, we explore the Persistence and Hydrological Analogues method.

## Import the python libraries

In [1]:
# Clear the interactive namespace without asking for confirmation, so the
# notebook starts from a clean state.
%reset -f

In [2]:
# Importing the libraries
import pandas as pd
import seaborn as sns
import numpy as np
import calendar
import datetime
import matplotlib.pyplot as plt
import math
from scipy import stats
# Select matplotlib's 'classic' style sheet for figures.
plt.style.use('classic')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# HTML is used throughout the notebook to render DataFrame.to_html() tables.
from IPython.display import HTML

# NOTE(review): sns.set() presumably re-applies seaborn's default theme on top of
# the 'classic' style selected above - confirm which look is intended.
sns.set()

## Functions

In [3]:
# Quantile Function Definitions
def q1(x):
    """Return the 0.28 quantile of ``x`` (the lower band limit called "q1" in this notebook)."""
    lower_probability = 0.28
    return x.quantile(lower_probability)

def q2(x):
    """Return the median of ``x`` (the central statistic called "q2" in this notebook)."""
    central_value = x.median()
    return central_value

def q3(x):
    """Return the 0.72 quantile of ``x`` (the upper band limit called "q3" in this notebook)."""
    upper_probability = 0.72
    return x.quantile(upper_probability)

def q5(x):
    """Return the 0.05 quantile (5th percentile) of ``x``."""
    probability = 0.05
    return x.quantile(probability)

def q95(x):
    """Return the 0.95 quantile (95th percentile) of ``x``."""
    probability = 0.95
    return x.quantile(probability)

def q87(x):
    """Return the 0.87 quantile (87th percentile) of ``x``."""
    probability = 0.87
    return x.quantile(probability)

def q13(x):
    """Return the 0.13 quantile (13th percentile) of ``x``."""
    probability = 0.13
    return x.quantile(probability)


## Import Data 

In this example we import the daily discharge data

In [4]:
# Current local calendar date; used below as the end of the daily index and of
# the reference period, so results depend on the day the notebook is run.
today = datetime.datetime.now().date()

In [56]:
# Read the daily discharge file: 'Fecha' is parsed as a day-first date and used
# as the index; a single blank (" ") is treated as a missing value.
raw_daily = pd.read_csv(
    '../notebook_verificacion/santalucia_caudales_prueba.csv',
    parse_dates=['Fecha'],
    index_col="Fecha",
    dayfirst=True,
    na_values=" ",
)
# Continuous daily calendar from 1980-01-01 up to today; dates absent from the
# file appear as NaN rows after reindexing, which exposes the gaps.
DISCHARGE_DAILY_date_missing = pd.date_range(start='1980-01-01', end=today, freq='D')
DISCHARGE_DAILY = raw_daily.reindex(DISCHARGE_DAILY_date_missing, fill_value=None)
DISCHARGE_DAILY.index.name = 'Fecha'  # Set index Fecha
# The two data columns are renamed positionally; only the discharge is kept.
DISCHARGE_DAILY.columns = ['Station', 'Discharge']
DISCHARGE_DAILY = DISCHARGE_DAILY.drop(columns='Station')
first_days_html = DISCHARGE_DAILY.head(6).to_html()
HTML(first_days_html)

Unnamed: 0_level_0,Discharge
Fecha,Unnamed: 1_level_1
1980-01-01,1.738
1980-01-02,1.738
1980-01-03,1.738
1980-01-04,1.738
1980-01-05,1.738
1980-01-06,1.738


## Calculate monthly mean from daily data

First, we identify the months that contain fewer than 5 missing (null) daily observations.

In [57]:
# Group the daily DataFrame by calendar month (group labels are month starts).
GROUPER_DISCHARGE_MONTHLY = DISCHARGE_DAILY.groupby(pd.Grouper(freq='1MS'))
# Count the null daily values in each month. This is computed once and reused
# for the boolean flag below instead of repeating the same groupby/apply.
NUMBER_MISSING = GROUPER_DISCHARGE_MONTHLY.apply(lambda x: pd.isnull(x).sum()).unstack(1)
# True when the month has fewer than 5 null daily values.
BOOL_MISSING = (NUMBER_MISSING < 5).to_frame()  # convert to DataFrame
BOOL_MISSING.columns = ['missing']  # note: True means "fewer than 5 values missing"

NUMBER_MISSING = NUMBER_MISSING.to_frame()  # convert to DataFrame
NUMBER_MISSING.columns = ['number_missing']  # change the column name to "number_missing"

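After this, we calculate the monthly mean flow only if the month has fewer than 5 missing daily values. We also define the water year, which starts in April and is labelled with the calendar year in which it ends (e.g. April 1980 belongs to water year 1981).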

In [58]:
def _monthly_mean_if_complete(daily_values):
    """Return the mean of one month of daily values, or NaN when 5 or more of them are null."""
    if daily_values.isnull().sum() < 5:
        return daily_values.mean()
    return np.nan


# Month-end resampling of the daily series.
# NOTE(review): only null entries are counted, so a month that is merely short
# (e.g. the current, still incomplete month) presumably still gets a mean - confirm
# that this is intended.
DISCHARGE_MONTHLY = DISCHARGE_DAILY.resample('M').apply(_monthly_mean_if_complete)

month_end_index = DISCHARGE_MONTHLY.index
DISCHARGE_MONTHLY['Year'] = month_end_index.year
DISCHARGE_MONTHLY['Month'] = month_end_index.month
# January-March keep the calendar year; April-December are assigned to the next
# year, i.e. the water year is labelled by the year in which it ends.
DISCHARGE_MONTHLY['water_year'] = month_end_index.year.where(month_end_index.month < 4, month_end_index.year + 1)

first_months_html = DISCHARGE_MONTHLY.head(18).to_html(index=False)
HTML(first_months_html)

Discharge,Year,Month,water_year
1.198613,1980,1,1980
28.386414,1980,2,1980
86.782323,1980,3,1980
172.187367,1980,4,1981
275.560258,1980,5,1981
228.526533,1980,6,1981
203.495161,1980,7,1981
167.313355,1980,8,1981
61.411033,1980,9,1981
138.581516,1980,10,1981


In [59]:
# Disabled (commented-out) optional step: add calendar helper columns
# (year, month, day, day of year, water year) to the daily discharge table.
# NOTE(review): 'monthday' would hold index.day_of_year (1-366), not a month/day pair.
# DISCHARGE_DAILY['Year'] = DISCHARGE_DAILY.index.year
# DISCHARGE_DAILY['Month'] = DISCHARGE_DAILY.index.month
# DISCHARGE_DAILY['Day'] = DISCHARGE_DAILY.index.day
# DISCHARGE_DAILY['monthday'] = DISCHARGE_DAILY.index.day_of_year
# DISCHARGE_DAILY['water_year'] = DISCHARGE_DAILY.index.year.where(DISCHARGE_DAILY.index.month < 4, DISCHARGE_DAILY.index.year + 1)
# Disabled: copy the monthly table to the system clipboard for manual inspection.
# DISCHARGE_MONTHLY.to_clipboard()

## Select by Climatology range

First, we transform the discharge to a logarithmic scale (natural logarithm) and insert it into the dataframe.

In [60]:
# Natural logarithm of the monthly mean discharge.
# A non-positive discharge has no finite logarithm: np.log(0) is -inf, and a
# single -inf would turn that calendar month's mean into -inf and its standard
# deviation into NaN in the statistics computed later. Such values are therefore
# masked to NaN (treated as missing) before taking the log.
_positive_discharge = DISCHARGE_MONTHLY['Discharge'].where(DISCHARGE_MONTHLY['Discharge'] > 0)
DISCHARGE_MONTHLY['Q_to_log'] = np.log(_positive_discharge)

In this part we select the reference period for the analysis. In this case we use the period from 1980-01 up to the current date (`today`); in the run shown here the last month is 2023-03.

In [61]:
# Reference period: from 1980-01-01 up to the date the notebook is run.
reference_period = slice('1980-01-01', today)
DISCHARGE_MONTHLY_SELECTED = DISCHARGE_MONTHLY.loc[reference_period]
# Show the last six months of the selection.
last_months_html = DISCHARGE_MONTHLY_SELECTED.tail(6).to_html(index=False)
HTML(last_months_html)

Discharge,Year,Month,water_year,Q_to_log
,2022,10,2023,
,2022,11,2023,
,2022,12,2023,
,2023,1,2023,
,2023,2,2023,
,2023,3,2023,


In [62]:
# Show the first 15 months of the selected reference period.
first_selected_html = DISCHARGE_MONTHLY_SELECTED.head(15).to_html(index=False)
HTML(first_selected_html)

Discharge,Year,Month,water_year,Q_to_log
1.198613,1980,1,1980,0.181165
28.386414,1980,2,1980,3.345911
86.782323,1980,3,1980,4.463403
172.187367,1980,4,1981,5.148583
275.560258,1980,5,1981,5.618806
228.526533,1980,6,1981,5.431652
203.495161,1980,7,1981,5.315642
167.313355,1980,8,1981,5.119868
61.411033,1980,9,1981,4.11759
138.581516,1980,10,1981,4.931459


Extract the years from the monthly discharge dataframe

In [63]:
# Sorted array of the distinct calendar years present in the reference period.
selected_year_column = DISCHARGE_MONTHLY_SELECTED['Year']
YEAR_SELECTED = np.unique(selected_year_column)
display(YEAR_SELECTED)

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
      dtype=int64)

In [64]:
# Exploratory check: the reference years earlier than 1992.
is_before_1992 = YEAR_SELECTED < 1992
YEAR_SELECTED[is_before_1992]

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991], dtype=int64)

In [65]:
# Number of distinct years in the reference period.
YEAR_SELECTED.shape[0]

44

In [67]:
# Exploratory check: all January rows of the reference period.
is_january = DISCHARGE_MONTHLY_SELECTED['Month'] == 1
DISCHARGE_MONTHLY_SELECTED.loc[is_january]

Unnamed: 0_level_0,Discharge,Year,Month,water_year,Q_to_log
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-01-31,1.198613,1980,1,1980,0.181165
1981-01-31,9.649097,1981,1,1981,2.266864
1982-01-31,2.314161,1982,1,1982,0.839047
1983-01-31,5.504935,1983,1,1983,1.705645
1984-01-31,31.699452,1984,1,1984,3.456299
1985-01-31,3.292613,1985,1,1985,1.191681
1986-01-31,6.365613,1986,1,1986,1.850911
1987-01-31,,1987,1,1987,
1988-01-31,35.197677,1988,1,1988,3.56098
1989-01-31,3.159548,1989,1,1989,1.150429


## Calculate Monthly statistics for Q Log-transformed 

In this part we calculate monthly statistics for the log-transformed discharge (`Q_to_log`).

 The statistics include: min, max, mean, std, q1 (quantile 0.28), q2 (median), q3 (quantile 0.72), q5 (0.05), q95 (0.95), q87 (0.87), q13 (0.13)

In [68]:
# Per-calendar-month statistics of the log discharge over the reference period.
# Named aggregation is used instead of passing numpy callables (np.min, np.mean,
# np.max, np.std): the column labels are then fixed explicitly ('amin', 'amax',
# 'mean', 'std', ...) rather than derived from the callables' __name__, and the
# pandas reductions (including the sample standard deviation) are pinned.
DISCHARGE_MONTHLY_STATS = DISCHARGE_MONTHLY_SELECTED.Q_to_log.groupby(DISCHARGE_MONTHLY_SELECTED.index.month).agg(
    amin='min',
    q1=q1,
    q2=q2,
    mean='mean',
    q3=q3,
    amax='max',
    std='std',
    q95=q95,
    q5=q5,
    q87=q87,
    q13=q13,
)
# Display results
HTML(DISCHARGE_MONTHLY_STATS.to_html())

Unnamed: 0_level_0,amin,q1,q2,mean,q3,amax,std,q95,q5,q87,q13
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.181165,1.183431,1.705645,1.787666,2.244798,3.56098,1.045717,3.50864,0.510106,3.099469,0.932462
2,1.178985,2.342791,3.179709,3.016745,3.396043,6.093265,1.361784,4.963236,1.249345,3.762216,1.483624
3,1.883088,2.273633,2.653676,3.203675,4.096088,5.221666,1.200036,4.887004,1.909339,4.52566,1.955076
4,1.979313,2.817172,3.169494,3.514373,4.285434,5.931557,1.256545,5.54007,2.097072,4.928674,2.324338
5,1.41802,3.607824,4.238582,4.133933,4.886562,6.181355,1.381214,5.90008,2.107963,5.445454,2.819214
6,1.814417,4.452242,4.929769,4.596483,5.361656,5.908143,1.319481,5.899078,2.293442,5.81042,3.108011
7,3.029434,4.153683,4.858205,4.613071,5.257975,6.220885,1.029457,5.813526,3.033228,5.307056,3.197951
8,2.515746,4.644342,5.245967,4.995786,5.415821,6.566886,1.132813,6.279046,3.287884,5.831976,4.448014
9,3.113517,3.790079,4.708572,4.474032,5.022147,6.008443,1.006962,5.772492,3.113973,5.417555,3.169056
10,1.63017,3.402086,4.487702,4.152309,5.079767,6.114101,1.451737,5.779274,1.975776,5.271015,2.521172


In [105]:
# Disabled (commented-out) optional step: daily statistics.

# The same aggregation as for the monthly table, applied to the daily DataFrame.
# NOTE(review): it groups by a 'monthday' column, which only exists if the
# commented-out helper-column cell earlier in the notebook is enabled.

# DISCHARGE_DAILY_STATS = DISCHARGE_DAILY.Discharge.groupby(DISCHARGE_DAILY.monthday).agg([np.min, q1, q2, np.mean, q3, np.max, np.std, q5, q95, q87, q13])
# HTML(DISCHARGE_DAILY_STATS.head(12).to_html())

We reshape the monthly dataframe into pivot tables with one row per month and one column per year, for the discharge (`DISCHARGE_MONTHLY_PIVOT`) and for the log discharge (`LOG_DISCHARGE_MONTHLY_PIVOT`).

In [69]:
# Month-by-year table of the monthly mean discharge; dropna=False keeps the
# year columns that contain no data at all.
DISCHARGE_MONTHLY_PIVOT = DISCHARGE_MONTHLY_SELECTED.pivot_table(
    index=['Month'],
    columns=['Year'],
    values=['Discharge'],
    dropna=False,
)

In [70]:
# Replace the two-level (value, year) column labels with the plain year numbers.
# The assignment is positional, so it relies on the pivot having exactly one
# column per entry of YEAR_SELECTED, in the same order.
DISCHARGE_MONTHLY_PIVOT.columns = YEAR_SELECTED
discharge_pivot_html = DISCHARGE_MONTHLY_PIVOT.to_html()
HTML(discharge_pivot_html)

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,1.198613,9.649097,2.314161,5.504935,31.699452,3.292613,6.365613,,35.197677,3.159548,3.394032,9.386581,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,28.386414,36.472929,25.551107,21.500929,442.865069,3.742321,11.727821,,24.039759,3.251071,46.2105,6.463036,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,86.782323,14.206161,13.985161,6.928129,94.854258,54.8312,7.393065,,185.242548,6.573774,34.673032,10.401032,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,172.187367,23.795433,18.4861,17.752067,23.866967,70.3093,13.1952,,82.7275,7.237767,376.740567,9.159867,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,275.560258,483.646645,154.61929,17.618194,117.863032,127.480194,69.309516,,16.410258,4.128935,49.336677,44.369258,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,228.526533,68.479367,368.0222,120.220567,360.682667,199.745367,159.2076,,17.794967,6.1375,105.698333,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,203.495161,144.611613,190.82629,74.77071,503.148516,193.472935,114.704484,,53.492613,20.860677,20.685516,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,167.313355,236.170935,192.769581,346.293065,,189.799194,711.151839,,85.292806,89.494355,12.375839,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,61.411033,162.816267,142.179767,406.8493,86.823867,141.636433,240.833867,,31.039967,22.522833,22.500033,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10,138.581516,23.780032,,452.189484,88.916903,168.443677,195.788032,,12.111871,5.104742,62.845581,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [71]:
# Month-by-year table of the log-transformed monthly discharge; dropna=False
# keeps the year columns that contain no data at all.
LOG_DISCHARGE_MONTHLY_PIVOT = DISCHARGE_MONTHLY_SELECTED.pivot_table(
    index=['Month'],
    columns=['Year'],
    values=['Q_to_log'],
    dropna=False,
)

Rename the columns based on the reference years

In [72]:
# Replace the two-level (value, year) column labels with the plain year numbers.
# The assignment is positional, so it relies on the pivot having exactly one
# column per entry of YEAR_SELECTED, in the same order.
LOG_DISCHARGE_MONTHLY_PIVOT.columns = YEAR_SELECTED
log_pivot_html = LOG_DISCHARGE_MONTHLY_PIVOT.to_html()
HTML(log_pivot_html)

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.181165,2.266864,0.839047,1.705645,3.456299,1.191681,1.850911,,3.56098,1.150429,1.222019,2.239281,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,3.345911,3.59657,3.240681,3.068096,6.093265,1.319706,2.461964,,3.179709,1.178985,3.833207,1.866099,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,4.463403,2.653676,2.637997,1.93559,4.552342,4.004259,2.000542,,5.221666,1.883088,3.545962,2.341905,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,5.148583,3.169494,2.917019,2.876502,3.172495,4.252904,2.579853,,4.415552,1.979313,5.931557,2.214832,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,5.618806,6.181355,5.040966,2.868932,4.769523,4.847961,4.238582,,2.797907,1.41802,3.898668,3.792547,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,5.431652,4.226532,5.908143,4.789328,5.887999,5.297043,5.070209,,2.878916,1.814417,4.660589,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,5.315642,4.974052,5.251364,4.314426,6.220885,5.265138,4.742359,,3.979544,3.037866,3.029434,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,5.119868,5.464556,5.261496,5.847285,,5.245967,6.566886,,4.44609,4.494176,2.515746,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,4.11759,5.092622,4.957092,6.008443,4.463882,4.953263,5.484107,,3.435276,3.11453,3.113517,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10,4.931459,3.168846,,6.114101,4.487702,5.126601,5.277033,,2.494186,1.63017,4.140681,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Calculate the standardised monthly anomalies

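After creating the dataframe that holds the multi-year monthly log discharge, we calculate the standardised monthly anomalies by subtracting the mean of each calendar month and dividing by its standard deviation: $z_{y,m} = (\ln Q_{y,m} - \mu_m) / \sigma_m$.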

In [73]:
# Standardised anomaly of the log discharge for every (month, year) cell:
# (value - mean of that calendar month) / standard deviation of that month.
# The subtraction and division are broadcast along the month index (axis=0) in
# one vectorised step instead of growing a DataFrame one year-column at a time.
# Columns are taken in YEAR_SELECTED order, as in a per-year loop.
MONTHLY_ANOMALY = (
    LOG_DISCHARGE_MONTHLY_PIVOT[YEAR_SELECTED]
    .sub(DISCHARGE_MONTHLY_STATS['mean'], axis=0)
    .div(DISCHARGE_MONTHLY_STATS['std'], axis=0)
)

In [74]:
# Display the month-by-year table of standardised anomalies.
MONTHLY_ANOMALY

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,...,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,...,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,...,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,...,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,...,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,...,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,...,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,...,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,...,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,...,,,,,,,,,,


Rewrite the index as month numbers from 1 to 12 (January to December).

In [75]:
# Relabel the rows 1..N by position (N = number of rows). This also clears the
# index name.
# NOTE(review): assumes the rows are already ordered January..December with no
# month missing - confirm for other datasets.
row_count = len(MONTHLY_ANOMALY)
MONTHLY_ANOMALY.index = np.arange(1, row_count + 1)
anomaly_html = MONTHLY_ANOMALY.to_html()
HTML(anomaly_html)

Unnamed: 0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,-0.540918,0.431872,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,0.599554,-0.844955,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,0.28523,-0.718121,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,1.923675,-1.034218,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,-0.170332,-0.247164,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,0.048584,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,-1.538323,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,-2.189276,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,-1.351109,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,-0.00801,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [76]:
# Label the row index 'Month' so that reset_index() later yields a 'Month' column.
MONTHLY_ANOMALY.index = MONTHLY_ANOMALY.index.rename('Month')

In [77]:
# Display the anomaly table again, now with the index labelled 'Month'.
MONTHLY_ANOMALY

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,...,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,...,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,...,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,...,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,...,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,...,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,...,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,...,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,...,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,...,,,,,,,,,,


## Save dataframe of monthly  ANOMALY and DISCHARGE

In [78]:
# Convert the month-by-year anomaly table to long format with one row per
# (Month, Year) pair. Rows come out year by year, months 1..N within each year.
# var_name is passed as a scalar (pandas documents it as a scalar for
# single-level columns), and value_name names the value column directly, so the
# result already has the columns ['Month', 'Year', 'Anomaly_Qlog'].
ANOMALY_TS = pd.melt(
    MONTHLY_ANOMALY.reset_index(),
    id_vars='Month',
    var_name='Year',
    value_name='Anomaly_Qlog',
    ignore_index=True,
)

In [79]:
# Display the wide anomaly table (the melt in the previous cell works on a copy
# produced by reset_index(), so this table is unchanged).
MONTHLY_ANOMALY

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,...,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,...,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,...,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,...,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,...,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,...,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,...,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,...,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,...,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,...,,,,,,,,,,


In [80]:
# Attach the standardised anomaly to the monthly table by matching on
# (Year, Month) rather than by row position. A positional slice only lines up
# when DISCHARGE_MONTHLY starts in January of the first reference year and the
# reference period covers the whole table; key-based alignment gives the same
# result in that case and NaN (instead of a shifted value) for any month that
# has no anomaly.
_anomaly_by_period = pd.Series(
    ANOMALY_TS['Anomaly_Qlog'].values,
    index=pd.MultiIndex.from_arrays(
        [ANOMALY_TS['Year'].astype(int), ANOMALY_TS['Month'].astype(int)]
    ),
)
_monthly_period_keys = pd.MultiIndex.from_arrays(
    [DISCHARGE_MONTHLY['Year'].astype(int), DISCHARGE_MONTHLY['Month'].astype(int)]
)
DISCHARGE_MONTHLY['Anomaly_Qlog'] = _anomaly_by_period.reindex(_monthly_period_keys).values

In [81]:
# Display the monthly table, which now includes the 'Anomaly_Qlog' column.
DISCHARGE_MONTHLY

Unnamed: 0_level_0,Discharge,Year,Month,water_year,Q_to_log,Anomaly_Qlog
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-31,1.198613,1980,1,1980,0.181165,-1.536267
1980-02-29,28.386414,1980,2,1980,3.345911,0.241717
1980-03-31,86.782323,1980,3,1980,4.463403,1.049742
1980-04-30,172.187367,1980,4,1981,5.148583,1.300559
1980-05-31,275.560258,1980,5,1981,5.618806,1.075049
...,...,...,...,...,...,...
2022-12-31,,2022,12,2023,,
2023-01-31,,2023,1,2023,,
2023-02-28,,2023,2,2023,,
2023-03-31,,2023,3,2023,,


### Save  MONTHLY_ANOMALY

In [82]:
# Write the month-by-year anomaly table to CSV.
monthly_anomaly_csv = '../notebook_verificacion/santalucia_caudales_prueba_MA.csv'
MONTHLY_ANOMALY.to_csv(monthly_anomaly_csv)

### Save  DISCHARGE_MONTHLY

In [83]:
# Write the monthly table (discharge, calendar columns, log discharge and
# anomaly) to CSV.
discharge_monthly_csv = '../notebook_verificacion/santalucia_caudales_prueba_DA.csv'
DISCHARGE_MONTHLY.to_csv(discharge_monthly_csv)