# HydroSOS - Persistence and Hydrological Analogues method

In this notebook, we explore the Persistence and Hydrological Analogues methods.

## Import the python libraries

In [1]:
%reset -f

In [2]:
# Importing the libraries
import pandas as pd
import seaborn as sns
import numpy as np
import calendar
import datetime
import matplotlib.pyplot as plt
import math
from scipy import stats
plt.style.use('classic')
%matplotlib inline

from IPython.display import HTML

sns.set()

In [3]:
# install a conda package in the current Jupyter kernel
#import sys
#!conda install --yes --prefix {sys.prefix} missingno

In [4]:
# import missingno

### We create functions for the percentile definitions

In [5]:
# Quantile Function Definitions
def q1(x):
    """Return the 0.28 quantile of ``x`` (any object exposing ``.quantile``)."""
    level = 0.28
    return x.quantile(level)

def q2(x):
    """Return the median of ``x`` (any object exposing ``.median``)."""
    middle_value = x.median()
    return middle_value

def q3(x):
    """Return the 0.72 quantile of ``x`` (any object exposing ``.quantile``)."""
    level = 0.72
    return x.quantile(level)

def q5(x):
    """Return the 0.05 quantile of ``x`` (any object exposing ``.quantile``)."""
    level = 0.05
    return x.quantile(level)

def q95(x):
    """Return the 0.95 quantile of ``x`` (any object exposing ``.quantile``)."""
    level = 0.95
    return x.quantile(level)

def q87(x):
    """Return the 0.87 quantile of ``x`` (any object exposing ``.quantile``)."""
    level = 0.87
    return x.quantile(level)

def q13(x):
    """Return the 0.13 quantile of ``x`` (any object exposing ``.quantile``)."""
    level = 0.13
    return x.quantile(level)


We create a function called `add_months` that shifts an input date by a given number of months (negative values move backwards). It is used in the jack-knife validation.

In [6]:
def add_months(sourcedate, months):
    """Shift ``sourcedate`` by ``months`` calendar months.

    ``months`` may be negative. The day of month is kept when it exists in the
    resulting month; otherwise it is clamped to that month's last day
    (e.g. 31 Jan + 1 month -> 28/29 Feb).

    Returns a ``datetime.date``.
    """
    # Zero-based month count from January of the source year, then split
    # into whole years and the remaining zero-based month.
    year_shift, month_zero_based = divmod(sourcedate.month - 1 + months, 12)
    target_year = sourcedate.year + year_shift
    target_month = month_zero_based + 1
    days_in_target_month = calendar.monthrange(target_year, target_month)[1]
    target_day = min(sourcedate.day, days_in_target_month)
    return datetime.date(target_year, target_month, target_day)

The monthly anomalies of the most recent months are compared with all possible historical sequences of anomalies covering the same months of the year. That is, if the recent past covers, say, the months February to July, then potential analogues are sought only in the February to July sequences of the historical record.

Define a Root Mean Square Error function. This function will be used to select the analogue years.

In [7]:
def rmse(predictions,targets):
    """Root mean square error between ``predictions`` and ``targets``, rounded to 4 decimals.

    The inputs are subtracted element-wise and the result's ``.mean()`` is used,
    so missing values are handled however that container's ``mean`` handles them
    (a pandas Series skips NaN by default, a NumPy array does not).
    """
    squared_errors = (predictions - targets) ** 2
    mean_squared_error = squared_errors.mean()
    return np.around(mean_squared_error ** 0.5, 4)

## Import Data 

In this example we import the daily discharge data from Fray Marcos station, from 01 Jan 1980 to 31 Jan 2023. We import and then visualize head and tail of dataframe, plot timeseries and also make boxplot for daily discharge grouped by month.

In [8]:
# Load the monthly anomaly table, indexed by the 'Month' column (one column per year in the file).
# Empty cells are read as NaN.
# NOTE(review): 'Month' is displayed below as plain month numbers, so parse_dates=['Month']
# does not appear to produce dates here — confirm whether it is needed.
MONTHLY_ANOMALY = pd.read_csv('../notebook_verificacion/santalucia_caudales_prueba_MA.csv',parse_dates=['Month'],index_col="Month",na_values="")

In [9]:
MONTHLY_ANOMALY

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,...,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,...,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,...,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,...,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,...,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,...,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,...,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,...,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,...,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,...,,,,,,,,,,


In [10]:
# Load the monthly discharge table, indexed by the parsed 'Fecha' date column.
# Empty cells are read as NaN. Later cells use its 'Year', 'Month' and 'Q_to_log' columns.
DISCHARGE_MONTHLY = pd.read_csv('../notebook_verificacion/santalucia_caudales_prueba_DA.csv',parse_dates=['Fecha'],index_col="Fecha",na_values="")

In [11]:
DISCHARGE_MONTHLY

Unnamed: 0_level_0,Discharge,Year,Month,water_year,Q_to_log,Anomaly_Qlog
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-31,1.198613,1980,1,1980,0.181165,-1.536267
1980-02-29,28.386414,1980,2,1980,3.345911,0.241717
1980-03-31,86.782323,1980,3,1980,4.463403,1.049742
1980-04-30,172.187367,1980,4,1981,5.148583,1.300559
1980-05-31,275.560258,1980,5,1981,5.618806,1.075049
...,...,...,...,...,...,...
2022-12-31,,2022,12,2023,,
2023-01-31,,2023,1,2023,,
2023-02-28,,2023,2,2023,,
2023-03-31,,2023,3,2023,,


In [12]:
# Sorted unique years present in the monthly discharge table
YEAR_HINDCAST_SELECTED = np.unique(DISCHARGE_MONTHLY['Year'])
# Relabel the anomaly columns with these integer years (replaces the labels read from the CSV).
# NOTE(review): this is a positional assignment — it assumes the CSV columns come in the
# same order and number as the sorted years; confirm against the input file.
MONTHLY_ANOMALY.columns = YEAR_HINDCAST_SELECTED

In [13]:
YEAR_HINDCAST_SELECTED

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
      dtype=int64)

In [14]:
# Years used for the analogue analysis: from the first available year up to, but not
# including, the year stored at position 11 (range() excludes its stop value).
# With the data shown above this gives 1980..1990.
range_analysis = range(YEAR_HINDCAST_SELECTED[0],YEAR_HINDCAST_SELECTED[11])

In [15]:
# Pivot the 'Q_to_log' values into a Month (rows) x Year (columns) table.
# dropna=False keeps years whose values are all NaN.
LOG_DISCHARGE_MONTHLY_PIVOT = pd.pivot_table(DISCHARGE_MONTHLY, index=['Month'],columns=['Year'], values=['Q_to_log'],dropna=False)
# Replace the pivot's column labels with the plain integer years.
# NOTE(review): positional assignment — assumes one pivot column per entry of YEAR_HINDCAST_SELECTED, in the same order.
LOG_DISCHARGE_MONTHLY_PIVOT.columns = YEAR_HINDCAST_SELECTED

In [16]:
LOG_DISCHARGE_MONTHLY_PIVOT

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.181165,2.266864,0.839047,1.705645,3.456299,1.191681,1.850911,,3.56098,1.150429,...,,,,,,,,,,
2,3.345911,3.59657,3.240681,3.068096,6.093265,1.319706,2.461964,,3.179709,1.178985,...,,,,,,,,,,
3,4.463403,2.653676,2.637997,1.93559,4.552342,4.004259,2.000542,,5.221666,1.883088,...,,,,,,,,,,
4,5.148583,3.169494,2.917019,2.876502,3.172495,4.252904,2.579853,,4.415552,1.979313,...,,,,,,,,,,
5,5.618806,6.181355,5.040966,2.868932,4.769523,4.847961,4.238582,,2.797907,1.41802,...,,,,,,,,,,
6,5.431652,4.226532,5.908143,4.789328,5.887999,5.297043,5.070209,,2.878916,1.814417,...,,,,,,,,,,
7,5.315642,4.974052,5.251364,4.314426,6.220885,5.265138,4.742359,,3.979544,3.037866,...,,,,,,,,,,
8,5.119868,5.464556,5.261496,5.847285,,5.245967,6.566886,,4.44609,4.494176,...,,,,,,,,,,
9,4.11759,5.092622,4.957092,6.008443,4.463882,4.953263,5.484107,,3.435276,3.11453,...,,,,,,,,,,
10,4.931459,3.168846,,6.114101,4.487702,5.126601,5.277033,,2.494186,1.63017,...,,,,,,,,,,


In [17]:
# Restrict both Month x Year tables to the years of the analysis window
ANOMALY_ANALOGUES = MONTHLY_ANOMALY[range_analysis]
QLOG_ANALOGUES = LOG_DISCHARGE_MONTHLY_PIVOT[range_analysis]
# Render the full anomaly subset as an HTML table
HTML(ANOMALY_ANALOGUES.to_html())

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,-0.540918
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,0.599554
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,0.28523
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,1.923675
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,-0.170332
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,0.048584
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,-1.538323
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,-2.189276
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,-1.351109
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,-0.00801


In [18]:
# Reshape the Month x Year anomaly table into a long table with one row per
# (Month, Year) pair, ordered year by year.
# var_name is passed as a scalar: pandas documents it as a scalar, and recent
# releases are believed to reject a list for non-MultiIndex columns.
# value_name sets the final column label directly, so no rename is needed.
ANOMALY_ANALOGUE_TS = pd.melt(
    ANOMALY_ANALOGUES.reset_index(),
    id_vars='Month',
    var_name='Year',
    value_name='Anomaly_Qlog',
    ignore_index=True,
)
HTML(ANOMALY_ANALOGUE_TS.head(6).to_html())

Unnamed: 0,Month,Year,Anomaly_Qlog
0,1,1980,-1.536267
1,2,1980,0.241717
2,3,1980,1.049742
3,4,1980,1.300559
4,5,1980,1.075049
5,6,1980,0.632953


In [19]:
# Reshape the Month x Year log-discharge table into a long table with one row
# per (Month, Year) pair, ordered year by year.
# var_name is passed as a scalar: pandas documents it as a scalar, and recent
# releases are believed to reject a list for non-MultiIndex columns.
# value_name sets the final column label directly, so no rename is needed.
QLOG_ANALOGUE_TS = pd.melt(
    QLOG_ANALOGUES.reset_index(),
    id_vars='Month',
    var_name='Year',
    value_name='Qlog',
    ignore_index=True,
)
HTML(QLOG_ANALOGUE_TS.head(6).to_html())

Unnamed: 0,Month,Year,Qlog
0,1,1980,0.181165
1,2,1980,3.345911
2,3,1980,4.463403
3,4,1980,5.148583
4,5,1980,5.618806
5,6,1980,5.431652


In [20]:
# Build a timestamp for each row from its Year and Month, using day 1 of the month.
# The hindcast loop filters on this DATE column.
ANOMALY_ANALOGUE_TS['DATE'] = pd.to_datetime(ANOMALY_ANALOGUE_TS[['Year', 'Month']].assign(DAY=1))
HTML(ANOMALY_ANALOGUE_TS.head(6).to_html(index=False))

Month,Year,Anomaly_Qlog,DATE
1,1980,-1.536267,1980-01-01
2,1980,0.241717,1980-02-01
3,1980,1.049742,1980-03-01
4,1980,1.300559,1980-04-01
5,1980,1.075049,1980-05-01
6,1980,0.632953,1980-06-01


In [21]:
######## CORE SECTION

## HINDCAST FOR A FORECAST LENGTH GIVEN IN MONTHS

In [22]:
## DANA: number of most recent months of observations that form the target
## window compared against each candidate analogue
DANA = 6
## FORECAST_LENGTH: lead time of the forecast, in months after the last observed month
FORECAST_LENGTH = 1
##

In [23]:
##
# Jack-knife hindcast.
# For every (year, end-month) of the analysis window, the last DANA months of
# anomalies (the "target") are compared with the same calendar months of every
# other period in the record (the candidate analogues). One record per
# target/analogue pair is appended to the result lists below.
#
# Inputs (notebook globals):
# range_analysis - > YEAR_ANALYSIS
# QLOG_ANALOGUES
# QLOG_ANALOGUE_TS
# ANOMALY_ANALOGUES
# ANOMALY_ANALOGUE_TS
# MONTH_ANALYSIS
# DANA
# FORECAST_LENGTH
##


YEAR_ANALYSIS = range_analysis # choose years so that at least DANA months of data exist before the end month

MONTH_ANALYSIS = range(1,13,1)
df = pd.DataFrame()
ANOMALY_HISTORICAL = pd.DataFrame()
RMSE = []
ANO_TARG = []   # year span of each candidate analogue window, as "first-last"
ANO_PRED = []   # year of the forecast month
MON_TARG = []   # calendar months covered by the target window
MON_PRED = []   # calendar month of the forecast
TARG = []       # target anomalies
PRED = []       # analogue anomalies
OBS_ANOM = []   # anomaly observed FORECAST_LENGTH months after the analogue window

MEAN_ANOM_HIND = []
STD_ANOM_HIND = []
Q1_ANOM_HIND = []
Q3_ANOM_HIND = []

MEAN_QLOG_HIND = []
STD_QLOG_HIND = []

##
print('Dana selected: ',DANA, 'months')
print('Forecast length selected: ', FORECAST_LENGTH , 'months')
##


for y in YEAR_ANALYSIS:

    for m in MONTH_ANALYSIS:

        # Last observed month (day 1) and the month being forecast
        END_DATE_HIST = datetime.date(y, m, 1)
        FORECAST_DATE = add_months(END_DATE_HIST, FORECAST_LENGTH)

        # Year held out of the hindcast climatology: the year containing the
        # forecast month. When that year is not part of the analysed columns
        # (forecast beyond the record) the year of analysis is held out instead.
        # For FORECAST_LENGTH = 1 this reproduces the previous rule
        # (y for months 1-11, y+1 for December, y for the last December).
        DROP_YEAR = FORECAST_DATE.year if FORECAST_DATE.year in ANOMALY_ANALOGUES.columns else y
        print('Dropping year:',DROP_YEAR," - get series for analysis.")

        # Qlog hindcast series: every year except the held-out one
        QLOG_HINDCAST_TARG_ANALOGUE = QLOG_ANALOGUES.drop([DROP_YEAR],axis=1)
        QLOG_HINDCAST_TARG_ANALOGUE_TS = QLOG_ANALOGUE_TS[QLOG_ANALOGUE_TS['Year'] != DROP_YEAR]

        # Anomaly hindcast series: every year except the held-out one
        ANOMALY_HINDCAST_TARG_ANALOGUE = ANOMALY_ANALOGUES.drop([DROP_YEAR],axis=1)
        ANOMALY_HINDCAST_TARG_ANALOGUE_TS = ANOMALY_ANALOGUE_TS[ANOMALY_ANALOGUE_TS['Year'] != DROP_YEAR]

        print('Year:',y,"-","End-Month of observation:",m)

        # Start of the target window (exclusive), DANA months before the end month
        FIRST_DATE_HIST = add_months(END_DATE_HIST,-DANA)

        # Target window: rows with FIRST_DATE_HIST < DATE <= END_DATE_HIST
        ANOMALY_TARGET = ANOMALY_ANALOGUE_TS.query('DATE > @FIRST_DATE_HIST & DATE <= @END_DATE_HIST')
        print(ANOMALY_TARGET)

        # Skip end months that do not have DANA rows of history before them.
        # NOTE(review): this counts rows, not valid values — a target window made
        # of NaN anomalies still passes; confirm whether that is intended.
        if ANOMALY_TARGET.index.size < DANA:
            print('There are not enough months in the past')
            print('')
            continue

        print('There are enough months in the past')
        print(f"Target period: {add_months(FIRST_DATE_HIST,1).strftime('%m-%Y')} to {(END_DATE_HIST.strftime('%m-%Y'))}")

        # Calendar months covered by the target window, in chronological order
        MONTHS_TARG = pd.date_range(add_months(FIRST_DATE_HIST,1),add_months(END_DATE_HIST,1),freq='M').month.to_list()

        # Candidate analogues: every non-target row that falls in those calendar months.
        # reset_index() keeps the old labels as a leading 'index' column.
        ANOMALY_HINDCAST_TARG = ANOMALY_ANALOGUE_TS.drop(ANOMALY_TARGET.index)
        ANOMALY_HINDCAST_TARG = ANOMALY_HINDCAST_TARG.query('Month == @MONTHS_TARG').reset_index()

        # Target anomalies
        df['target'] = ANOMALY_TARGET["Anomaly_Qlog"].values.round(5)

        if MONTHS_TARG[0]>=MONTHS_TARG[-1]:
            print('Target period includes two years')
            print('')
            # The record may begin with months that belong to the tail of a window
            # that started before the data; drop them so that every consecutive
            # block of DANA rows starts on the first target month.
            if ANOMALY_HINDCAST_TARG['Month'][0] != MONTHS_TARG[0]:
                FIRST_WINDOW_START = ANOMALY_HINDCAST_TARG[ANOMALY_HINDCAST_TARG['Month']==MONTHS_TARG[0]].index.min()
                ANOMALY_HINDCAST_TARG = ANOMALY_HINDCAST_TARG.drop([*range(0,FIRST_WINDOW_START)]).reset_index(drop=True)
        else:
            print('Target period includes only one year')
            print('')

        # Walk through the candidates in consecutive blocks of DANA rows
        # (a trailing incomplete block is ignored).
        for i in range(0, int(len(ANOMALY_HINDCAST_TARG.index)/DANA)):

            ANALOGUE_WINDOW = ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA]

            # Analogue anomalies, stored next to the target in df
            df['predicted'] = ANALOGUE_WINDOW["Anomaly_Qlog"].values.round(5)

            # Year span and months of the analogue window
            ANALOGUE_YEARS = ANALOGUE_WINDOW['Year'].tolist()
            ANO_TARG.append(f"{str(ANALOGUE_YEARS[0])}-{str(ANALOGUE_YEARS[-1])}")
            MON_TARG.append(MONTHS_TARG)

            # Year and month being forecast
            ANO_PRED.append(FORECAST_DATE.year)
            MON_PRED.append(FORECAST_DATE.month)

            # Last month of the analogue window and the month FORECAST_LENGTH after it
            DATE_DANA = datetime.date(int(ANALOGUE_WINDOW['Year'].values[-1]), int(ANALOGUE_WINDOW['Month'].values[-1]), 1)
            ANALOGUE_NEXT_DATE = add_months(DATE_DANA, FORECAST_LENGTH)

            # Anomaly observed in that following month (empty array when the date is outside the record)
            ANOMALY_OBS = ANOMALY_ANALOGUE_TS.query('DATE == @ANALOGUE_NEXT_DATE')['Anomaly_Qlog'].values

            # Hindcast statistics of the forecast calendar month, computed without the held-out year
            ANOMALY_CLIMATOLOGY = ANOMALY_HINDCAST_TARG_ANALOGUE.loc[ANALOGUE_NEXT_DATE.month]
            QLOG_CLIMATOLOGY = QLOG_HINDCAST_TARG_ANALOGUE.loc[ANALOGUE_NEXT_DATE.month]

            # mean and standard deviation of the anomalies, needed to rescale the forecast
            MEAN_ANOM_HIND.append(ANOMALY_CLIMATOLOGY.mean())
            STD_ANOM_HIND.append(ANOMALY_CLIMATOLOGY.std())

            # quantiles of the anomalies, needed for the contingency tables
            Q1_ANOM_HIND.append(q1(ANOMALY_CLIMATOLOGY))
            Q3_ANOM_HIND.append(q3(ANOMALY_CLIMATOLOGY))

            # mean and standard deviation of the log-discharge
            MEAN_QLOG_HIND.append(QLOG_CLIMATOLOGY.mean())
            STD_QLOG_HIND.append(QLOG_CLIMATOLOGY.std())

            # An analogue is unusable when its following month is missing (outside
            # the record or NaN) or when its own window contains missing values.
            # Explicit checks replace the former bare `except:`, which could hide
            # unrelated errors and leave the result lists with different lengths.
            OBS_IS_MISSING = ANOMALY_OBS.size == 0 or math.isnan(ANOMALY_OBS[0])
            ANALOGUE_HAS_GAPS = df['predicted'].isnull().values.any()

            if OBS_IS_MISSING or ANALOGUE_HAS_GAPS:
                RMSE_val = np.nan
                OBS_ANOM.append(np.nan)
            else:
                # RMSE between analogue and target anomalies
                RMSE_val = rmse(df['predicted'],df['target'])
                OBS_ANOM.append(ANOMALY_OBS.round(5)[0])

            # Exactly one record per analogue keeps every result list aligned
            RMSE.append(RMSE_val)
            TARG.append(df['target'].values.tolist())
            PRED.append(df['predicted'].values.tolist())


Dana selected:  6 months
Forecast length selected:  1 months
Dropping year: 1980  - get series for analysis.
Year: 1980 - End-Month of observation: 1
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
There are not enough months in the past

Dropping year: 1980  - get series for analysis.
Year: 1980 - End-Month of observation: 2
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      0.241717 1980-02-01
There are not enough months in the past

Dropping year: 1980  - get series for analysis.
Year: 1980 - End-Month of observation: 3
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      0.241717 1980-02-01
2      3  1980      1.049742 1980-03-01
There are not enough months in the past

Dropping year: 1980  - get series for analysis.
Year: 1980 - End-Month of observation: 4
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      

Dropping year: 1981  - get series for analysis.
Year: 1981 - End-Month of observation: 8
    Month  Year  Anomaly_Qlog       DATE
14      3  1981     -0.458319 1981-03-01
15      4  1981     -0.274466 1981-04-01
16      5  1981      1.482334 1981-05-01
17      6  1981     -0.280376 1981-06-01
18      7  1981      0.350652 1981-07-01
19      8  1981      0.413811 1981-08-01
There are enough months in the past
Target period: 03-1981 to 08-1981
Target period includes only one year

Dropping year: 1981  - get series for analysis.
Year: 1981 - End-Month of observation: 9
    Month  Year  Anomaly_Qlog       DATE
15      4  1981     -0.274466 1981-04-01
16      5  1981      1.482334 1981-05-01
17      6  1981     -0.280376 1981-06-01
18      7  1981      0.350652 1981-07-01
19      8  1981      0.413811 1981-08-01
20      9  1981      0.614313 1981-09-01
There are enough months in the past
Target period: 04-1981 to 09-1981
Target period includes only one year

Dropping year: 1981  - get serie

Dropping year: 1983  - get series for analysis.
Year: 1983 - End-Month of observation: 3
    Month  Year  Anomaly_Qlog       DATE
33     10  1982           NaN 1982-10-01
34     11  1982           NaN 1982-11-01
35     12  1982           NaN 1982-12-01
36      1  1983     -0.078435 1983-01-01
37      2  1983      0.037709 1983-02-01
38      3  1983     -1.056707 1983-03-01
There are enough months in the past
Target period: 10-1982 to 03-1983
Target period includes two years

Dropping year: 1983  - get series for analysis.
Year: 1983 - End-Month of observation: 4
    Month  Year  Anomaly_Qlog       DATE
34     11  1982           NaN 1982-11-01
35     12  1982           NaN 1982-12-01
36      1  1983     -0.078435 1983-01-01
37      2  1983      0.037709 1983-02-01
38      3  1983     -1.056707 1983-03-01
39      4  1983     -0.507639 1983-04-01
There are enough months in the past
Target period: 11-1982 to 04-1983
Target period includes two years

Dropping year: 1983  - get series for an

Dropping year: 1984  - get series for analysis.
Year: 1984 - End-Month of observation: 9
    Month  Year  Anomaly_Qlog       DATE
51      4  1984     -0.272078 1984-04-01
52      5  1984      0.460168 1984-05-01
53      6  1984      0.978806 1984-06-01
54      7  1984      1.561809 1984-07-01
55      8  1984           NaN 1984-08-01
56      9  1984     -0.010080 1984-09-01
There are enough months in the past
Target period: 04-1984 to 09-1984
Target period includes only one year

Dropping year: 1984  - get series for analysis.
Year: 1984 - End-Month of observation: 10
    Month  Year  Anomaly_Qlog       DATE
52      5  1984      0.460168 1984-05-01
53      6  1984      0.978806 1984-06-01
54      7  1984      1.561809 1984-07-01
55      8  1984           NaN 1984-08-01
56      9  1984     -0.010080 1984-09-01
57     10  1984      0.231029 1984-10-01
There are enough months in the past
Target period: 05-1984 to 10-1984
Target period includes only one year

Dropping year: 1984  - get seri

    Month  Year  Anomaly_Qlog       DATE
71     12  1985     -0.915141 1985-12-01
72      1  1986      0.060480 1986-01-01
73      2  1986     -0.407393 1986-02-01
74      3  1986     -1.002581 1986-03-01
75      4  1986     -0.743722 1986-04-01
76      5  1986      0.075766 1986-05-01
There are enough months in the past
Target period: 12-1985 to 05-1986
Target period includes two years

Dropping year: 1986  - get series for analysis.
Year: 1986 - End-Month of observation: 6
    Month  Year  Anomaly_Qlog       DATE
72      1  1986      0.060480 1986-01-01
73      2  1986     -0.407393 1986-02-01
74      3  1986     -1.002581 1986-03-01
75      4  1986     -0.743722 1986-04-01
76      5  1986      0.075766 1986-05-01
77      6  1986      0.359025 1986-06-01
There are enough months in the past
Target period: 01-1986 to 06-1986
Target period includes only one year

Dropping year: 1986  - get series for analysis.
Year: 1986 - End-Month of observation: 7
    Month  Year  Anomaly_Qlog       

Dropping year: 1988  - get series for analysis.
Year: 1988 - End-Month of observation: 1
    Month  Year  Anomaly_Qlog       DATE
91      8  1987           NaN 1987-08-01
92      9  1987           NaN 1987-09-01
93     10  1987           NaN 1987-10-01
94     11  1987           NaN 1987-11-01
95     12  1987           NaN 1987-12-01
96      1  1988      1.695788 1988-01-01
There are enough months in the past
Target period: 08-1987 to 01-1988
Target period includes two years

Dropping year: 1988  - get series for analysis.
Year: 1988 - End-Month of observation: 2
    Month  Year  Anomaly_Qlog       DATE
92      9  1987           NaN 1987-09-01
93     10  1987           NaN 1987-10-01
94     11  1987           NaN 1987-11-01
95     12  1987           NaN 1987-12-01
96      1  1988      1.695788 1988-01-01
97      2  1988      0.119670 1988-02-01
There are enough months in the past
Target period: 09-1987 to 02-1988
Target period includes two years

Dropping year: 1988  - get series for an

Dropping year: 1989  - get series for analysis.
Year: 1989 - End-Month of observation: 8
     Month  Year  Anomaly_Qlog       DATE
110      3  1989     -1.100457 1989-03-01
111      4  1989     -1.221652 1989-04-01
112      5  1989     -1.966323 1989-05-01
113      6  1989     -2.108455 1989-06-01
114      7  1989     -1.530132 1989-07-01
115      8  1989     -0.442800 1989-08-01
There are enough months in the past
Target period: 03-1989 to 08-1989
Target period includes only one year

Dropping year: 1989  - get series for analysis.
Year: 1989 - End-Month of observation: 9
     Month  Year  Anomaly_Qlog       DATE
111      4  1989     -1.221652 1989-04-01
112      5  1989     -1.966323 1989-05-01
113      6  1989     -2.108455 1989-06-01
114      7  1989     -1.530132 1989-07-01
115      8  1989     -0.442800 1989-08-01
116      9  1989     -1.350103 1989-09-01
There are enough months in the past
Target period: 04-1989 to 09-1989
Target period includes only one year

Dropping year: 198

In [24]:
# Gather the lists filled by the hindcast loop into one table,
# one row per target/analogue pair, columns in the order listed here.
RESULTS = pd.DataFrame({
    'MONTH_PRED': MON_PRED,
    'YEAR_PRED': ANO_PRED,
    'MONTH_TARG': MON_TARG,
    'YEAR_TARG': ANO_TARG,
    'RMSE': RMSE,
    'TARG': TARG,
    'PRED': PRED,
    'OBS_ANOM': OBS_ANOM,
    'MEAN_ANOM_HIND': MEAN_ANOM_HIND,
    'STD_ANOM_HIND': STD_ANOM_HIND,
    'Q1_ANOM_HIND': Q1_ANOM_HIND,
    'Q3_ANOM_HIND': Q3_ANOM_HIND,
})

In [25]:
RESULTS.iloc[66:80]

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
66,1,1981,"[7, 8, 9, 10, 11, 12]",1987-1987,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[nan, nan, nan, nan, nan, nan]",,-0.098902,1.090611,-0.59991,0.02714
67,1,1981,"[7, 8, 9, 10, 11, 12]",1988-1988,1.4123,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-0.6154, -0.48525, -1.03157, -1.14216, -0.726...",-0.60938,-0.098902,1.090611,-0.59991,0.02714
68,1,1981,"[7, 8, 9, 10, 11, 12]",1989-1989,1.5856,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-1.53013, -0.4428, -1.3501, -1.73732, -1.5021...",-0.54092,-0.098902,1.090611,-0.59991,0.02714
69,1,1981,"[7, 8, 9, 10, 11, 12]",1990-1990,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...",,-0.098902,1.090611,-0.59991,0.02714
70,2,1981,"[8, 9, 10, 11, 12, 1]",1981-1982,0.9869,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[0.41381, 0.61431, -0.67744, -1.30647, 0.57294...",0.16444,0.046575,1.065671,-0.300568,0.223171
71,2,1981,"[8, 9, 10, 11, 12, 1]",1982-1983,,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[0.23456, 0.47972, nan, nan, nan, -0.07843]",,0.046575,1.065671,-0.300568,0.223171
72,2,1981,"[8, 9, 10, 11, 12, 1]",1983-1984,1.2754,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[0.75167, 1.5238, 1.35134, 0.84712, -0.27539, ...",2.25918,0.046575,1.065671,-0.300568,0.223171
73,2,1981,"[8, 9, 10, 11, 12, 1]",1984-1985,,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[nan, -0.01008, 0.23103, 0.80948, -1.16299, -0...",,0.046575,1.065671,-0.300568,0.223171
74,2,1981,"[8, 9, 10, 11, 12, 1]",1985-1986,1.0182,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[0.22085, 0.47592, 0.67112, 0.3367, -0.91514, ...",-0.40739,0.046575,1.065671,-0.300568,0.223171
75,2,1981,"[8, 9, 10, 11, 12, 1]",1986-1987,,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[1.3869, 1.00309, 0.77474, 1.21781, 0.70194, nan]",,0.046575,1.065671,-0.300568,0.223171


In [26]:
RESULTS.tail(10)

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
1210,1,1991,"[7, 8, 9, 10, 11, 12]",1980-1980,1.4424,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...",0.45825,0.012116,1.090847,-0.59991,0.362784
1211,1,1991,"[7, 8, 9, 10, 11, 12]",1981-1981,1.779,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[0.35065, 0.41381, 0.61431, -0.67744, -1.30647...",-0.90715,0.012116,1.090847,-0.59991,0.362784
1212,1,1991,"[7, 8, 9, 10, 11, 12]",1982-1982,,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[0.62003, 0.23456, 0.47972, nan, nan, nan]",,0.012116,1.090847,-0.59991,0.362784
1213,1,1991,"[7, 8, 9, 10, 11, 12]",1983-1983,1.9468,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[-0.2901, 0.75167, 1.5238, 1.35134, 0.84712, -...",1.59568,0.012116,1.090847,-0.59991,0.362784
1214,1,1991,"[7, 8, 9, 10, 11, 12]",1984-1984,,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[1.56181, nan, -0.01008, 0.23103, 0.80948, -1....",,0.012116,1.090847,-0.59991,0.362784
1215,1,1991,"[7, 8, 9, 10, 11, 12]",1985-1985,1.7885,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[0.63341, 0.22085, 0.47592, 0.67112, 0.3367, -...",0.06048,0.012116,1.090847,-0.59991,0.362784
1216,1,1991,"[7, 8, 9, 10, 11, 12]",1986-1986,,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[0.12559, 1.3869, 1.00309, 0.77474, 1.21781, 0...",,0.012116,1.090847,-0.59991,0.362784
1217,1,1991,"[7, 8, 9, 10, 11, 12]",1987-1987,,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[nan, nan, nan, nan, nan, nan]",,0.012116,1.090847,-0.59991,0.362784
1218,1,1991,"[7, 8, 9, 10, 11, 12]",1988-1988,1.4945,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[-0.6154, -0.48525, -1.03157, -1.14216, -0.726...",-0.60938,0.012116,1.090847,-0.59991,0.362784
1219,1,1991,"[7, 8, 9, 10, 11, 12]",1989-1989,1.4696,"[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...","[-1.53013, -0.4428, -1.3501, -1.73732, -1.5021...",-0.54092,0.012116,1.090847,-0.59991,0.362784


In [27]:
# Monthly calendar (month starts) running from the first to the last forecast
# date found in RESULTS, with helper columns for month number, year and month abbreviation.
FIRST_RESULT_ROW = RESULTS.iloc[0]
LAST_RESULT_ROW = RESULTS.iloc[-1]
FIRST_PRED_DATE = datetime.date(int(FIRST_RESULT_ROW['YEAR_PRED']), int(FIRST_RESULT_ROW['MONTH_PRED']), 1)
LAST_PRED_DATE = datetime.date(int(LAST_RESULT_ROW['YEAR_PRED']), int(LAST_RESULT_ROW['MONTH_PRED']), 1)
PRED_DATES = pd.date_range(FIRST_PRED_DATE, LAST_PRED_DATE, freq='MS')

TTT = pd.DataFrame()
TTT['DATE'] = PRED_DATES
TTT['MONTH_PRED'] = PRED_DATES.month
TTT['YEAR_PRED'] = PRED_DATES.year
TTT['MONTH_PRED_cal'] = TTT['MONTH_PRED'].map(lambda month_number: calendar.month_abbr[month_number])

In [28]:
TTT.tail(5)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal
122,1990-09-01,9,1990,Sep
123,1990-10-01,10,1990,Oct
124,1990-11-01,11,1990,Nov
125,1990-12-01,12,1990,Dec
126,1991-01-01,1,1991,Jan


In [29]:
# For every forecast date, keep the best_n analogues with the lowest RMSE and
# store their row labels, RMSE values, inverse-RMSE weights, observed anomalies
# and the hindcast statistics of the forecast month.
best_n = 5
INDEX_best_n = []
RMSE_best_n = []
w_best_n = []
OBS_ANOM_best_n = []

MEAN_ANOM_HIND_best_n = []
STD_ANOM_HIND_best_n = []
Q1_ANOM_HIND_best_n = []
Q3_ANOM_HIND_best_n = []

# Year-major iteration over every (year, month) combination; combinations
# without rows in RESULTS are skipped.
# NOTE(review): the lists are later assigned to TTT column by column, which
# assumes exactly one non-empty combination per TTT row, in chronological order.
for y in np.sort(TTT['YEAR_PRED'].unique()):
    for m in np.sort(TTT['MONTH_PRED'].unique()):

        # Rows for this forecast date, lowest RMSE first (NaN RMSE sorts last)
        BEST_ANALOGUES = RESULTS.query('MONTH_PRED == @m & YEAR_PRED == @y').sort_values(by=['RMSE']).head(best_n)
        if BEST_ANALOGUES.empty:
            continue

        ## Row labels (in RESULTS) of the selected analogues
        INDEX_best_n.append(BEST_ANALOGUES.index.values.tolist())

        ## RMSE of the selected analogues
        BEST_RMSE = BEST_ANALOGUES['RMSE'].values.tolist()
        RMSE_best_n.append(BEST_RMSE)

        ## Weights proportional to 1/RMSE. nansum keeps a NaN RMSE inside the
        ## best_n from turning every weight into NaN: valid analogues sum to 1
        ## and entries with NaN RMSE keep a NaN weight.
        INVERSE_RMSE = 1/np.array(BEST_RMSE)
        w_best_n.append((INVERSE_RMSE / np.nansum(INVERSE_RMSE)).round(6))

        ## Observed anomalies that followed the selected analogues
        OBS_ANOM_best_n.append(BEST_ANALOGUES['OBS_ANOM'].values.tolist())

        ## Hindcast statistics, taken from the first selected row
        MEAN_ANOM_HIND_best_n.append(BEST_ANALOGUES['MEAN_ANOM_HIND'].values.tolist()[0])
        STD_ANOM_HIND_best_n.append(BEST_ANALOGUES['STD_ANOM_HIND'].values.tolist()[0])
        Q1_ANOM_HIND_best_n.append(BEST_ANALOGUES['Q1_ANOM_HIND'].values.tolist()[0])
        # The previous version guarded this append with the Q1 list (copy-paste slip)
        Q3_ANOM_HIND_best_n.append(BEST_ANALOGUES['Q3_ANOM_HIND'].values.tolist()[0])

In [30]:
# Attach the per-month selections to TTT, one column per list.
# Each list must hold exactly one entry per row of TTT.
best_n_columns = {
    'INDEX_sort_best_n': INDEX_best_n,
    'RMSE_sort_best_n': RMSE_best_n,
    'Weights_sort_best_n': w_best_n,
    'OBS_ANOM_sort_best_n': OBS_ANOM_best_n,
    'MEAN_ANOM_HIND_best_n': MEAN_ANOM_HIND_best_n,
    'STD_ANOM_HIND_best_n': STD_ANOM_HIND_best_n,
    'Q1_ANOM_HIND_best_n': Q1_ANOM_HIND_best_n,
    'Q3_ANOM_HIND_best_n': Q3_ANOM_HIND_best_n,
}
for column_name, column_values in best_n_columns.items():
    TTT[column_name] = column_values

In [31]:
# Show the first 15 rows of TTT.
TTT.head(15)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n
0,1980-07-01,7,1980,Jul,"[9, 4, 1, 0, 5]","[0.8149, 0.8291, 1.0145, 1.2759, 1.4397]","[0.250574, 0.246283, 0.201274, 0.160038, 0.14183]","[-1.53832, 0.63341, 0.62003, 0.35065, 0.12559]",-0.07583,1.029714,-0.537328,0.555378
1,1980-08-01,8,1980,Aug,"[14, 11, 10, 19, 15]","[0.7295, 0.9818, 0.9915, 1.1493, 1.3036]","[0.272248, 0.202287, 0.200308, 0.172805, 0.152...","[0.22085, 0.23456, 0.41381, -2.18928, 1.3869]",-0.013692,1.068143,-0.444498,0.427325
2,1980-09-01,9,1980,Sep,"[24, 21, 20, 27, 25]","[0.4064, 0.9826, 0.9965, 1.3349, 1.3788]","[0.413118, 0.170864, 0.168481, 0.125771, 0.121...","[0.47592, 0.47972, 0.61431, -1.03157, 1.00309]",0.039331,1.052424,-0.786416,0.582011
3,1980-10-01,10,1980,Oct,"[34, 30, 35, 37, 32]","[0.5056, 0.8776, 1.2272, 1.3386, 1.435]","[0.367897, 0.211951, 0.151572, 0.138958, 0.129...","[0.67112, -0.67744, 0.77474, -1.14216, 1.35134]",-0.067088,1.047169,-0.696027,0.675267
4,1980-11-01,11,1980,Nov,"[44, 40, 45, 42, 47]","[0.417, 0.7762, 0.905, 1.2747, 1.4849]","[0.383734, 0.206155, 0.176815, 0.125533, 0.107...","[0.3367, -1.30647, 1.21781, 0.84712, -0.7264]",0.041469,1.06074,-0.749602,0.810985
5,1980-12-01,12,1980,Dec,"[54, 50, 55, 52, 57]","[0.4433, 0.8562, 1.026, 1.0935, 1.2393]","[0.368607, 0.190847, 0.159263, 0.149432, 0.131...","[-0.91514, 0.57294, 0.70194, -0.27539, -1.2512]",-0.162202,0.933941,-0.925055,0.578105
6,1981-01-01,1,1981,Jan,"[60, 64, 62, 67, 68]","[0.8256, 1.0054, 1.2525, 1.4123, 1.5856]","[0.278894, 0.229018, 0.183836, 0.163035, 0.145...","[-0.90715, 0.06048, 1.59568, -0.60938, -0.54092]",-0.098902,1.090611,-0.59991,0.02714
7,1981-02-01,2,1981,Feb,"[70, 74, 72, 78, 77]","[0.9869, 1.0182, 1.2754, 1.3655, 1.3798]","[0.239175, 0.231822, 0.185073, 0.172861, 0.171...","[0.16444, -0.40739, 2.25918, 0.59955, -1.34952]",0.046575,1.065671,-0.300568,0.223171
8,1981-03-01,3,1981,Mar,"[79, 83, 87, 82, 81]","[0.9849, 1.0726, 1.3487, 1.3797, 1.4554]","[0.247582, 0.227339, 0.180799, 0.176737, 0.167...","[-0.47138, -1.00258, 0.28523, 0.66713, 1.12386]",0.130716,1.067735,-0.875094,0.957916
9,1981-04-01,4,1981,Apr,"[88, 92, 96, 90, 91]","[0.902, 1.0416, 1.3212, 1.3956, 1.4474]","[0.261904, 0.226803, 0.178805, 0.169273, 0.163...","[-0.47539, -0.74372, 1.92367, -0.27208, 0.58775]",0.145409,1.040719,-0.4999,0.686122


In [32]:
# Show the last 15 rows of TTT.
TTT.tail(15)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n
112,1989-11-01,11,1989,Nov,"[1083, 1084, 1076, 1078, 1080]","[0.7004, 1.5231, 2.0202, 2.1194, 2.1366]","[0.405707, 0.186565, 0.140658, 0.134074, 0.132...","[-0.7264, 0.6556, -1.30647, 0.84712, 0.3367]",0.187763,0.88331,-0.347535,0.810985
113,1989-12-01,12,1989,Dec,"[1093, 1086, 1094, 1085, 1090]","[0.6515, 1.4511, 1.5994, 1.8371, 2.025]","[0.39484, 0.177271, 0.160834, 0.140024, 0.127031]","[-1.2512, 0.57294, 1.26814, 1.29762, -0.91514]",0.02949,1.064853,-0.925055,0.724585
114,1990-01-01,1,1990,Jan,"[1103, 1096, 1095, 1100, 1098]","[0.6984, 1.2875, 1.5856, 1.7368, 2.093]","[0.367821, 0.199523, 0.162012, 0.147908, 0.122...","[-0.60938, -0.90715, 0.45825, 0.06048, 1.59568]",0.012116,1.090847,-0.59991,0.362784
115,1990-02-01,2,1990,Feb,"[1113, 1106, 1105, 1110, 1108]","[0.5909, 1.0442, 1.3655, 1.5155, 2.2103]","[0.376525, 0.213071, 0.162936, 0.146809, 0.10066]","[-1.34952, 0.16444, 0.42578, -0.40739, 2.25918]",0.027267,1.055929,-0.300568,0.223171
116,1990-03-01,3,1990,Mar,"[1122, 1115, 1114, 1119, 1118]","[0.9909, 0.9999, 1.3487, 1.5467, 1.5959]","[0.250799, 0.248541, 0.184264, 0.160675, 0.155...","[-1.10046, -0.47138, -0.45832, -1.00258, 0.66713]",0.048099,1.082943,-0.875094,0.957916
117,1990-04-01,4,1990,Apr,"[1124, 1131, 1123, 1128, 1127]","[0.6723, 1.1336, 1.3212, 1.4536, 1.5072]","[0.332172, 0.197, 0.169028, 0.153632, 0.148168]","[-0.47539, -1.22165, -0.27447, -0.74372, 0.58775]",-0.098829,0.801714,-0.4999,0.381389
118,1990-05-01,5,1990,May,"[1133, 1132, 1136, 1137, 1140]","[1.1063, 1.2997, 1.3869, 1.5271, 1.6955]","[0.248397, 0.211435, 0.198141, 0.17995, 0.162077]","[0.65669, 1.48233, 0.51696, 0.07577, -1.96632]",0.046388,1.111898,-0.677871,0.623156
119,1990-06-01,6,1990,Jun,"[1145, 1142, 1146, 1141, 1144]","[1.0544, 1.154, 1.3336, 1.3842, 1.4857]","[0.239477, 0.218808, 0.18934, 0.182419, 0.169956]","[0.53094, 0.99407, 0.35902, -0.28038, 0.97881]",-0.005398,1.060506,-0.178009,0.608469
120,1990-07-01,7,1990,Jul,"[1150, 1155, 1152, 1153, 1151]","[0.8149, 1.0036, 1.1711, 1.2115, 1.2418]","[0.260642, 0.211635, 0.181366, 0.175317, 0.17104]","[0.68247, 0.63341, 0.62003, -0.2901, 0.35065]",0.170925,0.892372,-0.190334,0.630197
121,1990-08-01,8,1990,Aug,"[1168, 1160, 1163, 1165, 1161]","[1.0759, 1.1493, 1.3007, 1.3391, 1.4037]","[0.230775, 0.216036, 0.19089, 0.185416, 0.176883]","[-0.48525, 0.10954, 0.75167, 0.22085, 0.41381]",0.273659,0.610381,0.087442,0.427325


In [33]:
# Forecast anomaly for each row: the weighted sum of the selected OBS_ANOM
# values, using the matching weights stored in the same row.
Pred_Anom = [
    (weights * obs_anomalies).sum()
    for weights, obs_anomalies in zip(TTT['Weights_sort_best_n'], TTT['OBS_ANOM_sort_best_n'])
]

TTT['Frcst_Anom'] = Pred_Anom

In [34]:
# Display TTT, including the Frcst_Anom column.
TTT

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n,Frcst_Anom
0,1980-07-01,7,1980,Jul,"[9, 4, 1, 0, 5]","[0.8149, 0.8291, 1.0145, 1.2759, 1.4397]","[0.250574, 0.246283, 0.201274, 0.160038, 0.14183]","[-1.53832, 0.63341, 0.62003, 0.35065, 0.12559]",-0.075830,1.029714,-0.537328,0.555378,-0.030739
1,1980-08-01,8,1980,Aug,"[14, 11, 10, 19, 15]","[0.7295, 0.9818, 0.9915, 1.1493, 1.3036]","[0.272248, 0.202287, 0.200308, 0.172805, 0.152...","[0.22085, 0.23456, 0.41381, -2.18928, 1.3869]",-0.013692,1.068143,-0.444498,0.427325,0.023441
2,1980-09-01,9,1980,Sep,"[24, 21, 20, 27, 25]","[0.4064, 0.9826, 0.9965, 1.3349, 1.3788]","[0.413118, 0.170864, 0.168481, 0.125771, 0.121...","[0.47592, 0.47972, 0.61431, -1.03157, 1.00309]",0.039331,1.052424,-0.786416,0.582011,0.374478
3,1980-10-01,10,1980,Oct,"[34, 30, 35, 37, 32]","[0.5056, 0.8776, 1.2272, 1.3386, 1.435]","[0.367897, 0.211951, 0.151572, 0.138958, 0.129...","[0.67112, -0.67744, 0.77474, -1.14216, 1.35134]",-0.067088,1.047169,-0.696027,0.675267,0.237200
4,1980-11-01,11,1980,Nov,"[44, 40, 45, 42, 47]","[0.417, 0.7762, 0.905, 1.2747, 1.4849]","[0.383734, 0.206155, 0.176815, 0.125533, 0.107...","[0.3367, -1.30647, 1.21781, 0.84712, -0.7264]",0.041469,1.060740,-0.749602,0.810985,0.103257
...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,1990-09-01,9,1990,Sep,"[1178, 1170, 1175, 1172, 1173]","[1.2662, 1.4766, 1.481, 1.7531, 1.7552]","[0.240608, 0.206324, 0.205711, 0.173782, 0.173...","[-1.03157, -0.35398, 0.47592, 0.47972, 1.5238]",0.150123,0.933520,-0.271443,0.582011,0.124524
123,1990-10-01,10,1990,Oct,"[1188, 1180, 1185, 1189, 1181]","[1.1381, 1.4995, 1.6509, 1.8629, 1.9096]","[0.273576, 0.207641, 0.188599, 0.167136, 0.163...","[-1.14216, 0.5367, 0.67112, -1.73732, -0.67744]",0.001001,1.069040,-0.696027,0.675267,-0.475278
124,1990-11-01,11,1990,Nov,"[1198, 1190, 1199, 1195, 1191]","[1.1256, 1.4944, 1.5231, 1.5827, 1.7076]","[0.258894, 0.195002, 0.191327, 0.184122, 0.170...","[-0.7264, -0.33175, -1.5021, 0.3367, -1.30647]",-0.081950,1.036233,-0.749602,0.810985,-0.701107
125,1990-12-01,12,1990,Dec,"[1208, 1200, 1205, 1209, 1201]","[1.2164, 1.462, 1.5631, 1.5994, 1.7613]","[0.246222, 0.20486, 0.19161, 0.187261, 0.170048]","[-1.2512, 1.29762, -0.91514, -0.23592, 0.57294]",-0.158518,0.940425,-0.925055,0.578105,-0.164344


In [35]:
# Rescale the forecast anomaly: subtract the hindcast mean and divide by the
# hindcast standard deviation stored in the same row.
TTT['Frcst_Anom_Rest'] = (
    TTT['Frcst_Anom']
    .sub(TTT['MEAN_ANOM_HIND_best_n'])
    .div(TTT['STD_ANOM_HIND_best_n'])
)

In [36]:
# Keep only the date keys and the rescaled forecast, exposed as FORECAST.
HIND_FORECAST_ANALOGUE = (
    TTT[['DATE', 'MONTH_PRED', 'YEAR_PRED', 'Frcst_Anom_Rest']]
    .rename(columns={'Frcst_Anom_Rest': 'FORECAST'})
)

In [37]:
# Display the observed anomaly time series.
ANOMALY_ANALOGUE_TS

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE
0,1,1980,-1.536267,1980-01-01
1,2,1980,0.241717,1980-02-01
2,3,1980,1.049742,1980-03-01
3,4,1980,1.300559,1980-04-01
4,5,1980,1.075049,1980-05-01
...,...,...,...,...
127,8,1990,-2.189276,1990-08-01
128,9,1990,-1.351109,1990-09-01
129,10,1990,-0.008010,1990-10-01
130,11,1990,0.655603,1990-11-01


In [38]:
# Display the hindcast forecast table.
HIND_FORECAST_ANALOGUE

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,FORECAST
0,1980-07-01,7,1980,0.043789
1,1980-08-01,8,1980,0.034764
2,1980-09-01,9,1980,0.318453
3,1980-10-01,10,1980,0.290581
4,1980-11-01,11,1980,0.058251
...,...,...,...,...
122,1990-09-01,9,1990,-0.027423
123,1990-10-01,10,1990,-0.445520
124,1990-11-01,11,1990,-0.597506
125,1990-12-01,12,1990,-0.006195


In [39]:
# Left-join the forecast onto the observed series by DATE; observed rows with
# no matching forecast date get NaN in FORECAST.
OBSERVED_AND_HIND_FORECAST_ANALOGUE = ANOMALY_ANALOGUE_TS.merge(
    HIND_FORECAST_ANALOGUE[['DATE', 'FORECAST']],
    on='DATE',
    how='left',
)

In [40]:
# Show the first 30 rows of the merged observed/forecast table.
OBSERVED_AND_HIND_FORECAST_ANALOGUE.head(30)

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE,FORECAST
0,1,1980,-1.536267,1980-01-01,
1,2,1980,0.241717,1980-02-01,
2,3,1980,1.049742,1980-03-01,
3,4,1980,1.300559,1980-04-01,
4,5,1980,1.075049,1980-05-01,
5,6,1980,0.632953,1980-06-01,
6,7,1980,0.682468,1980-07-01,0.043789
7,8,1980,0.109535,1980-08-01,0.034764
8,9,1980,-0.353978,1980-09-01,0.318453
9,10,1980,0.536702,1980-10-01,0.290581


In [41]:
# Show the last 25 rows of the merged observed/forecast table.
OBSERVED_AND_HIND_FORECAST_ANALOGUE.tail(25)

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE,FORECAST
107,12,1988,-1.251198,1988-12-01,0.194524
108,1,1989,-0.609378,1989-01-01,-0.069317
109,2,1989,-1.349524,1989-02-01,0.257669
110,3,1989,-1.100457,1989-03-01,-0.328333
111,4,1989,-1.221652,1989-04-01,-0.062878
112,5,1989,-1.966323,1989-05-01,0.260753
113,6,1989,-2.108455,1989-06-01,0.196077
114,7,1989,-1.530132,1989-07-01,-0.115736
115,8,1989,-0.4428,1989-08-01,0.037741
116,9,1989,-1.350103,1989-09-01,0.429173


### Save OBSERVED_AND_HIND_FORECAST_ANALOGUE to a CSV file

In [44]:
# Output file stem, tagged with the forecast length (FL) and DANA settings.
filename = f'santalucia_caudales_mar2023_FL_{FORECAST_LENGTH!s}_DANA_{DANA!s}'

In [45]:
# Display the generated file stem.
filename

'santalucia_caudales_mar2023_FL_1_DANA_6'

In [47]:
# Write the merged table to ../data/<filename>.csv without the index column.
output_csv_path = f'../data/{filename}.csv'
OBSERVED_AND_HIND_FORECAST_ANALOGUE.to_csv(output_csv_path, index=False)