# HydroSOS - Persistence and Hydrological Analogues method

In this notebook, we explore the Persistance and Hydrological Analogue method

## Import the python libraries

In [1]:
%reset -f

In [2]:
# Importing the libraries
import pandas as pd
import seaborn as sns
import numpy as np
import calendar
import datetime
import matplotlib.pyplot as plt
import math
from scipy import stats
plt.style.use('classic')
%matplotlib inline

from IPython.display import HTML

sns.set()

In [3]:
# install a conda package in the current Jupyter kernel
#import sys
#!conda install --yes --prefix {sys.prefix} missingno

In [4]:
# import missingno

### We create function for percentile definition

In [5]:
# Quantile Function Definitions
def q1(x):
    return x.quantile(0.28)

def q2(x):
    return x.median()

def q3(x):
    return x.quantile(0.72)

def q5(x):
    return x.quantile(0.05)

def q95(x):
    return x.quantile(0.95)

def q87(x):
    return x.quantile(0.87)

def q13(x):
    return x.quantile(0.13)


We create a function called add_month that add the number of months based on an input date. This will be used in the Jack-Knife Validation

In [6]:
def add_months(sourcedate, months):
    month = sourcedate.month - 1 + months
    year = sourcedate.year + month // 12
    month = month % 12 + 1
    day = min(sourcedate.day, calendar.monthrange(year,month)[1])
    return datetime.date(year, month, day)

the monthly anomalies of the most recently past months are compared with all possible historical sequences of anomalies covering the same months of the year. That is, if the recent past covers, say, the months February to July, then potential analogues are sought only in the February to July sequences of the historical record

Define a Root Mean Square Error Function. This function will be used to select the analogues years

In [7]:
def rmse(predictions,targets):
    return np.around(((predictions - targets) ** 2).mean() ** 0.5,4)

## Import Data 

In this example we import the daily discharge data from Fray Marcos station, from 01 Jan 1980 to 31 Jan 2023. We import and then visualize head and tail of dataframe, plot timeseries and also make boxplot for daily discharge grouped by month.

In [8]:
MONTHLY_ANOMALY = pd.read_csv('../notebook_verificacion/santalucia_caudales_prueba_MA.csv',parse_dates=['Month'],index_col="Month",na_values="")

In [9]:
MONTHLY_ANOMALY

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,...,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,...,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,...,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,...,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,...,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,...,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,...,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,...,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,...,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,...,,,,,,,,,,


In [10]:
DISCHARGE_MONTHLY = pd.read_csv('../notebook_verificacion/santalucia_caudales_prueba_DA.csv',parse_dates=['Fecha'],index_col="Fecha",na_values="")

In [11]:
DISCHARGE_MONTHLY

Unnamed: 0_level_0,Discharge,Year,Month,water_year,Q_to_log,Anomaly_Qlog
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-31,1.198613,1980,1,1980,0.181165,-1.536267
1980-02-29,28.386414,1980,2,1980,3.345911,0.241717
1980-03-31,86.782323,1980,3,1980,4.463403,1.049742
1980-04-30,172.187367,1980,4,1981,5.148583,1.300559
1980-05-31,275.560258,1980,5,1981,5.618806,1.075049
...,...,...,...,...,...,...
2022-12-31,,2022,12,2023,,
2023-01-31,,2023,1,2023,,
2023-02-28,,2023,2,2023,,
2023-03-31,,2023,3,2023,,


In [12]:
YEAR_HINDCAST_SELECTED = np.unique(DISCHARGE_MONTHLY['Year'])
# rename columns change str for int
MONTHLY_ANOMALY.columns = YEAR_HINDCAST_SELECTED

In [13]:
YEAR_HINDCAST_SELECTED

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
      dtype=int64)

In [14]:
range_analysis = range(YEAR_HINDCAST_SELECTED[0],YEAR_HINDCAST_SELECTED[12])

In [15]:
LOG_DISCHARGE_MONTHLY_PIVOT = pd.pivot_table(DISCHARGE_MONTHLY, index=['Month'],columns=['Year'], values=['Q_to_log'],dropna=False)
LOG_DISCHARGE_MONTHLY_PIVOT.columns = YEAR_HINDCAST_SELECTED

In [16]:
LOG_DISCHARGE_MONTHLY_PIVOT

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.181165,2.266864,0.839047,1.705645,3.456299,1.191681,1.850911,,3.56098,1.150429,...,,,,,,,,,,
2,3.345911,3.59657,3.240681,3.068096,6.093265,1.319706,2.461964,,3.179709,1.178985,...,,,,,,,,,,
3,4.463403,2.653676,2.637997,1.93559,4.552342,4.004259,2.000542,,5.221666,1.883088,...,,,,,,,,,,
4,5.148583,3.169494,2.917019,2.876502,3.172495,4.252904,2.579853,,4.415552,1.979313,...,,,,,,,,,,
5,5.618806,6.181355,5.040966,2.868932,4.769523,4.847961,4.238582,,2.797907,1.41802,...,,,,,,,,,,
6,5.431652,4.226532,5.908143,4.789328,5.887999,5.297043,5.070209,,2.878916,1.814417,...,,,,,,,,,,
7,5.315642,4.974052,5.251364,4.314426,6.220885,5.265138,4.742359,,3.979544,3.037866,...,,,,,,,,,,
8,5.119868,5.464556,5.261496,5.847285,,5.245967,6.566886,,4.44609,4.494176,...,,,,,,,,,,
9,4.11759,5.092622,4.957092,6.008443,4.463882,4.953263,5.484107,,3.435276,3.11453,...,,,,,,,,,,
10,4.931459,3.168846,,6.114101,4.487702,5.126601,5.277033,,2.494186,1.63017,...,,,,,,,,,,


In [17]:
ANOMALY_ANALOGUES = MONTHLY_ANOMALY[range_analysis]
QLOG_ANALOGUES = LOG_DISCHARGE_MONTHLY_PIVOT[range_analysis]
HTML(ANOMALY_ANALOGUES.to_html())

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,-0.540918,0.431872
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,0.599554,-0.844955
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,0.28523,-0.718121
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,1.923675,-1.034218
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,-0.170332,-0.247164
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,0.048584,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,-1.538323,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,-2.189276,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,-1.351109,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,-0.00801,


In [18]:
ANOMALY_ANALOGUE_TS = pd.melt(ANOMALY_ANALOGUES.reset_index(), id_vars='Month',var_name=['Year'],ignore_index = True)
ANOMALY_ANALOGUE_TS.columns = ['Month','Year','Anomaly_Qlog']
HTML(ANOMALY_ANALOGUE_TS.head(6).to_html())

Unnamed: 0,Month,Year,Anomaly_Qlog
0,1,1980,-1.536267
1,2,1980,0.241717
2,3,1980,1.049742
3,4,1980,1.300559
4,5,1980,1.075049
5,6,1980,0.632953


In [19]:
QLOG_ANALOGUE_TS = pd.melt(QLOG_ANALOGUES.reset_index(), id_vars='Month',var_name=['Year'],ignore_index = True)
QLOG_ANALOGUE_TS.columns = ['Month','Year','Qlog']
HTML(QLOG_ANALOGUE_TS.head(6).to_html())

Unnamed: 0,Month,Year,Qlog
0,1,1980,0.181165
1,2,1980,3.345911
2,3,1980,4.463403
3,4,1980,5.148583
4,5,1980,5.618806
5,6,1980,5.431652


In [20]:
ANOMALY_ANALOGUE_TS['DATE'] = pd.to_datetime(ANOMALY_ANALOGUE_TS[['Year', 'Month']].assign(DAY=1))
HTML(ANOMALY_ANALOGUE_TS.head(6).to_html(index=False))

Month,Year,Anomaly_Qlog,DATE
1,1980,-1.536267,1980-01-01
2,1980,0.241717,1980-02-01
3,1980,1.049742,1980-03-01
4,1980,1.300559,1980-04-01
5,1980,1.075049,1980-05-01
6,1980,0.632953,1980-06-01


In [21]:
######## PARTE CENTRAL

## HINDCAST PARA FORECAST_LENGHT EN MESES

In [22]:
## define Dana last months of observation
DANA = 9
## define forecast lenght in months
FORECAST_LENGTH = 3
##

In [25]:
##
# Toma como argumentos
# range_analysis - > YEAR_ANALYSIS
# QLOG_ANALOGUES
# QLOG_ANALOGUE_TS
# ANOMALY_ANALOGUES
# ANOMALY_ANALOGUE_TS
# MONTH_ANALYSIS
# DANA
# FORECAST_LENGTH
##


YEAR_ANALYSIS = range_analysis # definir al menos para tener DANA meses para atras con datos..

MONTH_ANALYSIS = range(1,13,1)
df = pd.DataFrame()
ANOMALY_HISTORICAL = pd.DataFrame()
RMSE = []
ANO_TARG = []
ANO_PRED = []
MON_TARG = []
MON_PRED = []
TARG = []
PRED = []
OBS_ANOM = []

MEAN_ANOM_HIND = []
STD_ANOM_HIND = []
Q1_ANOM_HIND = []
Q3_ANOM_HIND = []

MEAN_QLOG_HIND = []
STD_QLOG_HIND = []

##
print('Dana selected: ',DANA, 'months')
print('Forecast length selected: ', FORECAST_LENGTH , 'months')
##


for y in YEAR_ANALYSIS:    
            
    
    for m in MONTH_ANALYSIS:
        
        print('Year:',y,"-","End-Month of observation:",m)
        # define end month and year based on the for-loop
        END_DATE_HIST = datetime.date.today().replace(year=y, month=m, day=1)

        # define initial date according to Dana past months of observation
        FIRST_DATE_HIST = add_months(END_DATE_HIST,-DANA)
        
        # Define forecast date
        FORECAST_DATE = add_months(END_DATE_HIST,FORECAST_LENGTH)       
        print('Forecast Year:',FORECAST_DATE.year,"-"," Forecast Month:",FORECAST_DATE.month)

        
        if FORECAST_DATE.year <= YEAR_ANALYSIS[-1]:
            # From the Qlog values, drop the monthly values for the year of analysis and mantain the remaining years. 
            # This is the Qlog hindcast series
            # print('Dropping year:',y,"-","End-Month:",m)
            print('Dropping year:',y," - get series for analysis.")
            QLOG_HINDCAST_TARG_ANALOGUE = QLOG_ANALOGUES.drop([FORECAST_DATE.year],axis=1)
            QLOG_HINDCAST_TARG_ANALOGUE_TS = QLOG_ANALOGUE_TS[QLOG_ANALOGUE_TS['Year'] != FORECAST_DATE.year]

            # From the monthly anomaly values, drop the monthly values for the year of analysis and mantain the remaining years. 
            # This is the anomaly hindcast series
            ANOMALY_HINDCAST_TARG_ANALOGUE = ANOMALY_ANALOGUES.drop([FORECAST_DATE.year],axis=1)
            ANOMALY_HINDCAST_TARG_ANALOGUE_TS = ANOMALY_ANALOGUE_TS[ANOMALY_ANALOGUE_TS['Year'] != FORECAST_DATE.year]
        
        else:
            print('Forecast Year out of range')
            break

            
        
        # query in the original anomaly time serie the first date and end_date
        ANOMALY_TARGET = ANOMALY_ANALOGUE_TS.query('DATE > @FIRST_DATE_HIST & DATE <= @END_DATE_HIST')
        print(ANOMALY_TARGET)
        
        # Check if there are enough months in the past
        if ANOMALY_TARGET.index.size < DANA:
            print('There are not enough months in the past')
            print('')
        
        else:
            print('There are enough months in the past')
            print(f"Target period: {add_months(FIRST_DATE_HIST,1).strftime('%m-%Y')} to {(END_DATE_HIST.strftime('%m-%Y'))}")
            
            # Define a list with the targets months
            MONTHS_TARG = pd.date_range(add_months(FIRST_DATE_HIST,1),add_months(END_DATE_HIST,1),freq='M').month.to_list()
            
            # Get the anomaly hindcast target series
            ANOMALY_HINDCAST_TARG = ANOMALY_ANALOGUE_TS.drop(ANOMALY_TARGET.index)
            ANOMALY_HINDCAST_TARG = ANOMALY_HINDCAST_TARG.query('Month == @MONTHS_TARG').reset_index()
            
            # Get the anomaly target series
            df['target'] = ANOMALY_TARGET["Anomaly_Qlog"].values.round(5)

            # Adjust anomaly hindcast target series index
            if MONTHS_TARG[0]>=MONTHS_TARG[-1]:
                print('Target period includes two years')
                print('')
                if ANOMALY_HINDCAST_TARG['Month'][0] != MONTHS_TARG[0]:
                    ANOMALY_HINDCAST_TARG.drop([*range(0,ANOMALY_HINDCAST_TARG[ANOMALY_HINDCAST_TARG['Month']==MONTHS_TARG[0]].index.min())],inplace=True)
                    ANOMALY_HINDCAST_TARG.reset_index(drop=True)
                    # print(len(ANOMALY_HINDCAST_TARG.index)/DANA)

            else:
                print('Target period includes only one year')
                print('')
                pass

            # Iterate over de anomaly hindcast target where are the potential analogues
            for i in range(0, int(len(ANOMALY_HINDCAST_TARG.index)/DANA)):

                
                # create a dataframe (df) for the "preditec" (analogue) and "target" (current year)
                df['predicted'] = ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA,1:-1]["Anomaly_Qlog"].values.round(5)
                
                # save month and year value of target series
                ANO_TARG.append(f"{str(ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA,1:-1]['Year'].tolist()[0])}-{str(ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA,1:-1]['Year'].tolist()[-1])}")
                MON_TARG.append(MONTHS_TARG)
                
                # save month and year value of predicted series
                ANO_PRED.append(add_months(END_DATE_HIST, FORECAST_LENGTH).year)
                MON_PRED.append(add_months(END_DATE_HIST, FORECAST_LENGTH).month)            

                # define the date to get the anomaly value of the actual month of the potential analogue
                DATE_DANA=datetime.date.today().replace(year=ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA]['Year'].values[-1], month=ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA]['Month'].values[-1], day=1)
                
                # get the anomaly value of the following month of the potential analogue
                ANOMALY_OBS = ANOMALY_ANALOGUE_TS.query('DATE == @add_months(@DATE_DANA,@FORECAST_LENGTH)')['Anomaly_Qlog'].values
                # print(ANOMALY_OBS)
                
                # get the mean and standard deviation of monthly anomaly series use for hindcast necessary for rescale de forecast
                MEAN_ANOM_HIND.append(ANOMALY_HINDCAST_TARG_ANALOGUE.loc[add_months(DATE_DANA,FORECAST_LENGTH).month].mean())
                STD_ANOM_HIND.append(ANOMALY_HINDCAST_TARG_ANALOGUE.loc[add_months(DATE_DANA,FORECAST_LENGTH).month].std())           
                
                # get statistic of monthly anomaly serie use for hindcast necesary contingency tables
                Q1_ANOM_HIND.append(q1(ANOMALY_HINDCAST_TARG_ANALOGUE.loc[add_months(DATE_DANA,FORECAST_LENGTH).month]))
                Q3_ANOM_HIND.append(q3(ANOMALY_HINDCAST_TARG_ANALOGUE.loc[add_months(DATE_DANA,FORECAST_LENGTH).month]))  
                
                # get the mean and standard deviation of monthly qlog series use for hindcast 
                MEAN_QLOG_HIND.append(QLOG_HINDCAST_TARG_ANALOGUE.loc[add_months(DATE_DANA,FORECAST_LENGTH).month].mean())
                STD_QLOG_HIND.append(QLOG_HINDCAST_TARG_ANALOGUE.loc[add_months(DATE_DANA,FORECAST_LENGTH).month].std())

                
                try:
                    if math.isnan(ANOMALY_OBS):
                        #print('Next month to end-month is nan')
                        RMSE_val = np.nan
                        RMSE.append(RMSE_val)
                        TARG.append(df['target'].values.tolist())
                        PRED.append(df['predicted'].values.tolist())
                        OBS_ANOM.append(np.nan)
                    else:
                        #print('Next month to end-month is not empty')
                        if df['predicted'].isnull().values.any() == False:
                            # calculate the RMSE
                            RMSE_val = rmse(df['predicted'],df['target'])
                            # append in the empty lists
                            RMSE.append(RMSE_val)
                            TARG.append(df['target'].values.tolist())
                            PRED.append(df['predicted'].values.tolist())
                            OBS_ANOM.append(ANOMALY_OBS.round(5)[0])
                        else:
                            RMSE_val = np.nan
                            RMSE.append(RMSE_val)
                            TARG.append(df['target'].values.tolist())
                            PRED.append(df['predicted'].values.tolist())
                            OBS_ANOM.append(np.nan)
                except:
                    if ANOMALY_OBS.size == 0:
                        #print('Next month to end-month is empty')
                        RMSE_val = np.nan
                        RMSE.append(RMSE_val)
                        TARG.append(df['target'].values.tolist())
                        PRED.append(df['predicted'].values.tolist())
                        OBS_ANOM.append(np.nan)      


Dana selected:  9 months
Forecast length selected:  3 months
Year: 1980 - End-Month of observation: 1
Forecast Year: 1980 -  Forecast Month: 4
Dropping year: 1980  - get series for analysis.
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
There are not enough months in the past

Year: 1980 - End-Month of observation: 2
Forecast Year: 1980 -  Forecast Month: 5
Dropping year: 1980  - get series for analysis.
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      0.241717 1980-02-01
There are not enough months in the past

Year: 1980 - End-Month of observation: 3
Forecast Year: 1980 -  Forecast Month: 6
Dropping year: 1980  - get series for analysis.
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      0.241717 1980-02-01
2      3  1980      1.049742 1980-03-01
There are not enough months in the past

Year: 1980 - End-Month of observation: 4
Forecast Year: 1980 -  Fo

Year: 1981 - End-Month of observation: 5
Forecast Year: 1981 -  Forecast Month: 8
Dropping year: 1981  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
8       9  1980     -0.353978 1980-09-01
9      10  1980      0.536702 1980-10-01
10     11  1980     -0.331749 1980-11-01
11     12  1980      1.297617 1980-12-01
12      1  1981      0.458249 1981-01-01
13      2  1981      0.425784 1981-02-01
14      3  1981     -0.458319 1981-03-01
15      4  1981     -0.274466 1981-04-01
16      5  1981      1.482334 1981-05-01
There are enough months in the past
Target period: 09-1980 to 05-1981
Target period includes two years

Year: 1981 - End-Month of observation: 6
Forecast Year: 1981 -  Forecast Month: 9
Dropping year: 1981  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
9      10  1980      0.536702 1980-10-01
10     11  1980     -0.331749 1980-11-01
11     12  1980      1.297617 1980-12-01
12      1  1981      0.458249 1981-01-01
13      2  1981      

Year: 1982 - End-Month of observation: 8
Forecast Year: 1982 -  Forecast Month: 11
Dropping year: 1982  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
23     12  1981      0.572945 1981-12-01
24      1  1982     -0.907146 1982-01-01
25      2  1982      0.164443 1982-02-01
26      3  1982     -0.471385 1982-03-01
27      4  1982     -0.475394 1982-04-01
28      5  1982      0.656692 1982-05-01
29      6  1982      0.994073 1982-06-01
30      7  1982      0.620029 1982-07-01
31      8  1982      0.234558 1982-08-01
There are enough months in the past
Target period: 12-1981 to 08-1982
Target period includes two years

Year: 1982 - End-Month of observation: 9
Forecast Year: 1982 -  Forecast Month: 12
Dropping year: 1982  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
24      1  1982     -0.907146 1982-01-01
25      2  1982      0.164443 1982-02-01
26      3  1982     -0.471385 1982-03-01
27      4  1982     -0.475394 1982-04-01
28      5  1982    

Target period includes two years

Year: 1983 - End-Month of observation: 9
Forecast Year: 1983 -  Forecast Month: 12
Dropping year: 1983  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
36      1  1983     -0.078435 1983-01-01
37      2  1983      0.037709 1983-02-01
38      3  1983     -1.056707 1983-03-01
39      4  1983     -0.507639 1983-04-01
40      5  1983     -0.915862 1983-05-01
41      6  1983      0.146152 1983-06-01
42      7  1983     -0.290099 1983-07-01
43      8  1983      0.751669 1983-08-01
44      9  1983      1.523802 1983-09-01
There are enough months in the past
Target period: 01-1983 to 09-1983
Target period includes only one year

Year: 1983 - End-Month of observation: 10
Forecast Year: 1984 -  Forecast Month: 1
Dropping year: 1983  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
37      2  1983      0.037709 1983-02-01
38      3  1983     -1.056707 1983-03-01
39      4  1983     -0.507639 1983-04-01
40      5  1983     -0

Year: 1984 - End-Month of observation: 11
Forecast Year: 1985 -  Forecast Month: 2
Dropping year: 1984  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
50      3  1984      1.123855 1984-03-01
51      4  1984     -0.272078 1984-04-01
52      5  1984      0.460168 1984-05-01
53      6  1984      0.978806 1984-06-01
54      7  1984      1.561809 1984-07-01
55      8  1984           NaN 1984-08-01
56      9  1984     -0.010080 1984-09-01
57     10  1984      0.231029 1984-10-01
58     11  1984      0.809480 1984-11-01
There are enough months in the past
Target period: 03-1984 to 11-1984
Target period includes only one year

Year: 1984 - End-Month of observation: 12
Forecast Year: 1985 -  Forecast Month: 3
Dropping year: 1984  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
51      4  1984     -0.272078 1984-04-01
52      5  1984      0.460168 1984-05-01
53      6  1984      0.978806 1984-06-01
54      7  1984      1.561809 1984-07-01
55      8  1984

Year: 1986 - End-Month of observation: 1
Forecast Year: 1986 -  Forecast Month: 4
Dropping year: 1986  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
64      5  1985      0.516957 1985-05-01
65      6  1985      0.530937 1985-06-01
66      7  1985      0.633409 1985-07-01
67      8  1985      0.220850 1985-08-01
68      9  1985      0.475918 1985-09-01
69     10  1985      0.671122 1985-10-01
70     11  1985      0.336701 1985-11-01
71     12  1985     -0.915141 1985-12-01
72      1  1986      0.060480 1986-01-01
There are enough months in the past
Target period: 05-1985 to 01-1986
Target period includes two years

Year: 1986 - End-Month of observation: 2
Forecast Year: 1986 -  Forecast Month: 5
Dropping year: 1986  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
65      6  1985      0.530937 1985-06-01
66      7  1985      0.633409 1985-07-01
67      8  1985      0.220850 1985-08-01
68      9  1985      0.475918 1985-09-01
69     10  1985      

Year: 1987 - End-Month of observation: 4
Forecast Year: 1987 -  Forecast Month: 7
Dropping year: 1987  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
79      8  1986      1.386902 1986-08-01
80      9  1986      1.003092 1986-09-01
81     10  1986      0.774743 1986-10-01
82     11  1986      1.217815 1986-11-01
83     12  1986      0.701937 1986-12-01
84      1  1987           NaN 1987-01-01
85      2  1987           NaN 1987-02-01
86      3  1987           NaN 1987-03-01
87      4  1987           NaN 1987-04-01
There are enough months in the past
Target period: 08-1986 to 04-1987
Target period includes two years

Year: 1987 - End-Month of observation: 5
Forecast Year: 1987 -  Forecast Month: 8
Dropping year: 1987  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
80      9  1986      1.003092 1986-09-01
81     10  1986      0.774743 1986-10-01
82     11  1986      1.217815 1986-11-01
83     12  1986      0.701937 1986-12-01
84      1  1987      

Year: 1988 - End-Month of observation: 7
Forecast Year: 1988 -  Forecast Month: 10
Dropping year: 1988  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
94      11  1987           NaN 1987-11-01
95      12  1987           NaN 1987-12-01
96       1  1988      1.695788 1988-01-01
97       2  1988      0.119670 1988-02-01
98       3  1988      1.681609 1988-03-01
99       4  1988      0.717188 1988-04-01
100      5  1988     -0.967284 1988-05-01
101      6  1988     -1.301700 1988-06-01
102      7  1988     -0.615400 1988-07-01
There are enough months in the past
Target period: 11-1987 to 07-1988
Target period includes two years

Year: 1988 - End-Month of observation: 8
Forecast Year: 1988 -  Forecast Month: 11
Dropping year: 1988  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
95      12  1987           NaN 1987-12-01
96       1  1988      1.695788 1988-01-01
97       2  1988      0.119670 1988-02-01
98       3  1988      1.681609 1988-03-01
99  

Year: 1989 - End-Month of observation: 10
Forecast Year: 1990 -  Forecast Month: 1
Dropping year: 1989  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
109      2  1989     -1.349524 1989-02-01
110      3  1989     -1.100457 1989-03-01
111      4  1989     -1.221652 1989-04-01
112      5  1989     -1.966323 1989-05-01
113      6  1989     -2.108455 1989-06-01
114      7  1989     -1.530132 1989-07-01
115      8  1989     -0.442800 1989-08-01
116      9  1989     -1.350103 1989-09-01
117     10  1989     -1.737324 1989-10-01
There are enough months in the past
Target period: 02-1989 to 10-1989
Target period includes only one year

Year: 1989 - End-Month of observation: 11
Forecast Year: 1990 -  Forecast Month: 2
Dropping year: 1989  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
110      3  1989     -1.100457 1989-03-01
111      4  1989     -1.221652 1989-04-01
112      5  1989     -1.966323 1989-05-01
113      6  1989     -2.108455 1989-06-01


Year: 1990 - End-Month of observation: 12
Forecast Year: 1991 -  Forecast Month: 3
Dropping year: 1990  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
123      4  1990      1.923675 1990-04-01
124      5  1990     -0.170332 1990-05-01
125      6  1990      0.048584 1990-06-01
126      7  1990     -1.538323 1990-07-01
127      8  1990     -2.189276 1990-08-01
128      9  1990     -1.351109 1990-09-01
129     10  1990     -0.008010 1990-10-01
130     11  1990      0.655603 1990-11-01
131     12  1990      1.268143 1990-12-01
There are enough months in the past
Target period: 04-1990 to 12-1990
Target period includes only one year

Year: 1991 - End-Month of observation: 1
Forecast Year: 1991 -  Forecast Month: 4
Dropping year: 1991  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
124      5  1990     -0.170332 1990-05-01
125      6  1990      0.048584 1990-06-01
126      7  1990     -1.538323 1990-07-01
127      8  1990     -2.189276 1990-08-01
1

In [26]:
RESULTS = pd.DataFrame()
RESULTS['MONTH_PRED'] = MON_PRED
RESULTS['YEAR_PRED'] = ANO_PRED
RESULTS['MONTH_TARG'] = MON_TARG
RESULTS['YEAR_TARG'] = ANO_TARG
RESULTS['RMSE'] = RMSE
RESULTS['TARG'] = TARG
RESULTS['PRED'] = PRED
RESULTS['OBS_ANOM'] = OBS_ANOM
RESULTS['MEAN_ANOM_HIND'] = MEAN_ANOM_HIND
RESULTS['STD_ANOM_HIND'] = STD_ANOM_HIND
RESULTS['Q1_ANOM_HIND'] = Q1_ANOM_HIND
RESULTS['Q3_ANOM_HIND'] = Q3_ANOM_HIND

In [27]:
RESULTS.iloc[66:80]

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
66,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1983-1984,1.3569,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-0.2901, 0.75167, 1.5238, 1.35134, 0.84712, -...",0.97881,0.031153,1.055501,0.072001,0.608469
67,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1984-1985,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[1.56181, nan, -0.01008, 0.23103, 0.80948, -1....",,0.031153,1.055501,0.072001,0.608469
68,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1985-1986,0.8953,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[0.63341, 0.22085, 0.47592, 0.67112, 0.3367, -...",0.35902,0.031153,1.055501,0.072001,0.608469
69,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1986-1987,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[0.12559, 1.3869, 1.00309, 0.77474, 1.21781, 0...",,0.031153,1.055501,0.072001,0.608469
70,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1987-1988,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[nan, nan, nan, nan, nan, nan, 1.69579, 0.1196...",,0.031153,1.055501,0.072001,0.608469
71,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1988-1989,1.361,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-0.6154, -0.48525, -1.03157, -1.14216, -0.726...",-2.10845,0.031153,1.055501,0.072001,0.608469
72,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1989-1990,1.3608,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-1.53013, -0.4428, -1.3501, -1.73732, -1.5021...",0.04858,0.031153,1.055501,0.072001,0.608469
73,6,1981,"[7, 8, 9, 10, 11, 12, 1, 2, 3]",1990-1991,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...",,0.031153,1.055501,0.072001,0.608469
74,7,1981,"[8, 9, 10, 11, 12, 1, 2, 3, 4]",1981-1982,0.8133,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[0.41381, 0.61431, -0.67744, -1.30647, 0.57294...",0.62003,-0.038961,1.052579,-0.537328,0.630197
75,7,1981,"[8, 9, 10, 11, 12, 1, 2, 3, 4]",1982-1983,,"[0.10954, -0.35398, 0.5367, -0.33175, 1.29762,...","[0.23456, 0.47972, nan, nan, nan, -0.07843, 0....",,-0.038961,1.052579,-0.537328,0.630197


In [48]:
RESULTS.iloc[1230:1239]

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
1230,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1980-1981,1.5616,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[1.29762, 0.45825, 0.42578, -0.45832, -0.27447...",-1.30647,-0.08195,1.036233,-0.749602,0.810985
1231,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1981-1982,,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[0.57294, -0.90715, 0.16444, -0.47138, -0.4753...",,-0.08195,1.036233,-0.749602,0.810985
1232,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1982-1983,,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[nan, -0.07843, 0.03771, -1.05671, -0.50764, -...",,-0.08195,1.036233,-0.749602,0.810985
1233,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1983-1984,,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[-0.27539, 1.59568, 2.25918, 1.12386, -0.27208...",,-0.08195,1.036233,-0.749602,0.810985
1234,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1984-1985,1.3916,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[-1.16299, -0.56993, -1.24619, 0.66713, 0.5877...",0.3367,-0.08195,1.036233,-0.749602,0.810985
1235,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1985-1986,1.7103,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[-0.91514, 0.06048, -0.40739, -1.00258, -0.743...",1.21781,-0.08195,1.036233,-0.749602,0.810985
1236,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1986-1987,,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[0.70194, nan, nan, nan, nan, nan, nan, nan, nan]",,-0.08195,1.036233,-0.749602,0.810985
1237,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1987-1988,,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[nan, 1.69579, 0.11967, 1.68161, 0.71719, -0.9...",,-0.08195,1.036233,-0.749602,0.810985
1238,11,1990,"[12, 1, 2, 3, 4, 5, 6, 7, 8]",1988-1989,1.7504,"[-0.23592, -0.54092, 0.59955, 0.28523, 1.92367...","[-1.2512, -0.60938, -1.34952, -1.10046, -1.221...",-1.5021,-0.08195,1.036233,-0.749602,0.810985


In [28]:
RESULTS.tail(10)

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
1365,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1981-1981,1.0248,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[0.45825, 0.42578, -0.45832, -0.27447, 1.48233...",0.57294,2.220446e-16,1.0,-0.761602,0.670979
1366,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1982-1982,,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[-0.90715, 0.16444, -0.47138, -0.47539, 0.6566...",,2.220446e-16,1.0,-0.761602,0.670979
1367,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1983-1983,0.613,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[-0.07843, 0.03771, -1.05671, -0.50764, -0.915...",-0.27539,2.220446e-16,1.0,-0.761602,0.670979
1368,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1984-1984,,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[1.59568, 2.25918, 1.12386, -0.27208, 0.46017,...",,2.220446e-16,1.0,-0.761602,0.670979
1369,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1985-1985,1.1223,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[-0.56993, -1.24619, 0.66713, 0.58775, 0.51696...",-0.91514,2.220446e-16,1.0,-0.761602,0.670979
1370,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1986-1986,0.3461,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[0.06048, -0.40739, -1.00258, -0.74372, 0.0757...",0.70194,2.220446e-16,1.0,-0.761602,0.670979
1371,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1987-1987,,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[nan, nan, nan, nan, nan, nan, nan, nan, nan]",,2.220446e-16,1.0,-0.761602,0.670979
1372,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1988-1988,1.541,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[1.69579, 0.11967, 1.68161, 0.71719, -0.96728,...",-1.2512,2.220446e-16,1.0,-0.761602,0.670979
1373,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1989-1989,0.9461,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[-0.60938, -1.34952, -1.10046, -1.22165, -1.96...",-0.23592,2.220446e-16,1.0,-0.761602,0.670979
1374,12,1991,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",1990-1990,1.5997,"[0.43187, -0.84495, -0.71812, -1.03422, -0.247...","[-0.54092, 0.59955, 0.28523, 1.92367, -0.17033...",1.26814,2.220446e-16,1.0,-0.761602,0.670979


In [29]:
TTT = pd.DataFrame()
TTT['DATE'] = pd.date_range(datetime.date.today().replace(year=RESULTS.iloc[0]['YEAR_PRED'], month=RESULTS.iloc[0]['MONTH_PRED'], day=1),datetime.date.today().replace(year=RESULTS.iloc[-1]['YEAR_PRED'], month=RESULTS.iloc[-1]['MONTH_PRED'], day=1),freq='MS')
TTT['MONTH_PRED'] = pd.date_range(datetime.date.today().replace(year=RESULTS.iloc[0]['YEAR_PRED'], month=RESULTS['MONTH_PRED'][0], day=1),datetime.date.today().replace(year=RESULTS.iloc[-1]['YEAR_PRED'], month=RESULTS.iloc[-1]['MONTH_PRED'], day=1),freq='MS').month
TTT['YEAR_PRED'] = pd.date_range(datetime.date.today().replace(year=RESULTS.iloc[0]['YEAR_PRED'], month=RESULTS['MONTH_PRED'][0], day=1),datetime.date.today().replace(year=RESULTS.iloc[-1]['YEAR_PRED'], month=RESULTS.iloc[-1]['MONTH_PRED'], day=1),freq='MS').year
TTT['MONTH_PRED_cal'] = TTT['MONTH_PRED'].apply(lambda x: calendar.month_abbr[x])

In [30]:
TTT.tail(5)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal
128,1991-08-01,8,1991,Aug
129,1991-09-01,9,1991,Sep
130,1991-10-01,10,1991,Oct
131,1991-11-01,11,1991,Nov
132,1991-12-01,12,1991,Dec


In [31]:
best_n = 5
INDEX_best_n = []
RMSE_best_n = []
w_best_n = []
OBS_ANOM_best_n = []

MEAN_ANOM_HIND_best_n = []
STD_ANOM_HIND_best_n = []
Q1_ANOM_HIND_best_n = []
Q3_ANOM_HIND_best_n = []

for y in np.sort(TTT['YEAR_PRED'].unique()):
    #print(y)
    for m in np.sort(TTT['MONTH_PRED'].unique()):
        
        ## Get index observerved anomaly value series best n RMSE sorted
        AUX_1 = RESULTS.query('MONTH_PRED == @m & YEAR_PRED == @y').sort_values(by=['RMSE']).head(best_n).index
        AUX_2 = AUX_1.values.tolist()
        if AUX_2:
            INDEX_best_n.append(AUX_2)
        else:
            pass
        
        ## Get predicted anomaly value series best n RMSE sorted
        AUX_3 = RESULTS.query('MONTH_PRED == @m & YEAR_PRED == @y').sort_values(by=['RMSE']).head(best_n)
        AUX_4 = AUX_3['RMSE'].values.tolist()
#         AUX_5 = AUX_3['OBS_ANOM'].values.tolist()
        if AUX_4:
            RMSE_best_n.append(AUX_4)
            #Calculation of weights
            w_best_n.append(((1/np.array(AUX_4)) / (1/np.array(AUX_4)).sum()).round(6))
        else:
            pass

        ## Get observerved anomaly value series best n RMSE sorted
        AUX_5 = AUX_3['OBS_ANOM'].values.tolist()
        if AUX_5:
            OBS_ANOM_best_n.append(AUX_5)    
        else:
            pass
        
        ## Get mean anomaly of hindcast series best n RMSE sorted
        AUX_6 = AUX_3['MEAN_ANOM_HIND'].values.tolist()              
        if AUX_6:
            MEAN_ANOM_HIND_best_n.append(AUX_6[0])    
        else:
            pass
        
        ## Get standard deviation anomaly of hindcast series best n RMSE sorted
        AUX_7 = AUX_3['STD_ANOM_HIND'].values.tolist()       
        if AUX_7:
            STD_ANOM_HIND_best_n.append(AUX_7[0])    
        else:
            pass        
        
        ## Get Q1 anomaly of hindcast series best n RMSE sorted
        AUX_8 = AUX_3['Q1_ANOM_HIND'].values.tolist()       
        if AUX_8:
            Q1_ANOM_HIND_best_n.append(AUX_8[0])    
        else:
            pass        

        ## Get Q3 anomaly of hindcast series best n RMSE sorted
        AUX_9 = AUX_3['Q3_ANOM_HIND'].values.tolist()       
        if AUX_8:
            Q3_ANOM_HIND_best_n.append(AUX_9[0])    
        else:
            pass  

In [32]:
TTT['INDEX_sort_best_n'] = INDEX_best_n
TTT['RMSE_sort_best_n'] = RMSE_best_n
TTT['Weights_sort_best_n'] = w_best_n
TTT['OBS_ANOM_sort_best_n'] = OBS_ANOM_best_n
TTT['MEAN_ANOM_HIND_best_n'] = MEAN_ANOM_HIND_best_n
TTT['STD_ANOM_HIND_best_n'] = STD_ANOM_HIND_best_n
TTT['Q1_ANOM_HIND_best_n'] = Q1_ANOM_HIND_best_n
TTT['Q3_ANOM_HIND_best_n'] = Q3_ANOM_HIND_best_n

In [33]:
TTT.head(15)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n
0,1980-12-01,12,1980,Dec,"[4, 0, 9, 5, 2]","[0.7324, 1.1009, 1.2993, 1.3424, 1.4514]","[0.304955, 0.202879, 0.1719, 0.166381, 0.153885]","[-0.91514, 0.57294, 1.26814, 0.70194, -0.27539]",-0.162202,0.933941,-0.925055,0.578105
1,1981-01-01,1,1981,Jan,"[15, 11, 18, 20, 13]","[0.6593, 0.9663, 1.2465, 1.2693, 1.3943]","[0.31216, 0.212984, 0.165108, 0.162142, 0.147606]","[0.06048, -0.90715, -0.60938, 0.43187, 1.59568]",-0.045825,1.041847,-0.588864,0.238748
2,1981-02-01,2,1981,Feb,"[26, 22, 29, 31, 24]","[0.4882, 1.0176, 1.2528, 1.3059, 1.447]","[0.387496, 0.185904, 0.151002, 0.144862, 0.130...","[-0.40739, 0.16444, -1.34952, -0.84495, 2.25918]",-0.042578,1.043529,-0.617423,0.201534
3,1981-03-01,3,1981,Mar,"[37, 33, 42, 35, 40]","[0.8753, 0.9171, 1.2808, 1.3696, 1.499]","[0.259011, 0.247206, 0.177009, 0.165532, 0.151...","[-1.00258, -0.47138, -0.71812, 1.12386, -1.10046]",0.045832,1.041843,-0.854662,0.850785
4,1981-04-01,4,1981,Apr,"[48, 44, 53, 46, 51]","[0.8528, 0.879, 1.2639, 1.287, 1.5283]","[0.258695, 0.250984, 0.174551, 0.171418, 0.144...","[-0.74372, -0.47539, -1.03422, -0.27208, -1.22...",0.027447,1.049716,-0.620959,0.649879
5,1981-05-01,5,1981,May,"[54, 58, 56, 63, 61]","[0.8728, 0.8773, 1.2607, 1.2667, 1.4908]","[0.252418, 0.251124, 0.174753, 0.173925, 0.14778]","[0.65669, 0.07577, 0.46017, -0.24716, -1.96632]",-0.148233,0.917905,-0.568139,0.487426
6,1981-06-01,6,1981,Jun,"[64, 68, 66, 72, 71]","[0.818, 0.8953, 1.3569, 1.3608, 1.361]","[0.268915, 0.245697, 0.162114, 0.161649, 0.161...","[0.99407, 0.35902, 0.97881, 0.04858, -2.10845]",0.031153,1.055501,0.072001,0.608469
7,1981-07-01,7,1981,Jul,"[74, 78, 76, 81, 82]","[0.8133, 0.9087, 1.3176, 1.3285, 1.3582]","[0.268581, 0.240384, 0.165784, 0.164424, 0.160...","[0.62003, 0.12559, 1.56181, -1.53013, -1.53832]",-0.038961,1.052579,-0.537328,0.630197
8,1981-08-01,8,1981,Aug,"[84, 88, 87, 92, 91]","[0.8526, 1.0218, 1.2633, 1.4541, 1.7456]","[0.279012, 0.23281, 0.188305, 0.163596, 0.136277]","[0.23456, 1.3869, 0.22085, -2.18928, -0.4428]",-0.051726,1.056095,-0.444498,0.255242
9,1981-09-01,9,1981,Sep,"[94, 98, 96, 97, 102]","[0.8962, 1.0065, 1.2612, 1.2868, 1.4199]","[0.254541, 0.226647, 0.180875, 0.177277, 0.160...","[0.47972, 1.00309, -0.01008, 0.47592, -1.35111]",-0.068257,1.035657,-0.868952,0.478808


In [34]:
TTT.tail(15)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n
118,1990-10-01,10,1990,Oct,"[1220, 1225, 1224, 1228, 1223]","[1.3557, 1.371, 1.3728, 1.6709, 1.8024]","[0.220269, 0.217811, 0.217525, 0.178717, 0.165...","[-0.67744, 0.77474, 0.67112, -1.73732, 0.23103]",0.001001229,1.06904,-0.696027,0.675267
119,1990-11-01,11,1990,Nov,"[1234, 1230, 1235, 1238, 1231]","[1.3916, 1.5616, 1.7103, 1.7504, nan]","[nan, nan, nan, nan, nan]","[0.3367, -1.30647, 1.21781, -1.5021, nan]",-0.08195039,1.036233,-0.749602,0.810985
120,1990-12-01,12,1990,Dec,"[1248, 1240, 1245, 1241, 1249]","[1.289, 1.2993, 1.4872, 1.6145, 1.7174]","[0.226873, 0.225075, 0.196638, 0.181133, 0.170...","[-1.2512, 1.29762, -0.91514, 0.57294, -0.23592]",-0.1585178,0.940425,-0.925055,0.578105
121,1991-01-01,1,1991,Jan,"[1259, 1251, 1256, 1252, 1254]","[1.1174, 1.2693, 1.5043, 1.5955, 1.7924]","[0.253364, 0.223044, 0.1882, 0.177442, 0.15795]","[-0.60938, 0.45825, 0.06048, -0.90715, 1.59568]",-0.04318715,1.043223,-0.588864,0.251409
122,1991-02-01,2,1991,Feb,"[1270, 1262, 1267, 1263, 1265]","[1.198, 1.3059, 1.3769, 1.7233, 1.7837]","[0.240717, 0.220827, 0.20944, 0.167341, 0.161674]","[-1.34952, 0.42578, -0.40739, 0.16444, 2.25918]",0.08449548,1.011855,-0.17594,0.330069
123,1991-03-01,3,1991,Mar,"[1273, 1281, 1278, 1274, 1276]","[1.2808, 1.387, 1.5521, 1.7211, 1.8017]","[0.237886, 0.219671, 0.196305, 0.177029, 0.169...","[-0.45832, -1.10046, -1.00258, -0.47138, 1.12386]",0.07181207,1.023758,-0.726359,0.850785
124,1991-04-01,4,1991,Apr,"[1284, 1292, 1289, 1293, 1285]","[1.2639, 1.3721, 1.492, 1.5558, 1.62]","[0.229314, 0.211231, 0.194256, 0.18629, 0.178908]","[-0.27447, -1.22165, -0.74372, 1.92367, -0.47539]",0.1034218,0.990142,-0.490872,0.649879
125,1991-05-01,5,1991,May,"[1294, 1302, 1299, 1303, 1295]","[1.2667, 1.3566, 1.4815, 1.5145, 1.5602]","[0.225377, 0.210442, 0.1927, 0.188501, 0.18298]","[1.48233, -1.96632, 0.07577, -0.17033, 0.65669]",0.0247164,1.050545,-0.528186,0.58403
126,1991-06-01,6,1991,Jun,"[1304, 1312, 1313, 1309, 1305]","[1.2546, 1.2861, 1.3743, 1.4758, 1.5585]","[0.220093, 0.214703, 0.200923, 0.187105, 0.177...","[-0.28038, -2.10845, 0.04858, 0.35902, 0.99407]",1.373901e-16,1.0,-0.109316,0.579904
127,1991-07-01,7,1991,Jul,"[1314, 1322, 1319, 1315, 1323]","[1.0441, 1.2503, 1.2897, 1.4378, 1.6914]","[0.250744, 0.209391, 0.202995, 0.182085, 0.154...","[0.35065, -1.53013, 0.12559, 0.62003, -1.53832]",-5.551115e-16,1.0,-0.446244,0.626451


In [35]:
Pred_Anom = []
for i in TTT.index:
    Pred_Anom.append((TTT['Weights_sort_best_n'][i] * TTT['OBS_ANOM_sort_best_n'][i]).sum())
    
TTT['Frcst_Anom'] = Pred_Anom

In [36]:
TTT

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n,Frcst_Anom
0,1980-12-01,12,1980,Dec,"[4, 0, 9, 5, 2]","[0.7324, 1.1009, 1.2993, 1.3424, 1.4514]","[0.304955, 0.202879, 0.1719, 0.166381, 0.153885]","[-0.91514, 0.57294, 1.26814, 0.70194, -0.27539]",-1.622021e-01,0.933941,-0.925055,0.578105,0.129565
1,1981-01-01,1,1981,Jan,"[15, 11, 18, 20, 13]","[0.6593, 0.9663, 1.2465, 1.2693, 1.3943]","[0.31216, 0.212984, 0.165108, 0.162142, 0.147606]","[0.06048, -0.90715, -0.60938, 0.43187, 1.59568]",-4.582489e-02,1.041847,-0.588864,0.238748,0.030614
2,1981-02-01,2,1981,Feb,"[26, 22, 29, 31, 24]","[0.4882, 1.0176, 1.2528, 1.3059, 1.447]","[0.387496, 0.185904, 0.151002, 0.144862, 0.130...","[-0.40739, 0.16444, -1.34952, -0.84495, 2.25918]",-4.257839e-02,1.043529,-0.617423,0.201534,-0.158117
3,1981-03-01,3,1981,Mar,"[37, 33, 42, 35, 40]","[0.8753, 0.9171, 1.2808, 1.3696, 1.499]","[0.259011, 0.247206, 0.177009, 0.165532, 0.151...","[-1.00258, -0.47138, -0.71812, 1.12386, -1.10046]",4.583195e-02,1.041843,-0.854662,0.850785,-0.483722
4,1981-04-01,4,1981,Apr,"[48, 44, 53, 46, 51]","[0.8528, 0.879, 1.2639, 1.287, 1.5283]","[0.258695, 0.250984, 0.174551, 0.171418, 0.144...","[-0.74372, -0.47539, -1.03422, -0.27208, -1.22...",2.744664e-02,1.049716,-0.620959,0.649879,-0.715224
...,...,...,...,...,...,...,...,...,...,...,...,...,...
128,1991-08-01,8,1991,Aug,"[1324, 1329, 1325, 1328, 1332]","[0.914, 1.0147, 1.1854, 1.2511, 1.2526]","[0.242011, 0.217993, 0.186602, 0.176803, 0.176...","[0.41381, 1.3869, 0.23456, 0.22085, -0.4428]",-4.934325e-17,1.000000,-0.310240,0.370790,0.407103
129,1991-09-01,9,1991,Sep,"[1339, 1334, 1335, 1338, 1342]","[0.8609, 0.903, 1.0478, 1.2394, 1.3238]","[0.242722, 0.231406, 0.199427, 0.168597, 0.157...","[1.00309, 0.61431, 0.47972, 0.47592, -1.3501]",2.220446e-17,1.000000,-0.679225,0.544325,0.348424
130,1991-10-01,10,1991,Oct,"[1349, 1344, 1348, 1352, 1347]","[0.8838, 0.9432, 1.3219, 1.3487, 1.5984]","[0.262204, 0.245691, 0.175305, 0.171821, 0.14498]","[0.77474, -0.67744, 0.67112, -1.73732, 0.23103]",9.753001e-17,1.000000,-0.516775,0.638861,-0.110664
131,1991-11-01,11,1991,Nov,"[1354, 1359, 1362, 1358, 1363]","[0.9356, 0.9457, 1.343, 1.4265, 1.5841]","[0.254294, 0.251578, 0.177154, 0.166784, 0.150...","[-1.30647, 1.21781, -1.5021, 0.3367, 0.6556]",1.233581e-17,1.000000,-0.631683,0.772549,-0.137335


In [37]:
TTT['Frcst_Anom_Rest'] = ( TTT['Frcst_Anom'] - TTT['MEAN_ANOM_HIND_best_n'] ) / TTT['STD_ANOM_HIND_best_n']

In [38]:
HIND_FORECAST_ANALOGUE = pd.DataFrame()
HIND_FORECAST_ANALOGUE['DATE'] = TTT['DATE']
HIND_FORECAST_ANALOGUE['MONTH_PRED'] = TTT['MONTH_PRED']
HIND_FORECAST_ANALOGUE['YEAR_PRED'] = TTT['YEAR_PRED']
HIND_FORECAST_ANALOGUE['FORECAST'] = TTT['Frcst_Anom_Rest']

In [39]:
ANOMALY_ANALOGUE_TS

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE
0,1,1980,-1.536267,1980-01-01
1,2,1980,0.241717,1980-02-01
2,3,1980,1.049742,1980-03-01
3,4,1980,1.300559,1980-04-01
4,5,1980,1.075049,1980-05-01
...,...,...,...,...
139,8,1991,,1991-08-01
140,9,1991,,1991-09-01
141,10,1991,,1991-10-01
142,11,1991,,1991-11-01


In [40]:
HIND_FORECAST_ANALOGUE

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,FORECAST
0,1980-12-01,12,1980,0.312405
1,1981-01-01,1,1981,0.073368
2,1981-02-01,2,1981,-0.110719
3,1981-03-01,3,1981,-0.508285
4,1981-04-01,4,1981,-0.707497
...,...,...,...,...
128,1991-08-01,8,1991,0.407103
129,1991-09-01,9,1991,0.348424
130,1991-10-01,10,1991,-0.110664
131,1991-11-01,11,1991,-0.137335


In [41]:
OBSERVED_AND_HIND_FORECAST_ANALOGUE = pd.merge(ANOMALY_ANALOGUE_TS, HIND_FORECAST_ANALOGUE[['DATE','FORECAST']], how='left', on='DATE')

In [42]:
OBSERVED_AND_HIND_FORECAST_ANALOGUE.head(31)

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE,FORECAST
0,1,1980,-1.536267,1980-01-01,
1,2,1980,0.241717,1980-02-01,
2,3,1980,1.049742,1980-03-01,
3,4,1980,1.300559,1980-04-01,
4,5,1980,1.075049,1980-05-01,
5,6,1980,0.632953,1980-06-01,
6,7,1980,0.682468,1980-07-01,
7,8,1980,0.109535,1980-08-01,
8,9,1980,-0.353978,1980-09-01,
9,10,1980,0.536702,1980-10-01,


In [43]:
OBSERVED_AND_HIND_FORECAST_ANALOGUE.tail(25)

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE,FORECAST
119,12,1989,-0.235918,1989-12-01,-0.130693
120,1,1990,-0.540918,1990-01-01,0.013391
121,2,1990,0.599554,1990-02-01,-0.083365
122,3,1990,0.28523,1990-03-01,-0.489073
123,4,1990,1.923675,1990-04-01,-0.829544
124,5,1990,-0.170332,1990-05-01,-0.227802
125,6,1990,0.048584,1990-06-01,-0.17099
126,7,1990,-1.538323,1990-07-01,-0.018226
127,8,1990,-2.189276,1990-08-01,0.116609
128,9,1990,-1.351109,1990-09-01,0.164803


### Save  OBSERVED_AND_HIND_FORECAST_ANALOGUE

In [None]:
filename = 'santalucia_caudales_mar2023' + '_FL_' + str(FORECAST_LENGTH) + '_DANA_' + str(DANA)

In [None]:
filename

In [None]:
OBSERVED_AND_HIND_FORECAST_ANALOGUE.to_csv('../data/' + filename + '.csv', index = False)