# HydroSOS - Persistence and Hydrological Analogues method

In this notebook, we explore the Persistence and Hydrological Analogues methods.

## Import the python libraries

In [1]:
%reset -f

In [2]:
# Importing the libraries
import pandas as pd
import seaborn as sns
import numpy as np
import calendar
import datetime
import matplotlib.pyplot as plt
import math
from scipy import stats
plt.style.use('classic')
%matplotlib inline

from IPython.display import HTML

sns.set()

In [3]:
# install a conda package in the current Jupyter kernel
#import sys
#!conda install --yes --prefix {sys.prefix} missingno

In [4]:
# import missingno

### Define helper functions for the percentiles

In [5]:
# Quantile Function Definitions
def q1(x):
    """Return the 0.28 quantile (28th percentile) of ``x``.

    Despite the name, this is not the first quartile: the level used is 0.28.

    Parameters
    ----------
    x : object exposing ``.quantile`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.quantile(0.28)`` returns.
    """
    lower_level = 0.28
    return x.quantile(lower_level)

def q2(x):
    """Return the median of ``x``.

    Parameters
    ----------
    x : object exposing ``.median`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.median()`` returns.
    """
    middle_value = x.median()
    return middle_value

def q3(x):
    """Return the 0.72 quantile (72nd percentile) of ``x``.

    Despite the name, this is not the third quartile: the level used is 0.72.

    Parameters
    ----------
    x : object exposing ``.quantile`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.quantile(0.72)`` returns.
    """
    upper_level = 0.72
    return x.quantile(upper_level)

def q5(x):
    """Return the 0.05 quantile (5th percentile) of ``x``.

    Parameters
    ----------
    x : object exposing ``.quantile`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.quantile(0.05)`` returns.
    """
    level = 0.05
    return x.quantile(level)

def q95(x):
    """Return the 0.95 quantile (95th percentile) of ``x``.

    Parameters
    ----------
    x : object exposing ``.quantile`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.quantile(0.95)`` returns.
    """
    level = 0.95
    return x.quantile(level)

def q87(x):
    """Return the 0.87 quantile (87th percentile) of ``x``.

    Parameters
    ----------
    x : object exposing ``.quantile`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.quantile(0.87)`` returns.
    """
    level = 0.87
    return x.quantile(level)

def q13(x):
    """Return the 0.13 quantile (13th percentile) of ``x``.

    Parameters
    ----------
    x : object exposing ``.quantile`` (for example a pandas Series)

    Returns
    -------
    Whatever ``x.quantile(0.13)`` returns.
    """
    level = 0.13
    return x.quantile(level)


We create a function called `add_months` that adds a given number of months to an input date. It will be used in the Jack-Knife validation.

In [6]:
def add_months(sourcedate, months):
    """Shift ``sourcedate`` by ``months`` calendar months.

    Parameters
    ----------
    sourcedate : object with ``year``, ``month`` and ``day`` attributes
        (for example ``datetime.date``).
    months : int
        Number of months to add; negative values move backwards in time.

    Returns
    -------
    datetime.date
        The shifted date. The day of month is kept when it exists in the
        target month, otherwise it is clamped to that month's last day
        (e.g. 31 January + 1 month gives the last day of February).
    """
    # Work with a zero-based month index so divmod yields the year carry directly.
    year_offset, month_index = divmod(sourcedate.month - 1 + months, 12)
    target_year = sourcedate.year + year_offset
    target_month = month_index + 1
    last_day_of_month = calendar.monthrange(target_year, target_month)[1]
    return datetime.date(target_year, target_month, min(sourcedate.day, last_day_of_month))

The monthly anomalies of the most recent months are compared with all possible historical sequences of anomalies covering the same months of the year. That is, if the recent past covers, say, the months February to July, then potential analogues are sought only in the February-to-July sequences of the historical record.

Define a root-mean-square error (RMSE) function. This function will be used to select the analogue years.

In [7]:
def rmse(predictions,targets):
    """Root-mean-square error between two aligned sequences, rounded to 4 decimals.

    Parameters
    ----------
    predictions, targets : array-like supporting subtraction and ``.mean()``
        (for example numpy arrays or pandas Series of equal length).

    Returns
    -------
    The square root of the mean squared difference, passed through
    ``np.around(..., 4)``.

    Notes
    -----
    The mean is taken with the input's own ``.mean()`` method, so NaN handling
    follows that type (pandas Series skip NaN by default, numpy arrays do not).
    """
    squared_errors = (predictions - targets) ** 2
    root_mean_square = squared_errors.mean() ** 0.5
    return np.around(root_mean_square, 4)

## Import Data 

In this example we import the daily discharge data from Fray Marcos station, from 01 Jan 1980 to 31 Jan 2023. We import and then visualize head and tail of dataframe, plot timeseries and also make boxplot for daily discharge grouped by month.

In [8]:
# Load the monthly anomaly table; the "Month" column becomes the index and
# empty fields are read as missing values.
MONTHLY_ANOMALY = pd.read_csv(
    '../notebook_verificacion/santalucia_caudales_prueba_MA.csv',
    index_col="Month",
    parse_dates=['Month'],
    na_values="",
)

In [9]:
# Display the monthly anomaly table read from the CSV file.
MONTHLY_ANOMALY

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,...,,,,,,,,,,
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,...,,,,,,,,,,
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,...,,,,,,,,,,
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,...,,,,,,,,,,
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,...,,,,,,,,,,
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,...,,,,,,,,,,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,...,,,,,,,,,,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,...,,,,,,,,,,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,...,,,,,,,,,,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,...,,,,,,,,,,


In [10]:
# Load the monthly discharge table; the "Fecha" (date) column is parsed as
# dates and used as the index, and empty fields are read as missing values.
DISCHARGE_MONTHLY = pd.read_csv(
    '../notebook_verificacion/santalucia_caudales_prueba_DA.csv',
    index_col="Fecha",
    parse_dates=['Fecha'],
    na_values="",
)

In [11]:
# Display the monthly discharge table read from the CSV file.
DISCHARGE_MONTHLY

Unnamed: 0_level_0,Discharge,Year,Month,water_year,Q_to_log,Anomaly_Qlog
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-31,1.198613,1980,1,1980,0.181165,-1.536267
1980-02-29,28.386414,1980,2,1980,3.345911,0.241717
1980-03-31,86.782323,1980,3,1980,4.463403,1.049742
1980-04-30,172.187367,1980,4,1981,5.148583,1.300559
1980-05-31,275.560258,1980,5,1981,5.618806,1.075049
...,...,...,...,...,...,...
2022-12-31,,2022,12,2023,,
2023-01-31,,2023,1,2023,,
2023-02-28,,2023,2,2023,,
2023-03-31,,2023,3,2023,,


In [12]:
# Unique years present in the monthly discharge table.
YEAR_HINDCAST_SELECTED = np.unique(DISCHARGE_MONTHLY['Year'])
# Relabel the anomaly-table columns with these integer years (replacing the string labels).
# NOTE(review): the relabel is positional - it assumes the CSV columns are already
# in the same order as YEAR_HINDCAST_SELECTED and cover exactly the same years; confirm.
MONTHLY_ANOMALY.columns = YEAR_HINDCAST_SELECTED

In [13]:
# Display the array of years available in the record.
YEAR_HINDCAST_SELECTED

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
      dtype=int64)

In [14]:
# Years used in the analysis: the first 12 years of the record.
# range() excludes its stop value, so this spans YEAR_HINDCAST_SELECTED[0]
# through YEAR_HINDCAST_SELECTED[11] (1980-1991 for the years listed above).
range_analysis = range(YEAR_HINDCAST_SELECTED[0],YEAR_HINDCAST_SELECTED[12])

In [15]:
# Pivot the long monthly table into a month (rows) by year (columns) grid of
# Q_to_log values; dropna=False is passed so all-NaN columns are not removed.
LOG_DISCHARGE_MONTHLY_PIVOT = pd.pivot_table(DISCHARGE_MONTHLY, index=['Month'],columns=['Year'], values=['Q_to_log'],dropna=False)
# Replace the pivoted column labels with the plain array of years.
# NOTE(review): positional relabel - assumes the pivot yields one column per entry
# of YEAR_HINDCAST_SELECTED, in the same order; confirm.
LOG_DISCHARGE_MONTHLY_PIVOT.columns = YEAR_HINDCAST_SELECTED

In [16]:
# Display the month-by-year table of Q_to_log values.
LOG_DISCHARGE_MONTHLY_PIVOT

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.181165,2.266864,0.839047,1.705645,3.456299,1.191681,1.850911,,3.56098,1.150429,...,,,,,,,,,,
2,3.345911,3.59657,3.240681,3.068096,6.093265,1.319706,2.461964,,3.179709,1.178985,...,,,,,,,,,,
3,4.463403,2.653676,2.637997,1.93559,4.552342,4.004259,2.000542,,5.221666,1.883088,...,,,,,,,,,,
4,5.148583,3.169494,2.917019,2.876502,3.172495,4.252904,2.579853,,4.415552,1.979313,...,,,,,,,,,,
5,5.618806,6.181355,5.040966,2.868932,4.769523,4.847961,4.238582,,2.797907,1.41802,...,,,,,,,,,,
6,5.431652,4.226532,5.908143,4.789328,5.887999,5.297043,5.070209,,2.878916,1.814417,...,,,,,,,,,,
7,5.315642,4.974052,5.251364,4.314426,6.220885,5.265138,4.742359,,3.979544,3.037866,...,,,,,,,,,,
8,5.119868,5.464556,5.261496,5.847285,,5.245967,6.566886,,4.44609,4.494176,...,,,,,,,,,,
9,4.11759,5.092622,4.957092,6.008443,4.463882,4.953263,5.484107,,3.435276,3.11453,...,,,,,,,,,,
10,4.931459,3.168846,,6.114101,4.487702,5.126601,5.277033,,2.494186,1.63017,...,,,,,,,,,,


In [17]:
# Restrict both month-by-year tables to the years in range_analysis.
ANOMALY_ANALOGUES = MONTHLY_ANOMALY[range_analysis]
QLOG_ANALOGUES = LOG_DISCHARGE_MONTHLY_PIVOT[range_analysis]
# Render the anomaly subset as an HTML table.
HTML(ANOMALY_ANALOGUES.to_html())

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,-1.536267,0.458249,-0.907146,-0.078435,1.595684,-0.569929,0.06048,,1.695788,-0.609378,-0.540918,0.431872
2,0.241717,0.425784,0.164443,0.037709,2.259184,-1.246188,-0.407393,,0.11967,-1.349524,0.599554,-0.844955
3,1.049742,-0.458319,-0.471385,-1.056707,1.123855,0.667133,-1.002581,,1.681609,-1.100457,0.28523,-0.718121
4,1.300559,-0.274466,-0.475394,-0.507639,-0.272078,0.587747,-0.743722,,0.717188,-1.221652,1.923675,-1.034218
5,1.075049,1.482334,0.656692,-0.915862,0.460168,0.516957,0.075766,,-0.967284,-1.966323,-0.170332,-0.247164
6,0.632953,-0.280376,0.994073,0.146152,0.978806,0.530937,0.359025,,-1.3017,-2.108455,0.048584,
7,0.682468,0.350652,0.620029,-0.290099,1.561809,0.633409,0.125589,,-0.6154,-1.530132,-1.538323,
8,0.109535,0.413811,0.234558,0.751669,,0.22085,1.386902,,-0.485248,-0.4428,-2.189276,
9,-0.353978,0.614313,0.47972,1.523802,-0.01008,0.475918,1.003092,,-1.031575,-1.350103,-1.351109,
10,0.536702,-0.677438,,1.351341,0.231029,0.671122,0.774743,,-1.142164,-1.737324,-0.00801,


In [18]:
# Reshape the wide anomaly table (rows = month, columns = year) into a long
# table with one row per (Month, Year) pair and columns Month, Year, Anomaly_Qlog.
# `var_name` is given as a scalar: pandas documents it as a scalar for
# single-level columns, and recent pandas releases raise ValueError for a list.
# `value_name` names the value column directly, so no column rename is needed afterwards.
ANOMALY_ANALOGUE_TS = pd.melt(
    ANOMALY_ANALOGUES.reset_index(),
    id_vars='Month',
    var_name='Year',
    value_name='Anomaly_Qlog',
    ignore_index=True,
)
HTML(ANOMALY_ANALOGUE_TS.head(6).to_html())

Unnamed: 0,Month,Year,Anomaly_Qlog
0,1,1980,-1.536267
1,2,1980,0.241717
2,3,1980,1.049742
3,4,1980,1.300559
4,5,1980,1.075049
5,6,1980,0.632953


In [19]:
# Reshape the wide log-discharge table (rows = month, columns = year) into a
# long table with one row per (Month, Year) pair and columns Month, Year, Qlog.
# `var_name` is given as a scalar: pandas documents it as a scalar for
# single-level columns, and recent pandas releases raise ValueError for a list.
# `value_name` names the value column directly, so no column rename is needed afterwards.
QLOG_ANALOGUE_TS = pd.melt(
    QLOG_ANALOGUES.reset_index(),
    id_vars='Month',
    var_name='Year',
    value_name='Qlog',
    ignore_index=True,
)
HTML(QLOG_ANALOGUE_TS.head(6).to_html())

Unnamed: 0,Month,Year,Qlog
0,1,1980,0.181165
1,2,1980,3.345911
2,3,1980,4.463403
3,4,1980,5.148583
4,5,1980,5.618806
5,6,1980,5.431652


In [20]:
# Add a DATE column built from the Year and Month columns; assign(DAY=1)
# supplies the day component, so every date is the first day of its month.
ANOMALY_ANALOGUE_TS['DATE'] = pd.to_datetime(ANOMALY_ANALOGUE_TS[['Year', 'Month']].assign(DAY=1))
# Preview the first six rows without the index column.
HTML(ANOMALY_ANALOGUE_TS.head(6).to_html(index=False))

Month,Year,Anomaly_Qlog,DATE
1,1980,-1.536267,1980-01-01
2,1980,0.241717,1980-02-01
3,1980,1.049742,1980-03-01
4,1980,1.300559,1980-04-01
5,1980,1.075049,1980-05-01
6,1980,0.632953,1980-06-01


In [21]:
######## MAIN SECTION

## Hindcast for a forecast length of FORECAST_LENGTH months

In [22]:
## DANA: number of most recent months of observations that form the target window
DANA = 6
## FORECAST_LENGTH: forecast length (lead time) in months
FORECAST_LENGTH = 2
##

In [44]:
# Example end-of-observation month used to illustrate add_months.
# The year and month are written out explicitly: the previous version read the
# loop variables `y` and `m`, which only exist after the hindcast loop further
# down has run, so the cell failed with NameError on a fresh kernel.
END_DATE_HIST = datetime.date(1992, 12, 1)
END_DATE_HIST


datetime.date(1992, 12, 1)

In [45]:
# Forecast date: FORECAST_LENGTH months after END_DATE_HIST.
FORECAST_DATE = add_months(END_DATE_HIST,FORECAST_LENGTH)
FORECAST_DATE

datetime.date(1993, 2, 1)

In [46]:
# Month number of the forecast date.
FORECAST_DATE.month

2

In [69]:
# Last year of the analysis range. This reads range_analysis directly because
# YEAR_ANALYSIS is only assigned (as an alias of range_analysis) in the next
# code cell, so referencing it here fails on a fresh kernel.
range_analysis[-1]

1991

In [70]:
##
# Jack-knife hindcast: for every (year, end-month) of the analysis range, score
# each historical candidate window against the target window with the RMSE.
#
# Names taken from earlier cells:
#   range_analysis (-> YEAR_ANALYSIS)
#   QLOG_ANALOGUES, QLOG_ANALOGUE_TS
#   ANOMALY_ANALOGUES, ANOMALY_ANALOGUE_TS
#   DANA, FORECAST_LENGTH
#   add_months, rmse, q1, q3
# MONTH_ANALYSIS is defined below.
##


YEAR_ANALYSIS = range_analysis # choose it so that at least DANA months of earlier data are available

MONTH_ANALYSIS = range(1,13,1)
df = pd.DataFrame()
ANOMALY_HISTORICAL = pd.DataFrame()

# One entry is appended to each of the lists below per (target window, candidate window) pair.
RMSE = []
ANO_TARG = []   # "first-last" year label of the candidate window
ANO_PRED = []   # forecast year of the target
MON_TARG = []   # month numbers covered by the target window
MON_PRED = []   # forecast month of the target
TARG = []       # target-window anomalies
PRED = []       # candidate-window anomalies
OBS_ANOM = []   # anomaly observed FORECAST_LENGTH months after the candidate window

MEAN_ANOM_HIND = []
STD_ANOM_HIND = []
Q1_ANOM_HIND = []
Q3_ANOM_HIND = []

MEAN_QLOG_HIND = []
STD_QLOG_HIND = []

##
print('Dana selected: ',DANA, 'months')
print('Forecast length selected: ', FORECAST_LENGTH , 'months')
##


for y in YEAR_ANALYSIS:

    for m in MONTH_ANALYSIS:

        print('Year:',y,"-","End-Month of observation:",m)
        # Last observed month of the target window (first day of month m in year y)
        END_DATE_HIST = datetime.date(y, m, 1)

        # Exclusive lower bound of the target window: DANA months before the end month
        FIRST_DATE_HIST = add_months(END_DATE_HIST,-DANA)

        # Month being forecast
        FORECAST_DATE = add_months(END_DATE_HIST,FORECAST_LENGTH)
        print('Forecast Year:',FORECAST_DATE.year,"-"," Forecast Month:",FORECAST_DATE.month)

        if FORECAST_DATE.year > YEAR_ANALYSIS[-1]:
            # Later end-months of this year forecast the same or a later year, so stop the month loop.
            print('Forecast Year out of range')
            break

        # Jack-knife step: leave the forecast year out of the month-by-year tables
        # that provide the hindcast statistics. The message reports the year that
        # is actually dropped (it differs from y when the forecast crosses into the next year).
        print('Dropping year:',FORECAST_DATE.year," - get series for analysis.")
        QLOG_HINDCAST_TARG_ANALOGUE = QLOG_ANALOGUES.drop([FORECAST_DATE.year],axis=1)
        QLOG_HINDCAST_TARG_ANALOGUE_TS = QLOG_ANALOGUE_TS[QLOG_ANALOGUE_TS['Year'] != FORECAST_DATE.year]
        ANOMALY_HINDCAST_TARG_ANALOGUE = ANOMALY_ANALOGUES.drop([FORECAST_DATE.year],axis=1)
        ANOMALY_HINDCAST_TARG_ANALOGUE_TS = ANOMALY_ANALOGUE_TS[ANOMALY_ANALOGUE_TS['Year'] != FORECAST_DATE.year]

        # Target window: rows of the anomaly series after FIRST_DATE_HIST up to and including END_DATE_HIST
        ANOMALY_TARGET = ANOMALY_ANALOGUE_TS.query('DATE > @FIRST_DATE_HIST & DATE <= @END_DATE_HIST')
        print(ANOMALY_TARGET)

        # Skip end-months that do not have DANA months of record behind them
        if ANOMALY_TARGET.index.size < DANA:
            print('There are not enough months in the past')
            print('')
            continue

        print('There are enough months in the past')
        print(f"Target period: {add_months(FIRST_DATE_HIST,1).strftime('%m-%Y')} to {(END_DATE_HIST.strftime('%m-%Y'))}")

        # Month numbers of the target window. Both bounds are first-of-month dates,
        # so a month-start range over the window yields the same months as the
        # previous month-end range, without relying on the deprecated 'M' alias.
        MONTHS_TARG = pd.date_range(add_months(FIRST_DATE_HIST,1),END_DATE_HIST,freq='MS').month.to_list()

        # Candidate pool: every row outside the target window that falls in one of the target months.
        # reset_index() keeps the old labels in an 'index' column and gives the frame a 0..n-1 index.
        ANOMALY_HINDCAST_TARG = ANOMALY_ANALOGUE_TS.drop(ANOMALY_TARGET.index)
        ANOMALY_HINDCAST_TARG = ANOMALY_HINDCAST_TARG.query('Month == @MONTHS_TARG').reset_index()

        # Target anomalies
        # NOTE(review): only the candidate window is checked for NaN further down; a target
        # window with missing months is still scored - confirm this is intended.
        df['target'] = ANOMALY_TARGET["Anomaly_Qlog"].values.round(5)

        # When the window crosses a year boundary, discard the leading rows that come
        # before the first occurrence of the window's first month, so that consecutive
        # chunks of DANA rows line up with the target months.
        if MONTHS_TARG[0]>=MONTHS_TARG[-1]:
            print('Target period includes two years')
            print('')
            if ANOMALY_HINDCAST_TARG['Month'][0] != MONTHS_TARG[0]:
                first_window_start = ANOMALY_HINDCAST_TARG[ANOMALY_HINDCAST_TARG['Month']==MONTHS_TARG[0]].index.min()
                # The index is 0..n-1 here, so a positional slice removes exactly the rows
                # labelled 0..first_window_start-1; the result of reset_index is now kept
                # (it was previously computed and discarded).
                ANOMALY_HINDCAST_TARG = ANOMALY_HINDCAST_TARG.iloc[first_window_start:].reset_index(drop=True)
        else:
            print('Target period includes only one year')
            print('')

        # Iterate over the candidate pool in consecutive chunks of DANA rows (the potential analogues)
        for i in range(0, len(ANOMALY_HINDCAST_TARG.index) // DANA):

            candidate_window = ANOMALY_HINDCAST_TARG.iloc[i*DANA:(i+1)*DANA]

            # "predicted" = candidate analogue, "target" = window being hindcast
            df['predicted'] = candidate_window["Anomaly_Qlog"].values.round(5)

            # first-last year label of the candidate window, and the target months
            candidate_years = candidate_window['Year'].tolist()
            ANO_TARG.append(f"{candidate_years[0]}-{candidate_years[-1]}")
            MON_TARG.append(MONTHS_TARG)

            # forecast year and month of the target
            ANO_PRED.append(FORECAST_DATE.year)
            MON_PRED.append(FORECAST_DATE.month)

            # last month of the candidate window
            DATE_DANA = datetime.date(int(candidate_window['Year'].values[-1]), int(candidate_window['Month'].values[-1]), 1)

            # anomaly observed FORECAST_LENGTH months after the candidate window
            ANALOGUE_FORECAST_DATE = add_months(DATE_DANA,FORECAST_LENGTH)
            ANOMALY_OBS = ANOMALY_ANALOGUE_TS.query('DATE == @ANALOGUE_FORECAST_DATE')['Anomaly_Qlog'].values

            # statistics of the forecast month across the retained years (forecast year left out)
            anomaly_forecast_month = ANOMALY_HINDCAST_TARG_ANALOGUE.loc[ANALOGUE_FORECAST_DATE.month]
            qlog_forecast_month = QLOG_HINDCAST_TARG_ANALOGUE.loc[ANALOGUE_FORECAST_DATE.month]

            # mean and standard deviation of the anomalies, needed to rescale the forecast
            MEAN_ANOM_HIND.append(anomaly_forecast_month.mean())
            STD_ANOM_HIND.append(anomaly_forecast_month.std())

            # q1/q3 levels of the anomalies, needed for the contingency tables
            Q1_ANOM_HIND.append(q1(anomaly_forecast_month))
            Q3_ANOM_HIND.append(q3(anomaly_forecast_month))

            # mean and standard deviation of the log-discharge
            MEAN_QLOG_HIND.append(qlog_forecast_month.mean())
            STD_QLOG_HIND.append(qlog_forecast_month.std())

            # The pair is scored only when the following observation exists and is not NaN
            # and the candidate window has no gaps. The explicit size check replaces a bare
            # `except:` around math.isnan(array), which hid unrelated errors and could leave
            # the result lists with different lengths.
            obs_missing = ANOMALY_OBS.size == 0 or np.isnan(ANOMALY_OBS[0])
            if obs_missing or df['predicted'].isnull().values.any():
                RMSE_val = np.nan
                OBS_ANOM.append(np.nan)
            else:
                RMSE_val = rmse(df['predicted'],df['target'])
                OBS_ANOM.append(ANOMALY_OBS.round(5)[0])
            RMSE.append(RMSE_val)
            TARG.append(df['target'].values.tolist())
            PRED.append(df['predicted'].values.tolist())


Dana selected:  6 months
Forecast length selected:  2 months
Year: 1980 - End-Month of observation: 1
Forecast Year: 1980 -  Forecast Month: 3
Dropping year: 1980  - get series for analysis.
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
There are not enough months in the past

Year: 1980 - End-Month of observation: 2
Forecast Year: 1980 -  Forecast Month: 4
Dropping year: 1980  - get series for analysis.
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      0.241717 1980-02-01
There are not enough months in the past

Year: 1980 - End-Month of observation: 3
Forecast Year: 1980 -  Forecast Month: 5
Dropping year: 1980  - get series for analysis.
   Month  Year  Anomaly_Qlog       DATE
0      1  1980     -1.536267 1980-01-01
1      2  1980      0.241717 1980-02-01
2      3  1980      1.049742 1980-03-01
There are not enough months in the past

Year: 1980 - End-Month of observation: 4
Forecast Year: 1980 -  Fo

Target period includes only one year

Year: 1981 - End-Month of observation: 8
Forecast Year: 1981 -  Forecast Month: 10
Dropping year: 1981  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
14      3  1981     -0.458319 1981-03-01
15      4  1981     -0.274466 1981-04-01
16      5  1981      1.482334 1981-05-01
17      6  1981     -0.280376 1981-06-01
18      7  1981      0.350652 1981-07-01
19      8  1981      0.413811 1981-08-01
There are enough months in the past
Target period: 03-1981 to 08-1981
Target period includes only one year

Year: 1981 - End-Month of observation: 9
Forecast Year: 1981 -  Forecast Month: 11
Dropping year: 1981  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
15      4  1981     -0.274466 1981-04-01
16      5  1981      1.482334 1981-05-01
17      6  1981     -0.280376 1981-06-01
18      7  1981      0.350652 1981-07-01
19      8  1981      0.413811 1981-08-01
20      9  1981      0.614313 1981-09-01
There are enough m

Target period includes two years

Year: 1983 - End-Month of observation: 2
Forecast Year: 1983 -  Forecast Month: 4
Dropping year: 1983  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
32      9  1982      0.479720 1982-09-01
33     10  1982           NaN 1982-10-01
34     11  1982           NaN 1982-11-01
35     12  1982           NaN 1982-12-01
36      1  1983     -0.078435 1983-01-01
37      2  1983      0.037709 1983-02-01
There are enough months in the past
Target period: 09-1982 to 02-1983
Target period includes two years

Year: 1983 - End-Month of observation: 3
Forecast Year: 1983 -  Forecast Month: 5
Dropping year: 1983  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
33     10  1982           NaN 1982-10-01
34     11  1982           NaN 1982-11-01
35     12  1982           NaN 1982-12-01
36      1  1983     -0.078435 1983-01-01
37      2  1983      0.037709 1983-02-01
38      3  1983     -1.056707 1983-03-01
There are enough months in t

Year: 1984 - End-Month of observation: 7
Forecast Year: 1984 -  Forecast Month: 9
Dropping year: 1984  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
49      2  1984      2.259184 1984-02-01
50      3  1984      1.123855 1984-03-01
51      4  1984     -0.272078 1984-04-01
52      5  1984      0.460168 1984-05-01
53      6  1984      0.978806 1984-06-01
54      7  1984      1.561809 1984-07-01
There are enough months in the past
Target period: 02-1984 to 07-1984
Target period includes only one year

Year: 1984 - End-Month of observation: 8
Forecast Year: 1984 -  Forecast Month: 10
Dropping year: 1984  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
50      3  1984      1.123855 1984-03-01
51      4  1984     -0.272078 1984-04-01
52      5  1984      0.460168 1984-05-01
53      6  1984      0.978806 1984-06-01
54      7  1984      1.561809 1984-07-01
55      8  1984           NaN 1984-08-01
There are enough months in the past
Target period: 03-198

Year: 1986 - End-Month of observation: 1
Forecast Year: 1986 -  Forecast Month: 3
Dropping year: 1986  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
67      8  1985      0.220850 1985-08-01
68      9  1985      0.475918 1985-09-01
69     10  1985      0.671122 1985-10-01
70     11  1985      0.336701 1985-11-01
71     12  1985     -0.915141 1985-12-01
72      1  1986      0.060480 1986-01-01
There are enough months in the past
Target period: 08-1985 to 01-1986
Target period includes two years

Year: 1986 - End-Month of observation: 2
Forecast Year: 1986 -  Forecast Month: 4
Dropping year: 1986  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
68      9  1985      0.475918 1985-09-01
69     10  1985      0.671122 1985-10-01
70     11  1985      0.336701 1985-11-01
71     12  1985     -0.915141 1985-12-01
72      1  1986      0.060480 1986-01-01
73      2  1986     -0.407393 1986-02-01
There are enough months in the past
Target period: 09-1985 to 

Year: 1987 - End-Month of observation: 7
Forecast Year: 1987 -  Forecast Month: 9
Dropping year: 1987  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
85      2  1987           NaN 1987-02-01
86      3  1987           NaN 1987-03-01
87      4  1987           NaN 1987-04-01
88      5  1987           NaN 1987-05-01
89      6  1987           NaN 1987-06-01
90      7  1987           NaN 1987-07-01
There are enough months in the past
Target period: 02-1987 to 07-1987
Target period includes only one year

Year: 1987 - End-Month of observation: 8
Forecast Year: 1987 -  Forecast Month: 10
Dropping year: 1987  - get series for analysis.
    Month  Year  Anomaly_Qlog       DATE
86      3  1987           NaN 1987-03-01
87      4  1987           NaN 1987-04-01
88      5  1987           NaN 1987-05-01
89      6  1987           NaN 1987-06-01
90      7  1987           NaN 1987-07-01
91      8  1987           NaN 1987-08-01
There are enough months in the past
Target period: 03-198

Year: 1989 - End-Month of observation: 1
Forecast Year: 1989 -  Forecast Month: 3
Dropping year: 1989  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
103      8  1988     -0.485248 1988-08-01
104      9  1988     -1.031575 1988-09-01
105     10  1988     -1.142164 1988-10-01
106     11  1988     -0.726399 1988-11-01
107     12  1988     -1.251198 1988-12-01
108      1  1989     -0.609378 1989-01-01
There are enough months in the past
Target period: 08-1988 to 01-1989
Target period includes two years

Year: 1989 - End-Month of observation: 2
Forecast Year: 1989 -  Forecast Month: 4
Dropping year: 1989  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
104      9  1988     -1.031575 1988-09-01
105     10  1988     -1.142164 1988-10-01
106     11  1988     -0.726399 1988-11-01
107     12  1988     -1.251198 1988-12-01
108      1  1989     -0.609378 1989-01-01
109      2  1989     -1.349524 1989-02-01
There are enough months in the past
Target perio

Year: 1990 - End-Month of observation: 7
Forecast Year: 1990 -  Forecast Month: 9
Dropping year: 1990  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
121      2  1990      0.599554 1990-02-01
122      3  1990      0.285230 1990-03-01
123      4  1990      1.923675 1990-04-01
124      5  1990     -0.170332 1990-05-01
125      6  1990      0.048584 1990-06-01
126      7  1990     -1.538323 1990-07-01
There are enough months in the past
Target period: 02-1990 to 07-1990
Target period includes only one year

Year: 1990 - End-Month of observation: 8
Forecast Year: 1990 -  Forecast Month: 10
Dropping year: 1990  - get series for analysis.
     Month  Year  Anomaly_Qlog       DATE
122      3  1990      0.285230 1990-03-01
123      4  1990      1.923675 1990-04-01
124      5  1990     -0.170332 1990-05-01
125      6  1990      0.048584 1990-06-01
126      7  1990     -1.538323 1990-07-01
127      8  1990     -2.189276 1990-08-01
There are enough months in the past
Target 

In [71]:
# Collect the per-pair lists filled by the hindcast loop into one table,
# adding the columns one at a time in the order listed here.
RESULTS = pd.DataFrame()
for column_name, column_values in (
    ('MONTH_PRED', MON_PRED),
    ('YEAR_PRED', ANO_PRED),
    ('MONTH_TARG', MON_TARG),
    ('YEAR_TARG', ANO_TARG),
    ('RMSE', RMSE),
    ('TARG', TARG),
    ('PRED', PRED),
    ('OBS_ANOM', OBS_ANOM),
    ('MEAN_ANOM_HIND', MEAN_ANOM_HIND),
    ('STD_ANOM_HIND', STD_ANOM_HIND),
    ('Q1_ANOM_HIND', Q1_ANOM_HIND),
    ('Q3_ANOM_HIND', Q3_ANOM_HIND),
):
    RESULTS[column_name] = column_values

In [72]:
# Inspect rows 66 to 79 of the results table.
RESULTS.iloc[66:80]

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
66,2,1981,"[7, 8, 9, 10, 11, 12]",1981-1981,0.8256,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[0.35065, 0.41381, 0.61431, -0.67744, -1.30647...",0.16444,-0.042578,1.043529,-0.617423,0.201534
67,2,1981,"[7, 8, 9, 10, 11, 12]",1982-1982,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[0.62003, 0.23456, 0.47972, nan, nan, nan]",,-0.042578,1.043529,-0.617423,0.201534
68,2,1981,"[7, 8, 9, 10, 11, 12]",1983-1983,1.2525,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-0.2901, 0.75167, 1.5238, 1.35134, 0.84712, -...",2.25918,-0.042578,1.043529,-0.617423,0.201534
69,2,1981,"[7, 8, 9, 10, 11, 12]",1984-1984,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[1.56181, nan, -0.01008, 0.23103, 0.80948, -1....",,-0.042578,1.043529,-0.617423,0.201534
70,2,1981,"[7, 8, 9, 10, 11, 12]",1985-1985,1.0054,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[0.63341, 0.22085, 0.47592, 0.67112, 0.3367, -...",-0.40739,-0.042578,1.043529,-0.617423,0.201534
71,2,1981,"[7, 8, 9, 10, 11, 12]",1986-1986,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[0.12559, 1.3869, 1.00309, 0.77474, 1.21781, 0...",,-0.042578,1.043529,-0.617423,0.201534
72,2,1981,"[7, 8, 9, 10, 11, 12]",1987-1987,,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[nan, nan, nan, nan, nan, nan]",,-0.042578,1.043529,-0.617423,0.201534
73,2,1981,"[7, 8, 9, 10, 11, 12]",1988-1988,1.4123,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-0.6154, -0.48525, -1.03157, -1.14216, -0.726...",-1.34952,-0.042578,1.043529,-0.617423,0.201534
74,2,1981,"[7, 8, 9, 10, 11, 12]",1989-1989,1.5856,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-1.53013, -0.4428, -1.3501, -1.73732, -1.5021...",0.59955,-0.042578,1.043529,-0.617423,0.201534
75,2,1981,"[7, 8, 9, 10, 11, 12]",1990-1990,1.4424,"[0.68247, 0.10954, -0.35398, 0.5367, -0.33175,...","[-1.53832, -2.18928, -1.35111, -0.00801, 0.655...",-0.84495,-0.042578,1.043529,-0.617423,0.201534


In [73]:
# Inspect the last ten rows of the results table.
RESULTS.tail(10)

Unnamed: 0,MONTH_PRED,YEAR_PRED,MONTH_TARG,YEAR_TARG,RMSE,TARG,PRED,OBS_ANOM,MEAN_ANOM_HIND,STD_ANOM_HIND,Q1_ANOM_HIND,Q3_ANOM_HIND
1442,12,1991,"[5, 6, 7, 8, 9, 10]",1981-1981,1.7295,"[-0.24716, nan, nan, nan, nan, nan]","[1.48233, -0.28038, 0.35065, 0.41381, 0.61431,...",0.57294,2.220446e-16,1.0,-0.761602,0.670979
1443,12,1991,"[5, 6, 7, 8, 9, 10]",1982-1982,,"[-0.24716, nan, nan, nan, nan, nan]","[0.65669, 0.99407, 0.62003, 0.23456, 0.47972, ...",,2.220446e-16,1.0,-0.761602,0.670979
1444,12,1991,"[5, 6, 7, 8, 9, 10]",1983-1983,0.6687,"[-0.24716, nan, nan, nan, nan, nan]","[-0.91586, 0.14615, -0.2901, 0.75167, 1.5238, ...",-0.27539,2.220446e-16,1.0,-0.761602,0.670979
1445,12,1991,"[5, 6, 7, 8, 9, 10]",1984-1984,,"[-0.24716, nan, nan, nan, nan, nan]","[0.46017, 0.97881, 1.56181, nan, -0.01008, 0.2...",,2.220446e-16,1.0,-0.761602,0.670979
1446,12,1991,"[5, 6, 7, 8, 9, 10]",1985-1985,0.7641,"[-0.24716, nan, nan, nan, nan, nan]","[0.51696, 0.53094, 0.63341, 0.22085, 0.47592, ...",-0.91514,2.220446e-16,1.0,-0.761602,0.670979
1447,12,1991,"[5, 6, 7, 8, 9, 10]",1986-1986,0.3229,"[-0.24716, nan, nan, nan, nan, nan]","[0.07577, 0.35902, 0.12559, 1.3869, 1.00309, 0...",0.70194,2.220446e-16,1.0,-0.761602,0.670979
1448,12,1991,"[5, 6, 7, 8, 9, 10]",1987-1987,,"[-0.24716, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan]",,2.220446e-16,1.0,-0.761602,0.670979
1449,12,1991,"[5, 6, 7, 8, 9, 10]",1988-1988,0.7201,"[-0.24716, nan, nan, nan, nan, nan]","[-0.96728, -1.3017, -0.6154, -0.48525, -1.0315...",-1.2512,2.220446e-16,1.0,-0.761602,0.670979
1450,12,1991,"[5, 6, 7, 8, 9, 10]",1989-1989,1.7192,"[-0.24716, nan, nan, nan, nan, nan]","[-1.96632, -2.10845, -1.53013, -0.4428, -1.350...",-0.23592,2.220446e-16,1.0,-0.761602,0.670979
1451,12,1991,"[5, 6, 7, 8, 9, 10]",1990-1990,0.0768,"[-0.24716, nan, nan, nan, nan, nan]","[-0.17033, 0.04858, -1.53832, -2.18928, -1.351...",1.26814,2.220446e-16,1.0,-0.761602,0.670979


In [74]:
# Calendar of forecast months: one row per month start, from the forecast month
# of the first RESULTS row to the forecast month of the last RESULTS row.
first_result = RESULTS.iloc[0]
last_result = RESULTS.iloc[-1]
forecast_month_starts = pd.date_range(
    datetime.date(first_result['YEAR_PRED'], first_result['MONTH_PRED'], 1),
    datetime.date(last_result['YEAR_PRED'], last_result['MONTH_PRED'], 1),
    freq='MS',
)

TTT = pd.DataFrame()
TTT['DATE'] = forecast_month_starts
TTT['MONTH_PRED'] = forecast_month_starts.month
TTT['YEAR_PRED'] = forecast_month_starts.year
# Abbreviated month name looked up through the calendar module
TTT['MONTH_PRED_cal'] = TTT['MONTH_PRED'].map(lambda month_number: calendar.month_abbr[month_number])

In [75]:
# Show the last 5 rows of TTT to check where the monthly prediction calendar ends.
TTT.tail(5)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal
132,1991-08-01,8,1991,Aug
133,1991-09-01,9,1991,Sep
134,1991-10-01,10,1991,Oct
135,1991-11-01,11,1991,Nov
136,1991-12-01,12,1991,Dec


In [76]:
best_n = 5  # number of lowest-RMSE candidate rows kept for each predicted month

# Accumulators: one entry is appended for every (year, month) that has rows in RESULTS.
INDEX_best_n = []      # RESULTS index labels of the selected rows
RMSE_best_n = []       # RMSE of the selected rows, ascending
w_best_n = []          # normalised inverse-RMSE weights of the selected rows
OBS_ANOM_best_n = []   # OBS_ANOM of the selected rows

# Hindcast statistics, taken from the best-ranked (lowest-RMSE) row of each month.
MEAN_ANOM_HIND_best_n = []
STD_ANOM_HIND_best_n = []
Q1_ANOM_HIND_best_n = []
Q3_ANOM_HIND_best_n = []

for y in np.sort(TTT['YEAR_PRED'].unique()):
    for m in np.sort(TTT['MONTH_PRED'].unique()):
        # Query and rank the candidates once per (year, month); every list below is
        # derived from this same selection, so a single emptiness check covers them all.
        # NOTE(review): sort_values puts NaN RMSE last, so a NaN row is only selected when
        # fewer than best_n valid candidates exist; the weights for that month are then NaN.
        best_rows = (
            RESULTS
            .query('MONTH_PRED == @m & YEAR_PRED == @y')
            .sort_values(by=['RMSE'])
            .head(best_n)
        )

        # The year x month grid also contains combinations with no rows in RESULTS
        # (presumably the months before the first predicted month); skip those.
        if best_rows.empty:
            continue

        rmse_values = best_rows['RMSE'].tolist()
        INDEX_best_n.append(best_rows.index.tolist())
        RMSE_best_n.append(rmse_values)

        # Weights are proportional to 1/RMSE and normalised to sum to 1 (before rounding).
        inverse_rmse = 1 / np.array(rmse_values)
        w_best_n.append((inverse_rmse / inverse_rmse.sum()).round(6))

        OBS_ANOM_best_n.append(best_rows['OBS_ANOM'].tolist())

        # Only the value of the first (lowest-RMSE) row is kept for each statistic.
        MEAN_ANOM_HIND_best_n.append(best_rows['MEAN_ANOM_HIND'].iloc[0])
        STD_ANOM_HIND_best_n.append(best_rows['STD_ANOM_HIND'].iloc[0])
        Q1_ANOM_HIND_best_n.append(best_rows['Q1_ANOM_HIND'].iloc[0])
        Q3_ANOM_HIND_best_n.append(best_rows['Q3_ANOM_HIND'].iloc[0])

# These lists are meant to become columns of TTT, so they must hold exactly one entry
# per TTT row. Fail here with an explicit message instead of a length-mismatch error later.
if len(INDEX_best_n) != len(TTT):
    raise ValueError(
        'Found analogue candidates for ' + str(len(INDEX_best_n)) + ' months but TTT has '
        + str(len(TTT)) + ' rows; some predicted months have no rows in RESULTS.'
    )

In [77]:
# Attach the per-month selections and hindcast statistics to TTT, one column per list
# (insertion order of the mapping fixes the column order).
best_n_columns = {
    'INDEX_sort_best_n': INDEX_best_n,
    'RMSE_sort_best_n': RMSE_best_n,
    'Weights_sort_best_n': w_best_n,
    'OBS_ANOM_sort_best_n': OBS_ANOM_best_n,
    'MEAN_ANOM_HIND_best_n': MEAN_ANOM_HIND_best_n,
    'STD_ANOM_HIND_best_n': STD_ANOM_HIND_best_n,
    'Q1_ANOM_HIND_best_n': Q1_ANOM_HIND_best_n,
    'Q3_ANOM_HIND_best_n': Q3_ANOM_HIND_best_n,
}
for column_name, column_values in best_n_columns.items():
    TTT[column_name] = column_values

In [78]:
# Inspect the first 15 rows of TTT with the newly attached best-n columns.
TTT.head(15)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n
0,1980-08-01,8,1980,Aug,"[9, 4, 1, 0, 5]","[0.8149, 0.8291, 1.0145, 1.2759, 1.4397]","[0.250574, 0.246283, 0.201274, 0.160038, 0.14183]","[-2.18928, 0.22085, 0.23456, 0.41381, 1.3869]",-0.013692,1.068143,-0.444498,0.427325
1,1980-09-01,9,1980,Sep,"[15, 12, 11, 14, 20]","[0.7295, 0.9818, 0.9915, 1.1416, 1.1493]","[0.266487, 0.198006, 0.196069, 0.170289, 0.169...","[0.47592, 0.47972, 0.61431, -0.01008, -1.35111]",0.039331,1.052424,-0.786416,0.582011
2,1980-10-01,10,1980,Oct,"[26, 22, 29, 27, 31]","[0.4064, 0.9965, 1.3349, 1.3788, 1.4766]","[0.438165, 0.178696, 0.133396, 0.129149, 0.120...","[0.67112, -0.67744, -1.14216, 0.77474, -0.00801]",-0.067088,1.047169,-0.696027,0.675267
3,1980-11-01,11,1980,Nov,"[37, 33, 38, 40, 35]","[0.5056, 0.8776, 1.2272, 1.3386, 1.435]","[0.367897, 0.211951, 0.151572, 0.138958, 0.129...","[0.3367, -1.30647, 1.21781, -0.7264, 0.84712]",0.041469,1.06074,-0.749602,0.810985
4,1980-12-01,12,1980,Dec,"[48, 44, 49, 46, 51]","[0.417, 0.7762, 0.905, 1.2747, 1.4849]","[0.383734, 0.206155, 0.176815, 0.125533, 0.107...","[-0.91514, 0.57294, 0.70194, -0.27539, -1.2512]",-0.162202,0.933941,-0.925055,0.578105
5,1981-01-01,1,1981,Jan,"[59, 55, 57, 62, 64]","[0.4433, 0.8562, 1.0935, 1.2393, 1.462]","[0.386987, 0.200364, 0.156883, 0.138426, 0.11734]","[0.06048, -0.90715, 1.59568, -0.60938, 0.43187]",-0.045825,1.041847,-0.588864,0.238748
6,1981-02-01,2,1981,Feb,"[66, 70, 68, 73, 75]","[0.8256, 1.0054, 1.2525, 1.4123, 1.4424]","[0.27493, 0.225763, 0.181224, 0.160718, 0.157364]","[0.16444, -0.40739, 2.25918, -1.34952, -0.84495]",-0.042578,1.043529,-0.617423,0.201534
7,1981-03-01,3,1981,Mar,"[77, 81, 86, 79, 85]","[0.9869, 1.0182, 1.1219, 1.2754, 1.3655]","[0.230125, 0.223051, 0.202434, 0.17807, 0.16632]","[-0.47138, -1.00258, -0.71812, 1.12386, 0.28523]",0.045832,1.041843,-0.854662,0.850785
8,1981-04-01,4,1981,Apr,"[96, 87, 91, 95, 90]","[0.8044, 0.9849, 1.0726, 1.3487, 1.3797]","[0.266942, 0.21802, 0.200194, 0.159211, 0.155634]","[-1.03422, -0.47539, -0.74372, 1.92367, 0.58775]",0.027447,1.049716,-0.620959,0.649879
9,1981-05-01,5,1981,May,"[106, 97, 101, 105, 99]","[0.7018, 0.902, 1.0416, 1.3212, 1.3956]","[0.286872, 0.223201, 0.193286, 0.152382, 0.144...","[-0.24716, 0.65669, 0.07577, -0.17033, 0.46017]",-0.148233,0.917905,-0.568139,0.487426


In [79]:
# Inspect the last 15 rows of TTT with the newly attached best-n columns.
TTT.tail(15)

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n
122,1990-10-01,10,1990,Oct,"[1300, 1292, 1297, 1295, 1293]","[1.2662, 1.4766, 1.481, 1.7552, 1.7592]","[0.240753, 0.206448, 0.205835, 0.173679, 0.173...","[-1.14216, 0.5367, 0.67112, 1.35134, -0.67744]",0.001001229,1.06904,-0.696027,0.675267
123,1990-11-01,11,1990,Nov,"[1311, 1303, 1308, 1312, 1304]","[1.1381, 1.4995, 1.6509, 1.8629, 1.9096]","[0.273576, 0.207641, 0.188599, 0.167136, 0.163...","[-0.7264, -0.33175, 0.3367, -1.5021, -1.30647]",-0.08195039,1.036233,-0.749602,0.810985
124,1990-12-01,12,1990,Dec,"[1322, 1314, 1323, 1319, 1315]","[1.1256, 1.4944, 1.5231, 1.5827, 1.7076]","[0.258894, 0.195002, 0.191327, 0.184122, 0.170...","[-1.2512, 1.29762, -0.23592, -0.91514, 0.57294]",-0.1585178,0.940425,-0.925055,0.578105
125,1991-01-01,1,1991,Jan,"[1333, 1325, 1330, 1334, 1326]","[1.2164, 1.462, 1.5631, 1.5994, 1.7613]","[0.246222, 0.20486, 0.19161, 0.187261, 0.170048]","[-0.60938, 0.45825, 0.06048, -0.54092, -0.90715]",-0.04318715,1.043223,-0.588864,0.251409
126,1991-02-01,2,1991,Feb,"[1336, 1345, 1344, 1337, 1341]","[1.4424, 1.4696, 1.4945, 1.779, 1.7885]","[0.21911, 0.215055, 0.211472, 0.177653, 0.176709]","[0.42578, 0.59955, -1.34952, 0.16444, -0.40739]",0.08449548,1.011855,-0.17594,0.330069
127,1991-03-01,3,1991,Mar,"[1347, 1355, 1356, 1352, 1348]","[1.1219, 1.5074, 1.5223, 1.5607, 1.6938]","[0.258904, 0.192692, 0.190806, 0.186111, 0.171...","[-0.45832, -1.10046, 0.28523, -1.00258, -0.47138]",0.07181207,1.023758,-0.726359,0.850785
128,1991-04-01,4,1991,Apr,"[1357, 1361, 1362, 1365, 1358]","[0.8044, 1.2216, 1.2245, 1.353, 1.3818]","[0.286363, 0.188564, 0.188118, 0.170252, 0.166...","[-0.27447, 0.58775, -0.74372, -1.22165, -0.47539]",0.1034218,0.990142,-0.490872,0.649879
129,1991-05-01,5,1991,May,"[1367, 1372, 1368, 1371, 1375]","[0.7018, 0.9781, 1.1294, 1.2298, 1.3557]","[0.291781, 0.209356, 0.18131, 0.166508, 0.151045]","[1.48233, 0.07577, 0.65669, 0.51696, -1.96632]",0.0247164,1.050545,-0.528186,0.58403
130,1991-06-01,6,1991,Jun,"[1377, 1382, 1378, 1385, 1381]","[0.7344, 0.9454, 1.1194, 1.2765, 1.3933]","[0.282862, 0.219731, 0.185576, 0.162737, 0.149...","[-0.28038, 0.35902, 0.99407, -2.10845, 0.53094]",1.373901e-16,1.0,-0.109316,0.579904
131,1991-07-01,7,1991,Jul,"[1388, 1387, 1392, 1395, 1391]","[0.8646, 0.9356, 0.9457, 1.343, 1.4265]","[0.244604, 0.226042, 0.223628, 0.157472, 0.148...","[0.62003, 0.35065, 0.12559, -1.53013, 0.63341]",-5.551115e-16,1.0,-0.446244,0.626451


In [80]:
# Forecast anomaly for each row of TTT: the sum of that row's weights multiplied
# element-wise by the observed anomalies of its selected rows.
Pred_Anom = [
    (weights * observed).sum()
    for weights, observed in zip(TTT['Weights_sort_best_n'], TTT['OBS_ANOM_sort_best_n'])
]

TTT['Frcst_Anom'] = Pred_Anom

In [81]:
# Display TTT, now including the Frcst_Anom column.
TTT

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,MONTH_PRED_cal,INDEX_sort_best_n,RMSE_sort_best_n,Weights_sort_best_n,OBS_ANOM_sort_best_n,MEAN_ANOM_HIND_best_n,STD_ANOM_HIND_best_n,Q1_ANOM_HIND_best_n,Q3_ANOM_HIND_best_n,Frcst_Anom
0,1980-08-01,8,1980,Aug,"[9, 4, 1, 0, 5]","[0.8149, 0.8291, 1.0145, 1.2759, 1.4397]","[0.250574, 0.246283, 0.201274, 0.160038, 0.14183]","[-2.18928, 0.22085, 0.23456, 0.41381, 1.3869]",-1.369191e-02,1.068143,-0.444498,0.427325,-0.184045
1,1980-09-01,9,1980,Sep,"[15, 12, 11, 14, 20]","[0.7295, 0.9818, 0.9915, 1.1416, 1.1493]","[0.266487, 0.198006, 0.196069, 0.170289, 0.169...","[0.47592, 0.47972, 0.61431, -0.01008, -1.35111]",3.933092e-02,1.052424,-0.786416,0.582011,0.112007
2,1980-10-01,10,1980,Oct,"[26, 22, 29, 27, 31]","[0.4064, 0.9965, 1.3349, 1.3788, 1.4766]","[0.438165, 0.178696, 0.133396, 0.129149, 0.120...","[0.67112, -0.67744, -1.14216, 0.77474, -0.00801]",-6.708771e-02,1.047169,-0.696027,0.675267,0.119737
3,1980-11-01,11,1980,Nov,"[37, 33, 38, 40, 35]","[0.5056, 0.8776, 1.2272, 1.3386, 1.435]","[0.367897, 0.211951, 0.151572, 0.138958, 0.129...","[0.3367, -1.30647, 1.21781, -0.7264, 0.84712]",4.146865e-02,1.060740,-0.749602,0.810985,0.040416
4,1980-12-01,12,1980,Dec,"[48, 44, 49, 46, 51]","[0.417, 0.7762, 0.905, 1.2747, 1.4849]","[0.383734, 0.206155, 0.176815, 0.125533, 0.107...","[-0.91514, 0.57294, 0.70194, -0.27539, -1.2512]",-1.622021e-01,0.933941,-0.925055,0.578105,-0.278346
...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1991-08-01,8,1991,Aug,"[1403, 1400, 1399, 1406, 1398]","[0.3461, 0.613, 0.8946, 0.9461, 1.0248]","[0.376645, 0.212654, 0.145715, 0.137783, 0.127...","[1.3869, 0.75167, 0.23456, -0.4428, 0.41381]",-4.934325e-17,1.000000,-0.310240,0.370790,0.708021
133,1991-09-01,9,1991,Sep,"[1414, 1411, 1410, 1417, 1409]","[0.3395, 0.636, 0.7431, 0.9208, 1.1457]","[0.376548, 0.201003, 0.172034, 0.138834, 0.111...","[1.00309, 1.5238, 0.47972, -1.3501, 0.61431]",2.220446e-17,1.000000,-0.679225,0.544325,0.647634
134,1991-10-01,10,1991,Oct,"[1425, 1422, 1428, 1420, 1424]","[0.2998, 0.5289, 1.0225, 1.1009, 1.3081]","[0.423451, 0.240027, 0.124157, 0.115315, 0.09705]","[0.77474, 1.35134, -1.73732, -0.67744, 0.67112]",9.753001e-17,1.000000,-0.516775,0.638861,0.423735
135,1991-11-01,11,1991,Nov,"[1436, 1433, 1439, 1435, 1431]","[0.3071, 0.6018, 1.2228, 1.2678, 1.3357]","[0.447709, 0.228467, 0.11244, 0.108449, 0.102936]","[1.21781, 0.84712, -1.5021, 0.3367, -1.30647]",1.233581e-17,1.000000,-0.631683,0.772549,0.471899


In [82]:
# Re-standardise the forecast anomaly: subtract the hindcast mean, then divide by the
# hindcast standard deviation stored for the same row.
TTT['Frcst_Anom_Rest'] = (
    TTT['Frcst_Anom']
    .sub(TTT['MEAN_ANOM_HIND_best_n'])
    .div(TTT['STD_ANOM_HIND_best_n'])
)

In [83]:
# Compact hindcast table: the date columns of TTT plus the re-standardised forecast
# anomaly, exposed under the name FORECAST.
HIND_FORECAST_ANALOGUE = pd.DataFrame({
    'DATE': TTT['DATE'],
    'MONTH_PRED': TTT['MONTH_PRED'],
    'YEAR_PRED': TTT['YEAR_PRED'],
    'FORECAST': TTT['Frcst_Anom_Rest'],
})

In [84]:
# Display the observed anomaly series (Month, Year, Anomaly_Qlog, DATE) before merging.
ANOMALY_ANALOGUE_TS

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE
0,1,1980,-1.536267,1980-01-01
1,2,1980,0.241717,1980-02-01
2,3,1980,1.049742,1980-03-01
3,4,1980,1.300559,1980-04-01
4,5,1980,1.075049,1980-05-01
...,...,...,...,...
139,8,1991,,1991-08-01
140,9,1991,,1991-09-01
141,10,1991,,1991-10-01
142,11,1991,,1991-11-01


In [85]:
# Display the hindcast table (DATE, MONTH_PRED, YEAR_PRED, FORECAST) before merging.
HIND_FORECAST_ANALOGUE

Unnamed: 0,DATE,MONTH_PRED,YEAR_PRED,FORECAST
0,1980-08-01,8,1980,-0.159485
1,1980-09-01,9,1980,0.069056
2,1980-10-01,10,1980,0.178409
3,1980-11-01,11,1980,-0.000992
4,1980-12-01,12,1980,-0.124359
...,...,...,...,...
132,1991-08-01,8,1991,0.708021
133,1991-09-01,9,1991,0.647634
134,1991-10-01,10,1991,0.423735
135,1991-11-01,11,1991,0.471899


In [86]:
# Left-join the hindcast FORECAST onto the observed anomaly series by DATE, keeping every
# observed row; dates without a hindcast get NaN in FORECAST.
OBSERVED_AND_HIND_FORECAST_ANALOGUE = ANOMALY_ANALOGUE_TS.merge(
    HIND_FORECAST_ANALOGUE.loc[:, ['DATE', 'FORECAST']],
    on='DATE',
    how='left',
)

In [87]:
# Inspect the first 31 rows; rows dated before the first hindcast have NaN in FORECAST.
OBSERVED_AND_HIND_FORECAST_ANALOGUE.head(31)

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE,FORECAST
0,1,1980,-1.536267,1980-01-01,
1,2,1980,0.241717,1980-02-01,
2,3,1980,1.049742,1980-03-01,
3,4,1980,1.300559,1980-04-01,
4,5,1980,1.075049,1980-05-01,
5,6,1980,0.632953,1980-06-01,
6,7,1980,0.682468,1980-07-01,
7,8,1980,0.109535,1980-08-01,-0.159485
8,9,1980,-0.353978,1980-09-01,0.069056
9,10,1980,0.536702,1980-10-01,0.178409


In [88]:
# Inspect the last 25 rows to compare Anomaly_Qlog with FORECAST at the end of the record.
OBSERVED_AND_HIND_FORECAST_ANALOGUE.tail(25)

Unnamed: 0,Month,Year,Anomaly_Qlog,DATE,FORECAST
119,12,1989,-0.235918,1989-12-01,-0.355507
120,1,1990,-0.540918,1990-01-01,-0.302995
121,2,1990,0.599554,1990-02-01,-0.496369
122,3,1990,0.28523,1990-03-01,-0.739936
123,4,1990,1.923675,1990-04-01,-0.698559
124,5,1990,-0.170332,1990-05-01,0.144933
125,6,1990,0.048584,1990-06-01,0.019911
126,7,1990,-1.538323,1990-07-01,0.526253
127,8,1990,-2.189276,1990-08-01,0.07656
128,9,1990,-1.351109,1990-09-01,0.024773


### Save OBSERVED_AND_HIND_FORECAST_ANALOGUE to a CSV file

In [44]:
# Output file stem, tagged with the FORECAST_LENGTH (FL) and DANA settings in use.
filename = ''.join((
    'santalucia_caudales_mar2023',
    '_FL_', str(FORECAST_LENGTH),
    '_DANA_', str(DANA),
))

In [45]:
# Display the generated file stem to verify it before writing the CSV.
filename

'santalucia_caudales_mar2023_FL_1_DANA_6'

In [47]:
# Write the merged observed/hindcast table to ../data/<filename>.csv without the index column.
OBSERVED_AND_HIND_FORECAST_ANALOGUE.to_csv(
    ''.join(('../data/', filename, '.csv')),
    index=False,
)