## Load libraries and functions

In [None]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
sns.set()
# import matplotlib for plotting
import matplotlib.pyplot as plt

from myVAR import myVAR,adfuller_test,plot_vars,plot_comparison

import warnings
warnings.filterwarnings('ignore')

## Instantiate a VAR with parameters

In [None]:
def generate_result(country, nobs, diff_order, forecast_steps, p,
                    plot=False, error=False, data=None):
    """Fit a ``myVAR`` model for one country and return history plus forecast.

    The pipeline is: select the country's rows, linearly interpolate gaps,
    split, difference, fit, forecast on the differenced scale, invert the
    differencing, and finally stack the training data with the forecast and
    its lower/upper bounds into one wide table.

    Args:
        country: Value of the ``Country`` column to model.
        nobs: Passed to ``myVAR.ts_split`` (presumably the number of
            held-out observations -- confirm against ``myVAR``).
        diff_order: Differencing order passed to ``get_diff`` and
            ``get_inv_diff``.
        forecast_steps: Number of steps passed to ``get_forecast_diff``.
        p: Lag order passed to ``get_fit``.
        plot: When true, call ``plot_comparison`` for the fitted model.
        error: When true, compute ``get_forecast_error`` for the forecast and
            print it (it was previously computed and silently discarded).
        data: Optional pre-loaded panel indexed by ``Year`` with a
            ``Country`` column.  When omitted, the project CSV is downloaded.
            The frame passed in is not modified.

    Returns:
        A DataFrame with a ``Year`` column, a ``Country`` column and, for
        every modelled series, the value plus ``_lower`` and ``_upper``
        columns (alphabetically sorted).  For training rows the bounds equal
        the observed values.
    """
    if data is None:
        data = pd.read_csv(
            'https://mda-project-poland.s3.eu-west-3.amazonaws.com/var_data_COMPLETE.csv',
            index_col='Year')

    # Non-inplace chain: the boolean-mask selection yields a new frame, so
    # neither the caller's data nor a temporary slice is mutated in place.
    ts = (
        data.loc[data['Country'] == country]
        .drop(columns=['Country'])
        .interpolate(method='linear', axis=0)
    )

    # Optional diagnostics referenced by the original notebook but not run
    # here: granger_matrix(), cointegration_test(), inspect_lag(),
    # check_serial_correlation().
    model = myVAR(ts)
    model.ts_split(nobs)
    model.ts_train_diff = model.get_diff(model.ts_train, diff_order, verbose=False)
    model.fit = model.get_fit(p)
    # NOTE(review): the summary is discarded; the call is kept so that any
    # failure it raises for an unsuitable fit still propagates to the caller.
    model.fit.summary()

    # Forecast on the differenced scale.  The bounds are attached as
    # attributes of the point-forecast frame; plot_comparison presumably
    # reads them from there -- confirm before restructuring.
    diff_point, diff_lower, diff_upper = model.get_forecast_diff(forecast_steps, plot=False)
    model.ts_forecast_diff = diff_point
    model.ts_forecast_diff.lower = diff_lower
    model.ts_forecast_diff.upper = diff_upper

    # Invert the differencing to get forecasts on the original scale.
    model.ts_forecast = model.get_inv_diff(model.ts_forecast_diff, diff_order)
    model.ts_forecast.lower = model.get_inv_diff(model.ts_forecast_diff.lower, diff_order)
    model.ts_forecast.upper = model.get_inv_diff(model.ts_forecast_diff.upper, diff_order)

    # Stack training history on top of the forecast and of each bound, and
    # suffix the bound columns so they can sit next to the point values.
    model.ts_results = pd.concat([model.ts_train, model.ts_forecast])
    model.ts_results.lower = pd.concat(
        [model.ts_train, model.ts_forecast.lower]).add_suffix('_lower')
    model.ts_results.upper = pd.concat(
        [model.ts_train, model.ts_forecast.upper]).add_suffix('_upper')

    output = pd.concat(
        [model.ts_results, model.ts_results.lower, model.ts_results.upper], axis=1)
    output['Country'] = country  # add country as a column
    output = output.reindex(columns=sorted(output.columns))  # sort columns
    output = output.rename_axis(['Year']).reset_index()  # make year a column

    if plot:
        plot_comparison(model,
                        steps=forecast_steps,
                        name=str([country, nobs, diff_order, forecast_steps, p]),
                        interval=True,
                        figsize=(10, 10))

    if error:
        # Use a separate name so the boolean flag is not shadowed, and show
        # the metrics so that requesting them has a visible effect.
        forecast_error = model.get_forecast_error(model.ts_forecast)
        print(forecast_error)

    return output

- Bad country (by Python index)
  - 1 AFG
- Bad nobs
- Bad diff_order
- Bad p

In [None]:

# Parameter sweep: for every (nobs, diff_order, p) combination, run
# generate_result for each country and write the stacked results to
# '<ID>.csv'.  Countries whose fit fails are reported and skipped.
raw_data = pd.read_csv('https://mda-project-poland.s3.eu-west-3.amazonaws.com/var_data_COMPLETE.csv', index_col='Year')
countryList = raw_data['Country'].unique()  # 37 countries
nobsList = [1, 2, 3, 4]
diff_orderList = [1, 2]
pList = [1, 2, 3, 4]
forecast_steps = 50  # parameter

# Countries skipped outright because their data is not valid for the model.
excluded_countries = {'Afghanistan'}

for nobs in nobsList:
    for diff_order in diff_orderList:
        for p in pList:
            ID = f'nobs{nobs}__difforder{diff_order}__p{p}'
            # Collect per-country frames and concatenate once per combination
            # instead of re-copying the accumulated frame on every iteration.
            country_frames = []
            for country in countryList:
                if country in excluded_countries:
                    print("DATA IS NOT VALID - CHANGE COUNTRY")
                    continue

                print('-'*30)
                print('Country::nobs::diff_order::forecast_steps::p')
                print(country, nobs, diff_order, forecast_steps, p)
                print('-'*30)
                try:
                    output = generate_result(country, nobs, diff_order, forecast_steps, p, plot=True)
                except Exception as exc:
                    # Catch Exception rather than everything, so that
                    # interrupting the kernel (KeyboardInterrupt) still stops
                    # the sweep; also report why the fit failed.
                    print("ERROR: PARAMETER NOT SUITABLE")
                    print(f"{type(exc).__name__}: {exc}")
                else:
                    country_frames.append(output)

            # An empty frame is still written when every country failed, so
            # each combination always produces a file.
            df = pd.concat(country_frames) if country_frames else pd.DataFrame()
            df.to_csv(f'{ID}.csv')
# Display the results of the last parameter combination.
df