### ACTUAL


In [None]:
import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb

from prophet import Prophet
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Function to save the plot in both Indicators and Countries folders."""

    model_colors = {
    "ARIMA": "blue",
    "Holt_Winters": "yellow",
    "LSTM": "black",
    "XGBoost": "pink",
    "Prophet": "brown"
}

    # Plotting predicted vs actual
    plt.figure(figsize=(10, 6))
    if model_name == "Prophet":
        plt.plot(train['ds'], train['y'], label='Train Data', color='green', linestyle='--')
        plt.plot(test_y['ds'], test_y['y'], label='Actual', color='red', linestyle='--')
        plt.plot(test_y['ds'], predictions, label=f'Predicted({model_name})', color=f'{model_colors["Prophet"]}', 
                 linestyle='-', marker='o')
    else:
        plt.plot(train.index, train, label='Train Data', color='green', linestyle='--')
        plt.plot(test_y.index, test_y, label='Actual', color='red', linestyle='--')
        plt.plot(test_y.index, predictions, label=f'Predicted({model_name})', color=f'{model_colors[model_name]}', 
                 linestyle='-', marker='o')
    

    
    plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
    plt.xlabel('Year')
    plt.ylabel('Value')
    plt.legend()

    # Create subfolder for the indicator if it doesn't exist
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    os.makedirs(indicator_folder, exist_ok=True)
    
    # Save the plot in the Indicators folder with dynamic model name
    plot_filename_indicator = os.path.join(indicator_folder, f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png')
    plt.savefig(plot_filename_indicator)

    # Create subfolder for the country if it doesn't exist
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)
    os.makedirs(country_folder, exist_ok=True)
    
    # Save the same plot in the Countries folder with dynamic model name
    plot_filename_country = os.path.join(country_folder, f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png')
    plt.savefig(plot_filename_country)

    plt.close()





def train_prophet(train_df, test_y, country, indicator):
    param_grid = {
        'changepoint_prior_scale': [0.01, 0.05, 0.1, 0.2, 0.5],
        'seasonality_mode': ['additive', 'multiplicative'],
        'changepoint_range': [0.8, 0.9, 1],  # Range of the history for changepoint detection
        'n_changepoints': [15, 20, 25, 30],  # Number of changepoints
        'yearly_seasonality': [True, False]  # Adding the toggle for yearly seasonality
    }

    best_rmse = float('inf')
    best_params = None

    for changepoint_prior in param_grid['changepoint_prior_scale']:
        for seasonality_mode in param_grid['seasonality_mode']:
            for changepoint_range in param_grid['changepoint_range']:
                for n_changepoints in param_grid['n_changepoints']:
                    for yearly_seasonality in param_grid['yearly_seasonality']:
                        try:
                            model = Prophet(
                                yearly_seasonality=yearly_seasonality,
                                weekly_seasonality=False,
                                daily_seasonality=False,
                                changepoint_prior_scale=changepoint_prior,
                                seasonality_mode=seasonality_mode,
                                changepoint_range=changepoint_range,
                                n_changepoints=n_changepoints
                            )
                            model.fit(train_df)

                            future = model.make_future_dataframe(periods=len(test_y['y']), freq='Y')
                            forecast = model.predict(future)
                            predictions = forecast['yhat'].iloc[-len(test_y['y']):].values
                            rmse = np.sqrt(mean_squared_error(test_y['y'], predictions))

                            if rmse < best_rmse:
                                best_rmse = rmse
                                best_params = {
                                    'changepoint_prior_scale': changepoint_prior,
                                    'seasonality_mode': seasonality_mode,
                                    'changepoint_range': changepoint_range,
                                    'n_changepoints': n_changepoints,
                                    'yearly_seasonality': yearly_seasonality
                                }
                        except Exception as e:
                            print(f"Error with parameters: changepoint_prior={changepoint_prior}, "
                                          f"seasonality_mode={seasonality_mode}, changepoint_range={changepoint_range}, "
                                          f"n_changepoints={n_changepoints}, yearly_seasonality={yearly_seasonality}. Error: {e}")
                            continue

    # Train the best model with the selected parameters
    if best_params:
        best_model = Prophet(
            yearly_seasonality=best_params['yearly_seasonality'],
            weekly_seasonality=False,
            daily_seasonality=False,
            changepoint_prior_scale=best_params['changepoint_prior_scale'],
            seasonality_mode=best_params['seasonality_mode'],
            changepoint_range=best_params['changepoint_range'],
            n_changepoints=best_params['n_changepoints']
        )
        best_model.fit(train_df)
        future = best_model.make_future_dataframe(periods=len(test_y['y']), freq='Y')
        forecast = best_model.predict(future)
        predictions = forecast['yhat'].iloc[-len(test_y['y']):].values

        # Save the best parameters to JSON
        params_dir = os.path.join("../best_params", indicator)
        os.makedirs(params_dir, exist_ok=True)
        params_file = os.path.join(params_dir, f"Prophet_{country}.json")
        with open(params_file, "w") as f:
            json.dump(best_params, f, indent=4)

        # Call the save_plot function to save the plot
        save_plot(train_df, test_y, predictions, country, indicator, model_name="Prophet")


        return np.sqrt(mean_squared_error(test_y['y'], predictions)), predictions
    else:
        return None, None






with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)
        
        if os.path.exists(filepath):
            df = pd.read_parquet(filepath)
            if 'Year' in df.columns and 'Value' in df.columns:
                df = df.set_index('Year').sort_index()
                df.index = pd.to_datetime(df.index, format='%Y')
                df = df.dropna()
                df = df.drop('Indicator', axis = 1)
                df_original = df.copy()
                
                
                #df = df.dropna()
                train_size = int(len(df) * 0.8)
                

                
                model_errors_rmse[(country, indicator)] = {}


                prophet_train_df = df_original.iloc[:train_size].reset_index().rename(columns={'Year': 'ds', 'Value': 'y'})
                prophet_test_df = df_original.iloc[train_size:].reset_index().rename(columns={'Year': 'ds', 'Value': 'y'})
                prophet_train_df['ds'] = pd.to_datetime(prophet_train_df['ds'])
                prophet_test_df['ds'] = pd.to_datetime(prophet_test_df['ds'])
                model_errors_rmse[(country, indicator)]['Prophet'] , prop_error = train_prophet(prophet_train_df, prophet_test_df,
                                                                                   country,indicator)
                
                sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
                log_current_data = []
                for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
                    log_data.append([country, indicator, model_name, rmse, rank])
                    log_current_data.append([country, indicator, model_name, rmse, rank])

                
from datetime import datetime
model ="Prophet"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)


  from .autonotebook import tqdm as notebook_tqdm
22:09:59 - cmdstanpy - INFO - Chain [1] start processing
22:09:59 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
22:09:59 - cmdstanpy - INFO - Chain [1] start processing
22:09:59 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
22:09:59 - cmdstanpy - INFO - Chain [1] start processing
22:09:59 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
22:10:00 - cmdstanpy - INFO - Chain [1] start processing
22:10:00 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
22:10:00 - cmdstanpy - INFO - Chain [1] start processing
22:10:00 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
22:10:00 - cmdstanpy - INFO - Chain [1] start processing
22:10:01 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
22:10:01 - cmdstanpy - INFO - Chain [1] start processing
22:10:01 - cmdstanpy - INFO - Chain [1] done processing
  dates =

### TEST

In [1]:
import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb

from prophet import Prophet
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Function to save the plot in both Indicators and Countries folders."""

    model_colors = {
    "ARIMA": "blue",
    "Holt_Winters": "yellow",
    "LSTM": "black",
    "XGBoost": "pink",
    "Prophet": "brown"
}

    # Plotting predicted vs actual
    plt.figure(figsize=(10, 6))
    if model_name == "Prophet":
        plt.plot(train['ds'], train['y'], label='Train Data', color='green', linestyle='--')
        plt.plot(test_y['ds'], test_y['y'], label='Actual', color='red', linestyle='--')
        plt.plot(test_y['ds'], predictions, label=f'Predicted({model_name})', color=f'{model_colors["Prophet"]}', 
                 linestyle='-', marker='o')
    else:
        plt.plot(train.index, train, label='Train Data', color='green', linestyle='--')
        plt.plot(test_y.index, test_y, label='Actual', color='red', linestyle='--')
        plt.plot(test_y.index, predictions, label=f'Predicted({model_name})', color=f'{model_colors[model_name]}', 
                 linestyle='-', marker='o')
    

    
    plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
    plt.xlabel('Year')
    plt.ylabel('Value')
    plt.legend()

    # Create subfolder for the indicator if it doesn't exist
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    os.makedirs(indicator_folder, exist_ok=True)
    
    # Save the plot in the Indicators folder with dynamic model name
    plot_filename_indicator = os.path.join(indicator_folder, f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png')
    plt.savefig(plot_filename_indicator)

    # Create subfolder for the country if it doesn't exist
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)
    os.makedirs(country_folder, exist_ok=True)
    
    # Save the same plot in the Countries folder with dynamic model name
    plot_filename_country = os.path.join(country_folder, f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png')
    plt.savefig(plot_filename_country)

    plt.close()





def train_prophet(train_df, test_y, country, indicator):
    params_dir = os.path.join("../best_params", indicator)
    params_file = os.path.join(params_dir, f"Prophet_{country}.json")
    best_params = None

    # Check if the parameter file exists
    if os.path.exists(params_file):
        print(f"Loading parameters from {params_file}...")
        with open(params_file, "r") as f:
            best_params = json.load(f)
    else:
        print("No saved parameters found, performing grid search...")
        param_grid = {
            'changepoint_prior_scale': [0.01, 0.05, 0.1, 0.2, 0.5],
            'seasonality_mode': ['additive', 'multiplicative'],
            'changepoint_range': [0.8, 0.9, 1],  # Range of the history for changepoint detection
            'n_changepoints': [15, 20, 25, 30],  # Number of changepoints
            'yearly_seasonality': [True, False]  # Adding the toggle for yearly seasonality
        }

        best_rmse = float('inf')

        for changepoint_prior in param_grid['changepoint_prior_scale']:
            for seasonality_mode in param_grid['seasonality_mode']:
                for changepoint_range in param_grid['changepoint_range']:
                    for n_changepoints in param_grid['n_changepoints']:
                        for yearly_seasonality in param_grid['yearly_seasonality']:
                            try:
                                model = Prophet(
                                    yearly_seasonality=yearly_seasonality,
                                    weekly_seasonality=False,
                                    daily_seasonality=False,
                                    changepoint_prior_scale=changepoint_prior,
                                    seasonality_mode=seasonality_mode,
                                    changepoint_range=changepoint_range,
                                    n_changepoints=n_changepoints
                                )
                                model.fit(train_df)

                                future = model.make_future_dataframe(periods=len(test_y['y']), freq='Y')
                                forecast = model.predict(future)
                                predictions = forecast['yhat'].iloc[-len(test_y['y']):].values
                                rmse = np.sqrt(mean_squared_error(test_y['y'], predictions))

                                if rmse < best_rmse:
                                    best_rmse = rmse
                                    best_params = {
                                        'changepoint_prior_scale': changepoint_prior,
                                        'seasonality_mode': seasonality_mode,
                                        'changepoint_range': changepoint_range,
                                        'n_changepoints': n_changepoints,
                                        'yearly_seasonality': yearly_seasonality
                                    }
                            except Exception as e:
                                print(f"Error with parameters: changepoint_prior={changepoint_prior}, "
                                      f"seasonality_mode={seasonality_mode}, changepoint_range={changepoint_range}, "
                                      f"n_changepoints={n_changepoints}, yearly_seasonality={yearly_seasonality}. Error: {e}")
                                continue

        # Save the best parameters to JSON if found
        if best_params:
            os.makedirs(params_dir, exist_ok=True)
            with open(params_file, "w") as f:
                json.dump(best_params, f, indent=4)

    # Train the best model using loaded or selected parameters
    if best_params:
        best_model = Prophet(
            yearly_seasonality=best_params['yearly_seasonality'],
            weekly_seasonality=False,
            daily_seasonality=False,
            changepoint_prior_scale=best_params['changepoint_prior_scale'],
            seasonality_mode=best_params['seasonality_mode'],
            changepoint_range=best_params['changepoint_range'],
            n_changepoints=best_params['n_changepoints']
        )
        best_model.fit(train_df)
        future = best_model.make_future_dataframe(periods=len(test_y['y']), freq='Y')
        forecast = best_model.predict(future)
        predictions = forecast['yhat'].iloc[-len(test_y['y']):].values

        # Call the save_plot function to save the plot
        save_plot(train_df, test_y, predictions, country, indicator, model_name="Prophet")

        return np.sqrt(mean_squared_error(test_y['y'], predictions)), predictions
    else:
        print("No suitable parameters found.")
        return None, None






with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)
        
        if os.path.exists(filepath):
            df = pd.read_parquet(filepath)
            if 'Year' in df.columns and 'Value' in df.columns:
                df = df.set_index('Year').sort_index()
                df.index = pd.to_datetime(df.index, format='%Y')
                df = df.dropna()
                df = df.drop('Indicator', axis = 1)
                df_original = df.copy()
                
                
                #df = df.dropna()
                train_size = int(len(df) * 0.8)
                

                
                model_errors_rmse[(country, indicator)] = {}


                prophet_train_df = df_original.iloc[:train_size].reset_index().rename(columns={'Year': 'ds', 'Value': 'y'})
                prophet_test_df = df_original.iloc[train_size:].reset_index().rename(columns={'Year': 'ds', 'Value': 'y'})
                prophet_train_df['ds'] = pd.to_datetime(prophet_train_df['ds'])
                prophet_test_df['ds'] = pd.to_datetime(prophet_test_df['ds'])
                model_errors_rmse[(country, indicator)]['Prophet'] , prop_error = train_prophet(prophet_train_df, prophet_test_df,
                                                                                   country,indicator)
                
                sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
                log_current_data = []
                for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
                    log_data.append([country, indicator, model_name, rmse, rank])
                    log_current_data.append([country, indicator, model_name, rmse, rank])

                
from datetime import datetime
model ="Prophet"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)


  from .autonotebook import tqdm as notebook_tqdm
20:02:48 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Czech Republic.json...


20:02:48 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:02:48 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Czech Republic.json...


20:02:48 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:02:49 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Czech Republic.json...


20:02:58 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:02:59 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Czech Republic.json...


20:02:59 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:02:59 - cmdstanpy - INFO - Chain [1] start processing
20:02:59 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Czech Republic.json...


  dates = pd.date_range(
20:03:00 - cmdstanpy - INFO - Chain [1] start processing
20:03:00 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Czech Republic.json...


  dates = pd.date_range(
20:03:00 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Czech Republic.json...


20:03:00 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:01 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Hungary.json...


20:03:01 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:01 - cmdstanpy - INFO - Chain [1] start processing
20:03:01 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Hungary.json...


  dates = pd.date_range(
20:03:02 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Hungary.json...


20:03:02 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:02 - cmdstanpy - INFO - Chain [1] start processing
20:03:02 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Hungary.json...


  dates = pd.date_range(
20:03:03 - cmdstanpy - INFO - Chain [1] start processing
20:03:03 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Hungary.json...


  dates = pd.date_range(
20:03:03 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Hungary.json...


20:03:03 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:04 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Hungary.json...


20:03:04 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:04 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Poland.json...


20:03:04 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:05 - cmdstanpy - INFO - Chain [1] start processing
20:03:05 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Poland.json...


  dates = pd.date_range(
20:03:05 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Poland.json...


20:03:05 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:06 - cmdstanpy - INFO - Chain [1] start processing
20:03:06 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Poland.json...


  dates = pd.date_range(
20:03:06 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Poland.json...


20:03:06 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:07 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Poland.json...


20:03:07 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:07 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Poland.json...


20:03:07 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:08 - cmdstanpy - INFO - Chain [1] start processing
20:03:08 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Slovakia.json...


  dates = pd.date_range(
20:03:08 - cmdstanpy - INFO - Chain [1] start processing
20:03:08 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Slovakia.json...


  dates = pd.date_range(
20:03:09 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Slovakia.json...


20:03:09 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:09 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Slovakia.json...


20:03:09 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:10 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Slovakia.json...


20:03:10 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:10 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Slovakia.json...


20:03:11 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:11 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Slovakia.json...


20:03:12 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:12 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Germany.json...


20:03:12 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:13 - cmdstanpy - INFO - Chain [1] start processing
20:03:13 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Germany.json...


  dates = pd.date_range(
20:03:13 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Germany.json...


20:03:13 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:14 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Germany.json...


20:03:14 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:14 - cmdstanpy - INFO - Chain [1] start processing
20:03:14 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Germany.json...


  dates = pd.date_range(
20:03:15 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Germany.json...


20:03:15 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:15 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Germany.json...


20:03:15 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:16 - cmdstanpy - INFO - Chain [1] start processing
20:03:16 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Austria.json...


  dates = pd.date_range(
20:03:16 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Austria.json...


20:03:16 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:17 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Austria.json...


20:03:17 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:17 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Austria.json...


20:03:17 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:18 - cmdstanpy - INFO - Chain [1] start processing
20:03:18 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Austria.json...


  dates = pd.date_range(
20:03:18 - cmdstanpy - INFO - Chain [1] start processing
20:03:18 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Austria.json...


  dates = pd.date_range(
20:03:19 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Austria.json...


20:03:19 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:19 - cmdstanpy - INFO - Chain [1] start processing
20:03:19 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_France.json...


  dates = pd.date_range(
20:03:20 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP (USD)\Prophet_France.json...


20:03:20 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:20 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_France.json...


20:03:20 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:21 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_France.json...


20:03:21 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:21 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_France.json...


20:03:22 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:22 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_France.json...


20:03:22 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:23 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_France.json...


20:03:23 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:23 - cmdstanpy - INFO - Chain [1] start processing
20:03:23 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP per Capita (USD)\Prophet_Italy.json...


  dates = pd.date_range(
20:03:23 - cmdstanpy - INFO - Chain [1] start processing
20:03:24 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\GDP (USD)\Prophet_Italy.json...


  dates = pd.date_range(
20:03:24 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Inflation (CPI)\Prophet_Italy.json...


20:03:24 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:25 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Unemployment Rate (%)\Prophet_Italy.json...


20:03:25 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:25 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\GDP growth (annual %)\Prophet_Italy.json...


20:03:25 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
20:03:26 - cmdstanpy - INFO - Chain [1] start processing
20:03:26 - cmdstanpy - INFO - Chain [1] done processing


Loading parameters from ../best_params\Imports of goods and services (% of GDP)\Prophet_Italy.json...


  dates = pd.date_range(
20:03:26 - cmdstanpy - INFO - Chain [1] start processing


Loading parameters from ../best_params\Exports of goods and services (% of GDP)\Prophet_Italy.json...


20:03:27 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(


### OLD VERSIONS

In [None]:
import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import RFE
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense , Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Function to save the plot in both Indicators and Countries folders."""

    model_colors = {
    "ARIMA": "blue",
    "Holt_Winters": "yellow",
    "LSTM": "black",
    "XGBoost": "pink",
    "Prophet": "brown"
}

    # Plotting predicted vs actual
    plt.figure(figsize=(10, 6))
    if model_name == "Prophet":
        plt.plot(train['ds'], train['y'], label='Train Data', color='green', linestyle='--')
        plt.plot(test_y['ds'], test_y['y'], label='Actual', color='red', linestyle='--')
        plt.plot(test_y['ds'], predictions, label=f'Predicted({model_name})', color=f'{model_colors["Prophet"]}', 
                 linestyle='-', marker='o')
    else:
        plt.plot(train.index, train, label='Train Data', color='green', linestyle='--')
        plt.plot(test_y.index, test_y, label='Actual', color='red', linestyle='--')
        plt.plot(test_y.index, predictions, label=f'Predicted({model_name})', color=f'{model_colors[model_name]}', 
                 linestyle='-', marker='o')
    

    
    plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
    plt.xlabel('Year')
    plt.ylabel('Value')
    plt.legend()

    # Create subfolder for the indicator if it doesn't exist
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    os.makedirs(indicator_folder, exist_ok=True)
    
    # Save the plot in the Indicators folder with dynamic model name
    plot_filename_indicator = os.path.join(indicator_folder, f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png')
    plt.savefig(plot_filename_indicator)

    # Create subfolder for the country if it doesn't exist
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)
    os.makedirs(country_folder, exist_ok=True)
    
    # Save the same plot in the Countries folder with dynamic model name
    plot_filename_country = os.path.join(country_folder, f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png')
    plt.savefig(plot_filename_country)

    plt.close()




def train_prophet(train_df, test_y,country,indicator):
    #train_df['ds'] = pd.to_datetime(train_df['ds'])  # Ensure datetime format
    
    param_grid = {
        'changepoint_prior_scale': [0.01, 0.1, 0.5],
        'seasonality_mode': ['additive', 'multiplicative'],
    }

    best_rmse = float('inf')
    best_params = None

    for changepoint_prior in param_grid['changepoint_prior_scale']:
        for seasonality_mode in param_grid['seasonality_mode']:
            model = Prophet(
                yearly_seasonality=True,
                weekly_seasonality=False,
                daily_seasonality=False,
                changepoint_prior_scale=changepoint_prior,
                seasonality_mode=seasonality_mode
            )
            model.fit(train_df)

            future = model.make_future_dataframe(periods=len(test_y['y']), freq='Y')
            forecast = model.predict(future)
            predictions = forecast['yhat'].iloc[-len(test_y['y']):].values
            rmse = np.sqrt(mean_squared_error(test_y['y'], predictions))

            if rmse < best_rmse:
                best_rmse = rmse
                best_params = (changepoint_prior, seasonality_mode)

    # Train best model
    best_model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        changepoint_prior_scale=best_params[0],
        seasonality_mode=best_params[1]
    )
    best_model.fit(train_df)
    future = best_model.make_future_dataframe(periods=len(test_y['y']), freq='Y')
    forecast = best_model.predict(future)
    predictions = forecast['yhat'].iloc[-len(test_y['y']):].values

    save_plot(train_df, test_y, predictions, country, indicator, model_name="Prophet")
    return np.sqrt(mean_squared_error(test_y['y'], predictions)) , predictions




with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)
        
        if os.path.exists(filepath):
            df = pd.read_parquet(filepath)
            if 'Year' in df.columns and 'Value' in df.columns:
                df = df.set_index('Year').sort_index()
                df.index = pd.to_datetime(df.index, format='%Y')
                df = df.dropna()
                df = df.drop('Indicator', axis = 1)
                df_original = df.copy()
                
                
                #df = df.dropna()
                train_size = int(len(df) * 0.8)
                

                
                model_errors_rmse[(country, indicator)] = {}


                prophet_train_df = df_original.iloc[:train_size].reset_index().rename(columns={'Year': 'ds', 'Value': 'y'})
                prophet_test_df = df_original.iloc[train_size:].reset_index().rename(columns={'Year': 'ds', 'Value': 'y'})
                prophet_train_df['ds'] = pd.to_datetime(prophet_train_df['ds'])
                prophet_test_df['ds'] = pd.to_datetime(prophet_test_df['ds'])
                model_errors_rmse[(country, indicator)]['Prophet'] , prop_error = train_prophet(prophet_train_df, prophet_test_df,
                                                                                   country,indicator)
                
                sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
                log_current_data = []
                for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
                    log_data.append([country, indicator, model_name, rmse, rank])
                    log_current_data.append([country, indicator, model_name, rmse, rank])

                
from datetime import datetime
model ="Prophet"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)
