In [None]:
pip install dtaidistance

In [None]:
### 1. Import Necessary Libraries ###

import pandas as pd
import numpy as np
from dtaidistance import dtw
import matplotlib.pyplot as plt

# Suppress scientific notation when NumPy prints floating-point values
# (this only affects how arrays are displayed, not the numbers themselves)
np.set_printoptions(suppress=True)

In [None]:
### 2. Load the GDP Data from a CSV File

# Read the GDP table from disk; later cells read its 'Country', 'Year' and 'GDP' columns
data = pd.read_csv('gdp_data2.csv')

# Preview the leading rows as a quick sanity check of the load
leading_rows = data.head()
print(leading_rows)

In [None]:
### 3. Define the Base Period and Compute DTW Distances ###

# Define the target country and the base period
target_country = 'Cambodia'
base_period_length = 5   # k: number of most recent target observations used as the pattern to match
forecast_horizon = 11    # number of observations following a matched window that later cells read

# Extract GDP data for the target country, ordered by year
target_data = data[data['Country'] == target_country].sort_values(by='Year')
target_gdp = target_data['GDP'].values
target_years = target_data['Year'].values

# Fail early with a clear message instead of passing an empty pattern to DTW
if len(target_gdp) == 0:
    raise ValueError(f"No rows found for target country '{target_country}'")

# Define the base period [t-k+1, t]
base_period = target_gdp[-base_period_length:]

# Calculate DTW distances between the base period and all historical periods of other countries.
# Each entry is (country, start index of the window in that country's year-sorted series, distance).
distances = []
# Group once instead of re-filtering the whole frame per country;
# sort=False keeps countries in first-appearance order, matching data['Country'].unique().
for country, country_rows in data.groupby('Country', sort=False):
    if country == target_country:
        continue
    country_gdp = country_rows.sort_values(by='Year')['GDP'].values
    # "+ 1" so that the window ending on the last observation is also compared
    for i in range(len(country_gdp) - base_period_length + 1):
        period = country_gdp[i:i + base_period_length]
        distance = dtw.distance(base_period, period)
        distances.append((country, i, distance))

In [None]:
### 4. Identify Similar Periods in Other Countries' Histories

# Sort distances and select the top 1% closest periods.
# int() truncates, so with fewer than 100 candidate windows the count would be 0 and the
# selection would be empty; keep at least one period whenever any candidate exists.
top_1_percent = max(1, int(len(distances) * 0.01)) if distances else 0
top_periods = sorted(distances, key=lambda x: x[2])[:top_1_percent]

In [None]:
### 5. Forecast Future Growth Based on the Most Similar Periods

# Collect growth trajectories for the next y years
growth_trajectories = []
top_periods_filtered = []

# Cache each country's year-sorted GDP series so it is built once per country,
# not once per selected period.
country_series = {}

for country, start_index, distance in top_periods:
    if country not in country_series:
        country_series[country] = data[data['Country'] == country].sort_values(by='Year')['GDP'].values
    country_gdp = country_series[country]

    forecast_start = start_index + base_period_length
    end_index = forecast_start + forecast_horizon
    # end_index is an exclusive slice bound, so end_index == len(country_gdp) still
    # yields a full forecast_horizon-long trajectory and must be accepted.
    if end_index <= len(country_gdp):
        growth_trajectory = country_gdp[forecast_start:end_index]
        growth_trajectories.append(growth_trajectory)
        top_periods_filtered.append((country, start_index, distance))

# Without at least one trajectory the aggregations below are undefined
if not growth_trajectories:
    raise ValueError(
        "None of the selected similar periods is followed by "
        f"{forecast_horizon} observations; cannot build a forecast"
    )

# Stack into a 2-D array: one row per similar period, one column per forecast step
growth_trajectories = np.array(growth_trajectories)

### Calculate the forecasts
forecasted_growth_median = np.median(growth_trajectories, axis=0)
forecasted_growth_average = np.mean(growth_trajectories, axis=0)

# Aggregate growth rates using weighted average (weights inversely proportional to DTW distances).
# A distance of exactly 0 would divide by zero, so distances are floored at machine epsilon;
# such a period then receives an overwhelmingly large (but finite) weight.
min_distance = np.finfo(float).eps
weights = np.array([1 / max(distance, min_distance) for _, _, distance in top_periods_filtered])
weighted_growth_trajectories = growth_trajectories.T * weights
forecasted_growth_weighted_avg = np.sum(weighted_growth_trajectories, axis=1) / np.sum(weights)

forecasted_growth_55th_percentile = np.percentile(growth_trajectories, 55, axis=0)
forecasted_growth_60th_percentile = np.percentile(growth_trajectories, 60, axis=0)

In [None]:
### 6. Calculate RMSE for Different Forecast Options

# Function to calculate RMSE
def rmse(predictions, targets):
    """Return the root-mean-square error between ``predictions`` and ``targets``.

    Parameters
    ----------
    predictions, targets : array-like
        Numeric values to compare. Plain sequences (lists, tuples) are accepted.
        Shapes follow NumPy broadcasting rules.

    Returns
    -------
    numpy.float64
        Square root of the mean squared difference.
    """
    # Convert to float first: integer arrays can overflow when the differences are
    # squared (e.g. int64 values around 4e9), and Python lists do not support "-".
    errors = np.asarray(predictions, dtype=float) - np.asarray(targets, dtype=float)
    return np.sqrt(np.mean(errors ** 2))

# Score every forecast variant against the target country's most recent observations.
# NOTE(review): actual_growth is the last `forecast_horizon` observed values of the target,
# whereas the forecasts are built from what followed the matched windows, i.e. they describe
# the period after the base period. Confirm this comparison is the intended evaluation.
actual_growth = target_gdp[-forecast_horizon:]

candidate_forecasts = (
    forecasted_growth_median,
    forecasted_growth_average,
    forecasted_growth_weighted_avg,
    forecasted_growth_55th_percentile,
    forecasted_growth_60th_percentile,
)

# One RMSE per forecast, unpacked in the same order as candidate_forecasts
(
    rmse_median,
    rmse_average,
    rmse_weighted_avg,
    rmse_55th_percentile,
    rmse_60th_percentile,
) = [rmse(candidate, actual_growth) for candidate in candidate_forecasts]

print(f"RMSE (Median): {rmse_median}")
print(f"RMSE (Average): {rmse_average}")
print(f"RMSE (Weighted Average): {rmse_weighted_avg}")
print(f"RMSE (55th Percentile): {rmse_55th_percentile}")
print(f"RMSE (60th Percentile): {rmse_60th_percentile}")

In [None]:
### 7. Visualize the Results ###

# Prepare the years for plotting: the forecast_horizon years right after the last observed year
last_year = target_years[-1]
forecast_years = np.arange(last_year + 1, last_year + forecast_horizon + 1)

# (series, legend label, line colour) for each forecast variant
forecast_lines = (
    (forecasted_growth_median, 'Median Forecast', 'green'),
    (forecasted_growth_average, 'Average Forecast', 'orange'),
    (forecasted_growth_weighted_avg, 'Weighted Average Forecast', 'pink'),
    (forecasted_growth_55th_percentile, '55th Percentile Forecast', 'cyan'),
    (forecasted_growth_60th_percentile, '60th Percentile Forecast', 'purple'),
)

# Plot the results
plt.figure(figsize=(14, 8))
for series, label, color in forecast_lines:
    plt.plot(forecast_years, series, label=label, linestyle='--', color=color)

plt.xlabel('Year')
plt.ylabel('GDP Growth (%)')
# Use target_country so the title stays correct when a different country is analysed
plt.title(f'Forecasted GDP Growth of {target_country}')
plt.legend()
plt.grid(True)
plt.show()