Import the libraries we need to get started; the rest will be imported as and when we need them.

In [None]:
import pandas as pd

Read our database. It is stored in Stata's .dta format, so we load it with pandas' `read_stata`, which converts it into a DataFrame.

In [None]:
# Absolute location of the Stata (.dta) file that holds the dataset.
dta_path = "/Users/nickking/Desktop/Documents/University/Year 3/Dissertation/SUBMIT.dta"

# pandas parses the .dta file straight into a DataFrame.
df = pd.read_stata(dta_path)

# Preview the first ten rows as the cell output.
df.head(10)

In [None]:
# Display the name of every column in the loaded DataFrame.
df.columns

Filtering by prices to see the values for stocks

In [None]:
# Keep only the columns whose name contains '_close', so the names and
# values of those series can be inspected on their own.
close_prices = df.filter(like='_close')
close_columns = close_prices.columns.tolist()

close_prices.head(10)


Importing numpy to perform mathematical operations and generate the log returns

In [None]:
import numpy as np

countries = ['uk', 'us', 'brazil', 'mexico', 'india']

for country in countries:
    # Log return: log(P_t) - log(P_{t-1}), which approximates the percentage
    # change between consecutive rows. The first row has no predecessor, so
    # its return is NaN.
    log_price = np.log(df[f'{country}_close'])
    df[f'return_{country}_close'] = log_price - log_price.shift(1)

# Show the column list again so the new return_* columns are visible.
df.columns


Importing matplotlib to provide graphs

In [None]:
import matplotlib.pyplot as plt

Plotting a line graph of the returns for each country in the database

In [None]:
countries = ['uk', 'us', 'brazil', 'mexico', 'india']

# A later cell moves 'monthly_date' out of the columns and into the index
# (in place). Take the dates from wherever they currently live so this cell
# still works when it is re-run after that point instead of raising KeyError.
dates = df['monthly_date'] if 'monthly_date' in df.columns else df.index

for country in countries:
    # A fresh figure per iteration puts every country on its own graph.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(dates, df[f'return_{country}_close'])  # line plot of the return series
    ax.set_title(f'{country} Returns')
    ax.set_xlabel('Date')
    ax.set_ylabel('Return')
    fig.tight_layout()  # trim the padding around the axes
    plt.show()

Kernel density plot of our returns, which can show which countries' returns are closest to "normal"

In [None]:
countries = ['uk', 'us', 'brazil', 'mexico', 'india']

# One shared set of axes so that every country's density is overlaid.
fig, ax = plt.subplots()

for country in countries:
    # pandas' own plotting accessor; the series name becomes the legend label.
    df[f'return_{country}_close'].plot.kde(ax=ax)

ax.set_title('Kernel Density of Returns')
ax.set_xlabel('Returns')
ax.legend()

Now separating the data I want to use in the VAR

In [None]:
countries = ['uk', 'us', 'brazil', 'mexico', 'india']

# Maps each country to the DataFrame of variables that will feed its VAR.
var_data_dict = {}

for country in countries:
    # Six country-specific series plus the shared volatility series.
    columns = [
        f'return_{country}_close',
        f'{country}_interest',
        f'{country}_NEXP_GDP',
        f'{country}_kof',
        f'log_{country}_epu',
        f'log_{country}_volume',
        'log_volatility_close',
    ]

    try:
        # Selecting a column that does not exist raises KeyError; rows with a
        # missing value in any of the selected columns are dropped.
        var_data = df[columns].dropna()
    except KeyError as e:
        # Report the problem and carry on with the remaining countries.
        print(f"Missing column for {country}: {e}")
    else:
        var_data_dict[country] = var_data


Making sure our index is in datetime

In [None]:
# Make 'monthly_date' a datetime and use it as the index of df.
# The guard keeps this cell re-runnable: once the column has been moved into
# the index it no longer exists as a column, and an unguarded second run
# would fail with KeyError: 'monthly_date'.
if 'monthly_date' in df.columns:
    df['monthly_date'] = pd.to_datetime(df['monthly_date'])
    df.set_index('monthly_date', inplace=True)

# Attach the detected frequency to the index. infer_freq returns None when it
# cannot find a regular spacing, in which case the index simply has no freq.
df.index.freq = pd.infer_freq(df.index)

# Sanity checks: detected frequency, index type, and the index itself.
print(df.index.freq)
print(type(df.index))
print(df.index)

Now importing the VAR class from statsmodels, fitting a model for each country, and forecasting the next five periods

In [None]:
from statsmodels.tsa.api import VAR

N_LAGS = 5          # lag order used when fitting each country's VAR
FORECAST_STEPS = 5  # number of periods to forecast beyond the sample

forecast_dict = {}  # Store forecast DataFrames for each country

for country, var_data in var_data_dict.items():
    print(f"--- {country.upper()} ---")

    # Work on a copy with a plain 0..n-1 index; the original labels of
    # var_data are still used below to locate the last observation's date.
    var_data_clean = var_data.copy().reset_index(drop=True)

    try:
        model = VAR(var_data_clean)
        results = model.fit(N_LAGS)

        # Seed the forecast with exactly as many trailing observations as the
        # fitted lag order, rather than a separately hard-coded count.
        forecast = results.forecast(
            var_data_clean.values[-results.k_ar:], steps=FORECAST_STEPS
        )

        # Create forecast DataFrame
        forecast_df = pd.DataFrame(forecast, columns=var_data_clean.columns)

        # Date the forecasts from the last observation the model actually saw.
        # dropna() may have removed trailing rows, so that is not necessarily
        # df.index[-1].
        last_label = var_data.index[-1]
        if isinstance(last_label, pd.Timestamp):
            # var_data was built after the datetime index was set.
            last_date = last_label
        elif pd.api.types.is_integer(last_label):
            # var_data was built while df still had its default integer index,
            # so the label is the row position in df.
            # NOTE(review): assumes df rows were not reordered or dropped since.
            last_date = df.index[last_label]
        else:
            # Unknown index type: fall back to the end of the full dataset.
            last_date = df.index[-1]

        # Forecast periods are business days following the last observation.
        forecast_index = pd.date_range(
            start=last_date + pd.Timedelta(days=1),
            periods=FORECAST_STEPS,
            freq='B',
        )
        forecast_df.index = forecast_index

        # Store in dictionary
        forecast_dict[country] = forecast_df

        display(forecast_df)

    except Exception as e:
        # Best effort: report the failure and continue with the next country.
        print(f"Error processing {country}: {e}")


Plotting the recent historical returns of each country together with the forecast values

In [None]:
for country in countries:
    return_col = f'return_{country}_close'
    fig, ax = plt.subplots(figsize=(10, 5))

    # Historical returns from row position 4100 onwards.
    hist = df[return_col].iloc[4100:]
    ax.plot(hist.index, hist, label='Historical')

    # Forecasts are only available for countries whose model was fitted.
    forecast_df = forecast_dict.get(country)
    has_forecast = forecast_df is not None and return_col in forecast_df.columns

    if has_forecast:
        # Dashed line for the forecast, with the forecast window shaded.
        ax.plot(forecast_df.index, forecast_df[return_col], linestyle='--', label='Forecast')
        ax.axvspan(forecast_df.index[0], forecast_df.index[-1], color='orange', alpha=0.3)

    ax.set_title(f'{country.upper()} Returns + Forecast')
    ax.set_xlabel('Date')
    ax.set_ylabel('Return')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()
