In [None]:
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime
import io
import datetime
import matplotlib.lines as mlines
import statsmodels.formula.api as smf
import datetime

# Data

In [None]:
# Load the project data set. The first CSV column is used as the index and
# pandas is asked to parse that index as dates.
df = pd.read_csv(
    'final_project_data.csv',
    index_col=0,
    parse_dates=True,
)
df.tail(15)

In [None]:
# Find the date of the first UK CPI observation, so that the rows before it
# (where 'UK YoY CPI' is still NaN) can be dropped further down.
# .first_valid_index() returns the index label of the first non-NaN value,
# not the value itself.
initial_date = df['UK YoY CPI'].first_valid_index() 
initial_date

In [None]:
# Format the date as a string in YYYY-MM-DD form (the '%Y-%m-%d' pattern).
initial_date_2 = initial_date.strftime('%Y-%m-%d')
initial_date_2

In [None]:
# Preview the first four rows of usd_to_gbp_rate.
# NOTE(review): usd_to_gbp_rate is not assigned in any cell of this notebook
# that is visible here, so this line raises NameError on a fresh kernel.
# Presumably it is exchange-rate data (later cells read its 'Adj Close' and
# 'log_return' columns) -- restore the cell that builds it above this one.
usd_to_gbp_rate.head(4)

In [None]:
# Keep only the observations from initial_date onwards
# (label-based slice on the date index; the start label is included).
df = df.loc[initial_date:]
df.head()

In [None]:
# Convert USD2YB and UK2YB into log returns, i.e. the first difference of the
# log level: log(x_t) - log(x_{t-1}).
# np.log on its own only yields the log level, and summing log levels per
# month (which the monthly resample further down does) is not a return.
# The first row of each column becomes NaN because it has no previous value.
# NOTE: the columns are overwritten in place, so run this cell only once per
# load of df; running it twice would transform the data twice.
df['USD2YB'] = np.log(df['USD2YB']).diff()
df['UK2YB'] = np.log(df['UK2YB']).diff()


In [None]:
# Count the missing values in every column and print the counts under a heading.
nan_values = df.isna().sum()
print("NaN values in each column:", nan_values, sep="\n")



In [None]:
# Index label of the first non-NaN 'Adj Close' entry in usd_to_gbp_rate.
# NOTE(review): usd_to_gbp_rate is not assigned in any cell of this notebook
# that is visible here; on a fresh kernel this raises NameError.
initial_date_3 = usd_to_gbp_rate['Adj Close'].first_valid_index() 
initial_date_3

In [None]:
# Attach the exchange-rate log returns to df by matching on the index.
# A left join keeps every row of df; dates without a match get NaN.
# NOTE(review): usd_to_gbp_rate is not assigned in any cell visible here.
# Re-running this cell merges a second time, which suffixes the duplicated
# column names instead of overwriting 'log_return'.
fx_log_return = usd_to_gbp_rate["log_return"]
df = df.merge(fx_log_return, how='left', left_index=True, right_index=True)

In [None]:
# Resample the data to monthly frequency.
# Return-type columns (USD2YB, UK2YB, log_return) are summed within the month;
# every other column keeps its last value of the month.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of
# 'ME' -- switch once the minimum supported pandas version allows it.
monthly_rules = {
    'USD2YB': 'sum',
    'UK2YB': 'sum',
    'UK YoY CPI': 'last',
    'US YoY CPI': 'last',
    'UK industrial Index': 'last',
    'US Industrial Index': 'last',
    'log_return': 'sum',
}

df2 = df.resample('M').agg(monthly_rules)
df2

In [None]:
# Discard the final two monthly rows of the frame.
# NOTE: this cell shortens df2 every time it runs, so run it only once.
df2 = df2.iloc[:-2]

In [None]:
# Missing values remaining in each column of the monthly frame.
df2.isna().sum()

In [None]:
# Move the date index into an ordinary column (leaving a 0..n-1 index) and
# give the two industrial-index columns underscore names, which the
# regression formulas further down refer to.
df2 = df2.reset_index().rename(
    columns={
        'UK industrial Index': 'UK_Industrial_Index',
        'US Industrial Index': 'US_Industrial_Index',
    }
)
df2

In [None]:
# Show the first 15 rows of the monthly frame.
df2.head(15)

In [None]:
## Independent Variables
# Row i of the model data pairs fundamentals observed at month t
# (df2 rows 26 .. len(df2)-2) with the date and the exchange-rate change of
# month t+1 (df2 rows 27 .. len(df2)-1). Every series is re-indexed from 0 so
# that the pieces line up row by row in the concat below.

# inf_us, inf_df2 : US inflation at t, UK inflation at t
inf_df2 = df2.loc[26:len(df2)-2, 'UK YoY CPI'].reset_index(drop=True)
inf_us = df2.loc[26:len(df2)-2, 'US YoY CPI'].reset_index(drop=True)
inf_diff = (inf_us - inf_df2).rename('inf_diff')

# output_us, output_uk : US output at t, UK output at t
output_uk = df2.loc[26:len(df2)-2, 'UK_Industrial_Index'].reset_index(drop=True)
output_us = df2.loc[26:len(df2)-2, 'US_Industrial_Index'].reset_index(drop=True)

# date : Time at t+1
date = df2.loc[27:, 'Date'].reset_index(drop=True)

# s_change : change in the log exchange rate over month t+1.
# This name used to be referenced without ever being assigned in the notebook,
# so a fresh kernel raised NameError here. It is built from the monthly
# 'log_return' column (the within-month sum of the log returns).
# NOTE(review): confirm 'log_return' is the intended exchange-rate series.
s_change = df2.loc[27:, 'log_return'].reset_index(drop=True).rename('s_change')

# Combine all variables into a new data set
df2_const = pd.concat([date, s_change, inf_diff, output_uk, output_us], axis=1)
df2_const.head(10)

# Strategy

In [None]:
## Estimate of Output Gap
# Potential output is a linear time trend, re-fitted every period on an
# expanding window of df2. The output gap is the relative deviation of actual
# output from that trend, computed for the UK ('df2_*' columns) and the US.

# rename returns a new frame, so later column assignments act on an
# independent object. (The mapping is a no-op when the columns already carry
# the underscore names.)
df2_const = df2_const.rename(columns={'UK industrial Index': 'UK_Industrial_Index', 'US Industrial Index': 'US_Industrial_Index'})
df2 = df2.copy()

# Time trend 1..len(df2), assigned by position. Assigning a DataFrame here
# would align on the index and silently produce NaN if df2's index were not
# exactly 0..n-1.
df2['t'] = np.arange(1, len(df2) + 1)

for i in range(len(df2_const)):  # Updating the potential output each period

    # Expanding training sample: df2 rows 0 .. 26+i
    train = df2[0:27+i]
    # One-row frame with the trend value of df2 row 27+i, the row being predicted
    next_t = df2[['t']][27+i:28+i]

    # UK: fit the trend and predict the next period
    linear_trend_df2 = smf.ols(formula='UK_Industrial_Index ~ t', data=train).fit()
    df2_const.loc[i, 'df2_potential'] = linear_trend_df2.predict(next_t).iloc[0]

    # Repeat for the US
    linear_trend_us = smf.ols(formula='US_Industrial_Index ~ t', data=train).fit()
    df2_const.loc[i, 'us_potential'] = linear_trend_us.predict(next_t).iloc[0]

# NOTE(review): row i of df2_const holds output taken from df2 row 26+i, while
# the potential above is predicted for df2 row 27+i. Confirm that comparing
# output at t with the trend at t+1 is intended.
df2_const['df2_gap'] = (df2_const['UK_Industrial_Index'] - df2_const['df2_potential'])/df2_const['df2_potential']
df2_const['us_gap'] = (df2_const['US_Industrial_Index'] - df2_const['us_potential'])/df2_const['us_potential']

# Output Gap differential between US and UK
df2_const['gap_diff'] = df2_const['us_gap'] - df2_const['df2_gap']

# Keep variables only used for our model. The explicit copy makes this an
# independent frame, so adding columns to it later does not trigger
# SettingWithCopyWarning.
df2_const = df2_const[['Date', 's_change', 'inf_diff', 'gap_diff']].copy()

In [None]:
# Show the first 10 rows of the model data set.
df2_const.head(10)

In [None]:
# Out-of-sample forecasts of the change in the log exchange rate, using a
# rolling estimation window.
window = 120  # number of observations in every estimation window

# Column for the fitted changes; rows without a forecast stay NaN
df2_const['s_change_fitted'] = np.nan

for i in range(window, len(df2_const)):

    # Regress the change in the exchange rate on the Taylor rule fundamentals,
    # using only the `window` rows immediately before row i
    estimation_sample = df2_const[i-window:i]
    tmp = smf.ols(formula='s_change ~ inf_diff + gap_diff', data=estimation_sample).fit()

    # Predict row i from its own fundamentals
    fundamentals_i = df2_const[['inf_diff', 'gap_diff']][i:i+1]
    df2_const.loc[i, 's_change_fitted'] = tmp.predict(fundamentals_i)[i]

# A level forecast (s_change_fitted + s_current) is not computed here:
# df2_const has no 's_current' column.

# Forecast Error: realised change minus fitted change
df2_const['error'] = df2_const['s_change'] - df2_const['s_change_fitted']

df2_const.tail(10)

In [None]:
# Keep only the rows that have a forecast (any row containing NaN is dropped)
# and work on an independent copy.
df3 = df2_const.dropna().copy()

# Go long (+1) if we forecast the exchange rate will increase, otherwise short (-1)
df3['signal'] = np.where(df3['s_change_fitted'] >= 0, 1, -1)

# Per-period log return of the strategy: position times realised change.
# df3's own 's_change' column is used instead of the notebook-level s_change
# series, which is not guaranteed to exist on a fresh kernel and is indexed
# over all rows rather than only the rows kept in df3.
strategy_log_return = df3['signal'] * df3['s_change']

# calculate returns: gross return per period, and cumulative return in percent
df3['returns'] = np.exp(strategy_log_return)
df3['strategy_return'] = (np.exp(strategy_log_return.cumsum()) - 1) * 100
