###  **Question 3: IAQF Academic Competition**


*Imports*

In [16]:
import pandas as pd 
import numpy as np
import yfinance as yf
from datetime import date
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

##### **a) What are the implications of the noted concentration for that strategy?**

**Data**

*Reading*

In [17]:
sector_etfs = {
    'Baseline': {
        "Large-Cap": "SPY",
        "Small-Cap": "^SP600"
    },
    "Technology": {
        "Large-Cap": "XLK",
        "Small-Cap": "PSCT"
    },
    "Healthcare": {
        "Large-Cap": "XLV",
        "Small-Cap": "PSCH"
    },
    "Financials": {
        "Large-Cap": "XLF",
        "Small-Cap": "PSCF"
    },
    "Consumer Discretionary": {
        "Large-Cap": "XLY",
        "Small-Cap": "PSCD"
    },
    "Consumer Staples": {
        "Large-Cap": "XLP",
        "Small-Cap": "PSCC"
    },
    "Industrials": {
        "Large-Cap": "XLI",
        "Small-Cap": "PSCI"
    },
    "Energy": {
        "Large-Cap": "XLE",
        "Small-Cap": "PSCE"
    },
    "Materials": {
        "Large-Cap": "XLB",
        "Small-Cap": "PSCM"
    },
    "Utilities": {
        "Large-Cap": "XLU",
        "Small-Cap": "PSCU"
    },
    "Real Estate": {
        "Large-Cap": "XLRE",
        "Small-Cap": "ROOF"
    }
}

In [18]:
tickers = [_ for x in sector_etfs.values() for _ in x.values()]
start_date = date(2010,1,1)
end_date = date(2025,1,1)

# ETF Returns
prices = yf.download(tickers = tickers,start = start_date,end = end_date)['Close']

ff_data = pd.read_csv('../extra-data/FF_3Factor.CSV',skiprows = 4,index_col=0)
ff5_data = pd.read_csv('../extra-data/FF_5Factor.csv',index_col=0)

[*********************100%***********************]  22 of 22 completed


*Cleaning*

In [19]:
returns = prices.pct_change().dropna()

# Clean FF Data
ff_data = ff_data.iloc[:-1]
ff_data.index = pd.to_datetime(ff_data.index).date

ff5_data.index = pd.to_datetime(ff5_data.index).date


The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



##### *i)* Baseline Strategy


Baseline strategy is underperforming, show how this is connected to tech exposure

In [20]:
baseline_strategy = pd.DataFrame(returns['^SP600'] - returns['SPY'],columns=['daily_returns'])
baseline_strategy['cumulative_returns'] = baseline_strategy['daily_returns'].cumsum()

##### *ii)* Industry Sector Basis

Show the strategy performance at a sector basis

In [21]:
sector_baseline_returns = {}

for sector,(large,small) in sector_etfs.items():
    sector_baseline_returns[sector] = (returns[sector_etfs[sector][small]] - returns[sector_etfs[sector][large]]).cumsum()

sector_baseline_returns = pd.DataFrame(sector_baseline_returns)

##### *iii)* Risk Attribution (Fama-French)

In [22]:
import statsmodels.api as sm
# Merge datasets
df = baseline_strategy.join(ff5_data, how='inner')

# Calculate excess returns
df['Excess_Return'] = df['daily_returns'] - df['rf']

# Run regression
X = df[['smb', 'hml', 'rmw', 'cma','umd']]
X = sm.add_constant(X)  # Add intercept
y = df['Excess_Return']

model = sm.OLS(y, X).fit()
print(model.summary())

# Extract risk contribution
betas = model.params[1:]  # Exclude intercept
factor_vols = df[['smb', 'hml', 'rmw', 'cma','umd']].std()  # Factor standard deviations

# Compute risk contribution
risk_contributions = (betas**2 * factor_vols**2) / sum(betas**2 * factor_vols**2)
print("Risk Attribution:\n", risk_contributions)

                            OLS Regression Results                            
Dep. Variable:          Excess_Return   R-squared:                       0.933
Model:                            OLS   Adj. R-squared:                  0.933
Method:                 Least Squares   F-statistic:                     2087.
Date:                Sat, 22 Feb 2025   Prob (F-statistic):               0.00
Time:                        12:49:04   Log-Likelihood:                 3557.5
No. Observations:                 753   AIC:                            -7103.
Df Residuals:                     747   BIC:                            -7075.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0003   7.88e-05     -3.639      0.0

In [23]:
risk_contributions

smb    0.936011
hml    0.054044
rmw    0.008694
cma    0.001240
umd    0.000011
dtype: float64

#### **Plots**

*i)* Baseline Strategy Performance

In [14]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x = baseline_strategy.index,
        y = baseline_strategy['cumulative_returns'],
        name = 'Baseline Strategy'
    )
)

fig.update_layout(title = 'Benchmark Strategy Returns')
fig.update_layout(
                  showlegend=True,
                  margin=dict(l=10, r=10, t=50, b=10),
                  legend=dict(orientation="h",yanchor="top",y=-0.1,xanchor="center",x=0.5),
                  width = 800,height = 550,
                  xaxis_title = 'Date',
                  xaxis=dict(title_standoff=3) 
                )
fig.update_yaxes(title = 'Cumulative Simple Returns')
fig.update_xaxes(title = 'Date')
fig.show()

*ii)* Strategy at Sector Level

In [12]:
fig = go.Figure()


for strategy in sector_baseline_returns:

    fig.add_trace(
        go.Scatter(
            x = sector_baseline_returns.index,
            y = sector_baseline_returns[strategy],
            name = strategy
        )
    )

fig.update_layout(title = 'Strategy Sector Level Returns')
fig.update_layout(
                  showlegend=True,
                  margin=dict(l=10, r=10, t=50, b=10),
                  legend=dict(orientation="h",yanchor="top",y=-0.1,xanchor="center",x=0.5),
                  width = 800,height = 550,
                  xaxis_title = 'Date',
                  xaxis=dict(title_standoff=3) 
                )
fig.update_yaxes(title = 'Cumulative Simple Returns')
fig.update_xaxes(title = 'Date')
fig.show()

##### **b) How would you modify the trading strategy to correct for any implication you may have found?**

In [11]:
small_cap = ['PSCT', 'PSCH', 'PSCF','PSCD', 'PSCC', 'PSCI', 'PSCE', 'PSCM','PSCU','ROOF']
large_cap = ['XLK', 'XLV', 'XLF', 'XLY', 'XLP', 'XLI', 'XLE', 'XLB', 'XLU', 'XLRE']

start_date = date(2020,1,1)
end_date = date(2025,1,1)

data = yf.download(tickers = small_cap + large_cap,start = start_date, end = end_date)['Close']

returns = data.pct_change().dropna()

[*********************100%***********************]  20 of 20 completed

The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



In [12]:
class Momentum:

    def __init__(self, data, returns, lookback, rebalance, type = 'Equal'):
        # Inputs
        self.data = data
        self.returns = returns
        self.lookback = lookback
        self.rebalance = rebalance
        self.type = type

        # Stats
        self.weights = pd.Series(0.0, index=self.returns.columns, dtype=float)
        self.portfolio_returns = pd.Series(0.0, index=self.returns.index, dtype=float)

    def run(self):

        for t in range(self.lookback, len(self.returns.index)):
            # Compute portfolio return for the day
            daily_return = (self.weights * self.returns.iloc[t]).sum()
            self.portfolio_returns.iloc[t] = daily_return

            if t % self.rebalance == 0:

                # Get window returns
                small_cap_window_returns = self.returns.iloc[t - self.lookback:t,][small_cap]
                large_cap_window_returns = self.returns.iloc[t - self.lookback:t,][large_cap]

                # Calculate momentum, long and short threshold
                momentum_small_cap = self.signal(small_cap_window_returns)
                momentum_large_cap = self.signal(large_cap_window_returns)

                # Select assets in the top and bottom deciles
                long_assets = momentum_small_cap.nlargest(3).index
                short_assets = momentum_large_cap.nsmallest(3).index

                # Equally Weighted Portfolio
                self.weights = pd.Series(0.0, index=self.returns.columns, dtype=float)
                self.weights[long_assets] = 1 / len(long_assets) if len(long_assets) > 0 else 0
                self.weights[short_assets] = -1 / len(short_assets) if len(short_assets) > 0 else 0

            

        # Clean + Compute Stats
        self._clean_returns()
        self._compute_stats()

    def signal(self, window_returns):
        """Calculate Momentum Signals"""
        if self.type == 'Equal':

            # Calculate momentum (cumulative returns over the lookback period)
            momentum = (1 + window_returns).prod() - 1 / np.sqrt(window_returns.var())
    
            return momentum
        
    def _clean_returns(self):
        """Clean Portfolio Returns"""

        _df = pd.DataFrame(self.portfolio_returns,columns=['daily_return'])
        _df['cumulative_return'] = (1 + _df['daily_return']).cumprod()
        
        self.portfolio_returns = _df

    def _compute_stats(self):
        """Compute Portfolio Stats"""
        
        # Annualized Return
        annual_ret = self.portfolio_returns['daily_return'].mean()*252

        # Volatility
        annual_vol = self.portfolio_returns['daily_return'].std()*np.sqrt(252)

        # Sharpe Ratio
        sharpe_ratio = annual_ret / annual_vol

        self.stats = {'Sharpe Ratio': round(sharpe_ratio,2),
                      'Annual Return': round(annual_ret,2),
                        'Annual Volatility': round(annual_vol,2)}

In [13]:
m = Momentum(data,returns,120,30,'Equal')
m.run()
portfolio_returns = m.portfolio_returns

In [14]:
import statsmodels.api as sm
# Merge datasets
df = portfolio_returns.join(ff5_data, how='inner')

# Calculate excess returns
df['Excess_Return'] = df['daily_return'] - df['rf']

# Run regression
X = df[['smb', 'hml', 'rmw', 'cma','umd']]
X = sm.add_constant(X)  # Add intercept
y = df['Excess_Return']

model = sm.OLS(y, X).fit()
print(model.summary())

# Extract risk contribution
betas = model.params[1:]  # Exclude intercept
factor_vols = df[['smb', 'hml', 'rmw', 'cma','umd']].std()  # Factor standard deviations

# Compute risk contribution
risk_contributions = (betas**2 * factor_vols**2) / sum(betas**2 * factor_vols**2)
print("Risk Attribution:\n", risk_contributions)

                            OLS Regression Results                            
Dep. Variable:          Excess_Return   R-squared:                       0.538
Model:                            OLS   Adj. R-squared:                  0.535
Method:                 Least Squares   F-statistic:                     174.0
Date:                Fri, 21 Feb 2025   Prob (F-statistic):          1.23e-122
Time:                        13:28:08   Log-Likelihood:                 2345.2
No. Observations:                 753   AIC:                            -4678.
Df Residuals:                     747   BIC:                            -4651.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.000      1.204      0.2

##### **c) Can you provide a backtest comparison between the standard strategy and the modified strategy?**