# Attribution Analysis

In [1]:
import pandas as pd
import numpy as np
from pandas_datareader import DataReader as pdr
import yfinance as yf
import plotly.graph_objects as go
import statsmodels.api as sm
# Render floats with four decimal places in every pandas display below
pd.set_option("display.float_format", '{:.4f}'.format)

In [2]:
# Function to pull returns
def returns(ticker, start='2000-01-01', end='2022-12-31'):
    """Download monthly percentage returns for ``ticker`` from Yahoo Finance.

    Daily prices are fetched with ``yf.download``, the "Adj Close" column is
    reduced to the last observation of each calendar month, and the
    month-over-month percentage change is returned.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``; also used as the name of the result.
    start, end : str, optional
        Date bounds forwarded to ``yf.download``. The defaults reproduce the
        window this notebook was originally written with.

    Returns
    -------
    pandas.Series
        Monthly returns on a monthly PeriodIndex, named ``ticker``. The first
        entry is NaN because there is no prior month to compare against.
    """
    # auto_adjust is passed explicitly so the "Adj Close" column is present
    # whatever default the installed yfinance release uses.
    prices = yf.download(ticker, start=start, end=end, auto_adjust=False, progress=False)
    prices.index = prices.index.to_period('D')

    adj_close = prices["Adj Close"]
    # Some yfinance releases return two-level columns even for one ticker,
    # which makes the selection above a one-column DataFrame; collapse it to a
    # Series. A multi-column selection is left untouched by squeeze.
    if isinstance(adj_close, pd.DataFrame):
        adj_close = adj_close.squeeze("columns")

    ret = adj_close.resample("M").last().pct_change()
    ret.name = ticker
    return ret

In [3]:
# Ticker symbol of the fund analyzed throughout the notebook
TICKER = 'ARKK'

# Download the fund's monthly returns from Yahoo via returns(); the bare `df`
# on the last line displays the resulting series as the cell output
df = returns(TICKER)
df



Date
2014-10       NaN
2014-11    0.0103
2014-12   -0.0209
2015-01    0.0040
2015-02    0.0627
            ...  
2022-08   -0.0720
2022-09   -0.0991
2022-10    0.0146
2022-11   -0.0209
2022-12   -0.1665
Freq: M, Name: ARKK, Length: 99, dtype: float64

In [4]:
# Add factors: fetch the Fama-French research factors and the momentum factor
# (table 0 of each dataset), dividing by 100 so they are on the same decimal
# scale as the fund returns
ff3, Mom = (
    pdr(dataset, 'famafrench', start=1900)[0] / 100
    for dataset in ('F-F_Research_Data_Factors', 'F-F_Momentum_Factor')
)
Mom.columns = ['Mom']
ff = ff3.join(Mom)

# Left-merge on the index so every month of fund returns is kept
df = pd.merge(left=df, right=ff, left_index=True, right_index=True, how='left')
df

Unnamed: 0_level_0,ARKK,Mkt-RF,SMB,HML,RF,Mom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-10,,0.0252,0.0420,-0.0181,0.0000,-0.0006
2014-11,0.0103,0.0255,-0.0206,-0.0309,0.0000,0.0069
2014-12,-0.0209,-0.0006,0.0249,0.0227,0.0000,0.0112
2015-01,0.0040,-0.0311,-0.0055,-0.0358,0.0000,0.0384
2015-02,0.0627,0.0613,0.0061,-0.0186,0.0000,-0.0282
...,...,...,...,...,...,...
2022-08,-0.0720,-0.0377,0.0137,0.0030,0.0019,0.0199
2022-09,-0.0991,-0.0935,-0.0079,0.0006,0.0019,0.0347
2022-10,0.0146,0.0783,0.0009,0.0805,0.0023,0.0387
2022-11,-0.0209,0.0460,-0.0340,0.0138,0.0029,-0.0201


#### Run Fama-French-Carhart performance model

In [5]:
# Regress the fund's return in excess of RF on the four factors plus an
# intercept; missing='drop' discards rows that contain NaNs
factor_cols = ['Mkt-RF', 'SMB', 'HML', 'Mom']
excess_ret = df[TICKER] - df['RF']
design = sm.add_constant(df[factor_cols])
ff4 = sm.OLS(excess_ret, design, missing='drop').fit()
print(ff4.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.786
Model:                            OLS   Adj. R-squared:                  0.777
Method:                 Least Squares   F-statistic:                     85.61
Date:                Tue, 11 Apr 2023   Prob (F-statistic):           2.51e-30
Time:                        13:49:41   Log-Likelihood:                 166.50
No. Observations:                  98   AIC:                            -323.0
Df Residuals:                      93   BIC:                            -310.1
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0026      0.005     -0.553      0.5

In [6]:
# Extract the fitted parameters (intercept `const` and the four factor
# loadings) so later cells can look them up by name; the bare `coefs`
# displays them as the cell output
coefs = ff4.params
coefs

const    -0.0026
Mkt-RF    1.4535
SMB       0.9113
HML      -0.9318
Mom      -0.0488
dtype: float64

#### Attribution analysis using the Fama-French-Carhart (FFC) model

In [7]:
# Keep only the months where the fund return and every factor are available
df = df.dropna()

# Compound (1 + monthly contribution) for the fund itself and for each source:
# the intercept plus residual ('active'), the risk-free rate plus market
# exposure ('mkt'), and each remaining factor scaled by its loading
growth = {
    TICKER: (1 + df[TICKER]).cumprod(),
    'active': (1 + coefs['const'] + ff4.resid).cumprod(),
    'mkt': (1 + df['RF'] + coefs['Mkt-RF'] * df['Mkt-RF']).cumprod(),
}
growth.update(
    {factor: (1 + coefs[factor] * df[factor]).cumprod() for factor in ('SMB', 'HML', 'Mom')}
)
cum_rets = pd.DataFrame(growth, index=df.index, dtype=float)

In [8]:
# Line chart of the cumulative return attributed to each source, one trace per column
dates = cum_rets.index.astype(str)
fig = go.Figure(
    data=[
        go.Scatter(x=dates, y=cum_rets[col], mode="lines", name=col)
        for col in cum_rets.columns
    ]
)

# Formatting: labelled y-axis with two-decimal ticks, legend in the top-left
# corner, and a log-scaled y-axis
fig.update_yaxes(title='Cumulative Return', tickformat=".2f")
fig.update_layout(
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    yaxis_type="log",
)
fig.show()