# Attribution Analysis

In [1]:
import pandas as pd
import numpy as np
from pandas_datareader import DataReader as pdr
import yfinance as yf
import plotly.graph_objects as go
import statsmodels.api as sm
pd.options.display.float_format = '{:.4f}'.format

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Function to pull returns
def returns(ticker):
    ret = yf.download(ticker, start='2000-01-01', end='2024-12-31', progress=False)
    ret = ret["Close"].resample("ME").last()
    ret = ret.pct_change()
    ret.columns = [ticker]
    ret.index = ret.index.to_period('M')
    return ret

In [3]:
# Pull data
# Fund to analyze. Swap in another Yahoo symbol (e.g. 'ARKK') to rerun the
# notebook for a different fund; only one assignment is kept so the value
# in effect is unambiguous.
TICKER = 'DFLVX'

# Pull the data from Yahoo
df = returns(TICKER)
df

YF.download() has changed argument auto_adjust default to True


Unnamed: 0_level_0,DFLVX
Date,Unnamed: 1_level_1
2000-01,
2000-02,-0.0898
2000-03,0.1414
2000-04,0.0299
2000-05,-0.0022
...,...
2024-08,0.0130
2024-09,0.0056
2024-10,-0.0067
2024-11,0.0630


In [4]:
# Add factors
# Fama-French three factors and the momentum factor, fetched through
# pandas-datareader's 'famafrench' source; element [0] of each result is the
# table used here, and dividing by 100 converts percent to decimal returns.
ff3 = pdr('F-F_Research_Data_Factors','famafrench', start=1900)[0]/100
Mom = pdr('F-F_Momentum_Factor','famafrench', start=1900)[0]/100
# Give the single momentum column a clean, fixed name.
Mom.columns = ['Mom']
ff = ff3.join(Mom)
# Merge onto the fund-return column only. If this cell is re-run, `df` already
# carries the factor columns; selecting [[TICKER]] first keeps the merge from
# producing suffixed duplicates (Mkt-RF_x / Mkt-RF_y, ...) that would break
# the regressions below.
df = pd.merge(df[[TICKER]], ff, how='left', left_index=True, right_index=True)
df

Unnamed: 0_level_0,DFLVX,Mkt-RF,SMB,HML,RF,Mom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01,,-0.0474,0.0577,-0.0188,0.0041,0.0192
2000-02,-0.0898,0.0245,0.2136,-0.0959,0.0043,0.1820
2000-03,0.1414,0.0520,-0.1720,0.0813,0.0047,-0.0683
2000-04,0.0299,-0.0640,-0.0668,0.0726,0.0046,-0.0839
2000-05,-0.0022,-0.0442,-0.0605,0.0475,0.0050,-0.0898
...,...,...,...,...,...,...
2024-08,0.0130,0.0161,-0.0355,-0.0113,0.0048,0.0479
2024-09,0.0056,0.0174,-0.0017,-0.0259,0.0040,-0.0060
2024-10,-0.0067,-0.0097,-0.0101,0.0089,0.0039,0.0287
2024-11,0.0630,0.0651,0.0463,-0.0005,0.0040,0.0090


#### Run market model performance

In [5]:
# Market model: regress the fund's return in excess of RF on the market
# excess return plus an intercept. Rows with missing values are dropped
# by statsmodels (missing='drop').
mm = sm.OLS(
    endog=df[TICKER] - df['RF'],
    exog=sm.add_constant(df[['Mkt-RF']]),
    missing='drop',
).fit()
print(mm.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.794
Model:                            OLS   Adj. R-squared:                  0.793
Method:                 Least Squares   F-statistic:                     1146.
Date:                Wed, 16 Apr 2025   Prob (F-statistic):          6.10e-104
Time:                        20:33:38   Log-Likelihood:                 694.46
No. Observations:                 299   AIC:                            -1385.
Df Residuals:                     297   BIC:                            -1378.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0008      0.001      0.612      0.5

#### Run Fama-French-Carhart performance model

In [6]:
# Four-factor model: same dependent variable as the market model, with SMB,
# HML and Mom added as regressors alongside Mkt-RF and an intercept.
# Rows with missing values are dropped by statsmodels (missing='drop').
ff4 = sm.OLS(
    endog=df[TICKER] - df['RF'],
    exog=sm.add_constant(df[['Mkt-RF', 'SMB', 'HML', 'Mom']]),
    missing='drop',
).fit()
print(ff4.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.942
Model:                            OLS   Adj. R-squared:                  0.942
Method:                 Least Squares   F-statistic:                     1202.
Date:                Wed, 16 Apr 2025   Prob (F-statistic):          8.54e-181
Time:                        20:33:46   Log-Likelihood:                 884.88
No. Observations:                 299   AIC:                            -1760.
Df Residuals:                     294   BIC:                            -1741.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0002      0.001      0.230      0.8

In [7]:
# Fitted parameters of the four-factor regression (intercept 'const' plus one
# loading per factor), kept as a Series so later cells can look them up by name.
coefs = ff4.params
coefs

const     0.0002
Mkt-RF    1.0086
SMB      -0.0681
HML       0.5330
Mom      -0.0784
dtype: float64

#### Attribution analysis using FFC model

In [8]:
# Keep only months where the fund return and every factor are present.
df = df.dropna()

# Growth of $1 from each return source, compounded month by month:
#   TICKER - the fund's own return
#   active - regression intercept plus that month's residual
#   mkt    - risk-free rate plus the market loading times Mkt-RF
#   SMB / HML / Mom - each factor's loading times the factor return
cum_rets = pd.DataFrame(
    {
        TICKER: (1 + df[TICKER]).cumprod(),
        'active': (1 + coefs['const'] + ff4.resid).cumprod(),
        'mkt': (1 + df['RF'] + coefs['Mkt-RF'] * df['Mkt-RF']).cumprod(),
        **{
            factor: (1 + coefs[factor] * df[factor]).cumprod()
            for factor in ('SMB', 'HML', 'Mom')
        },
    },
    index=df.index,
    dtype=float,
)

In [9]:
# Line chart of cumulative growth, one trace per return source
month_labels = cum_rets.index.astype(str)  # the index is rendered as strings for the x-axis
fig = go.Figure(
    data=[
        go.Scatter(x=month_labels, y=cum_rets[source], mode="lines", name=source)
        for source in cum_rets.columns
    ]
)

# Formatting: labelled log-scale y-axis, legend pinned to the top-left corner
fig.update_yaxes(title='Cumulative Return', tickformat=".2f")
fig.update_layout(
    yaxis_type="log",
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)
fig.show()