In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

start = "2024-11-25"


def _daily_returns(ticker, label):
    """Return day-over-day percentage changes of `ticker`'s 'Adj Close' prices.

    Prices are downloaded at daily frequency from `start` onward; the result is
    a one-column DataFrame whose column is named `label`. The first row is NaN
    because there is no previous price to compare against.
    """
    returns = yf.download(ticker, interval="1d", start=start)[['Adj Close']].pct_change()
    returns.columns = [label]
    return returns


NAS = _daily_returns('^IXIC', 'NAS')  # NASDAQ
DJI = _daily_returns('^DJI', 'DJI')   # Dow Jones Industrial Average
SP5 = _daily_returns('^GSPC', 'SP5')  # S&P 500 (as the benchmark)

# Put the three return series side by side and drop any date with a missing value
data = pd.concat([NAS, DJI, SP5], axis=1).dropna()
data

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,NAS,DJI,SP5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-11-26,0.006269,0.002766,0.005722
2024-11-27,-0.005936,-0.003082,-0.003801
2024-11-29,0.008273,0.004217,0.005608
2024-12-02,0.009667,-0.002865,0.002448
2024-12-03,0.003966,-0.001708,0.000451
2024-12-04,0.013049,0.006901,0.006051
2024-12-05,-0.001766,-0.005517,-0.00187
2024-12-06,0.008097,-0.002752,0.002495
2024-12-09,-0.006197,-0.005389,-0.006144
2024-12-10,-0.002505,-0.003471,-0.002964


In [3]:
import warnings

# Ignore every warning from here on (this filter stays active for later cells too)
warnings.filterwarnings(action='ignore')

# Regress NAS daily returns on SP5 daily returns, with an intercept column added
sp5_with_const = sm.add_constant(data['SP5'])
res = sm.OLS(data['NAS'], sp5_with_const).fit()
res.summary()

0,1,2,3
Dep. Variable:,NAS,R-squared:,0.909
Model:,OLS,Adj. R-squared:,0.901
Method:,Least Squares,F-statistic:,110.2
Date:,"Sun, 15 Dec 2024",Prob (F-statistic):,4.55e-07
Time:,12:38:28,Log-Likelihood:,60.718
No. Observations:,13,AIC:,-117.4
Df Residuals:,11,BIC:,-116.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0022,0.001,3.117,0.010,0.001,0.004
SP5,1.5889,0.151,10.495,0.000,1.256,1.922

0,1,2,3
Omnibus:,1.184,Durbin-Watson:,1.292
Prob(Omnibus):,0.553,Jarque-Bera (JB):,0.644
Skew:,-0.527,Prob(JB):,0.725
Kurtosis:,2.721,Cond. No.,222.0


In [4]:
# Standard deviation of the regression residuals held in `res`
# (expected to be the NAS-on-SP5 fit from the previous cell).
# The exploratory `dir(res)` / `res.params` / `res.params.iloc[1]` expressions were
# removed: a cell only displays its last expression, so they had no effect.
res.resid.std()

0.0023589694663368544

In [5]:
benchmark = 'SP5'
RBenchmark = data[benchmark]

# Every series evaluated below; declared once so the result frame and the loop agree
portfolios = ['NAS', 'DJI', 'SP5']

# One row per performance measure, one column per evaluated series
perfEval = pd.DataFrame(index=['AriRet', 'Sharpe', 'Beta', 'TreyRatio',
                               'JenAlpha',  'AppRatio', 'M2', 'IndAlpha',
                               'TE', 'InfoRatio'],
                        columns=portfolios)
# about 252 trading days per year
TDays = 252

# NOTE: no risk-free rate is subtracted anywhere in this cell, so every ratio
# below is computed on raw returns (i.e. a zero risk-free rate is implied).

# annualized the Sharpe Ratio from daily data
SharpeBenchmark = RBenchmark.mean()/RBenchmark.std() * (TDays)**0.5

# annualized benchmark volatility from daily data
VolaBenchmark = RBenchmark.std() * (TDays)**0.5

for p in portfolios:
    RP = data[p]

    # Single-factor regression of the series on the benchmark:
    # params.iloc[0] is the intercept (alpha), params.loc[benchmark] the slope (beta)
    res = sm.OLS(RP, sm.add_constant(RBenchmark)).fit()
    alpha = res.params.iloc[0]
    beta = res.params.loc[benchmark]

    sharpe = RP.mean()/RP.std() * (TDays)**0.5

    # Return difference versus the benchmark, its annualized mean and volatility
    active = RP - RBenchmark
    ind_alpha = active.mean() * TDays
    te = active.std() * ((TDays)**0.5)

    perfEval.loc['AriRet', p]    = RP.mean() * TDays
    perfEval.loc['Sharpe', p]    = sharpe
    perfEval.loc['Beta', p]      = beta
    perfEval.loc['TreyRatio', p] = RP.mean()/beta * TDays
    perfEval.loc['JenAlpha', p]  = alpha * TDays
    # Rounding the intercept to 10 decimals turns floating-point noise into an exact 0
    # before dividing by the residual std (relevant when the benchmark is regressed on itself)
    perfEval.loc['AppRatio', p]  = round(alpha, 10)/res.resid.std() * ((TDays)**0.5)
    perfEval.loc['M2', p]        = (sharpe - SharpeBenchmark) * VolaBenchmark
    perfEval.loc['IndAlpha', p]  = ind_alpha
    perfEval.loc['TE', p]        = te
    # The benchmark has zero tracking error against itself; report NaN explicitly
    # instead of relying on a silent 0/0
    perfEval.loc['InfoRatio', p] = ind_alpha/te if te != 0 else np.nan

display(np.round(perfEval.astype(float), 3))

Unnamed: 0,NAS,DJI,SP5
AriRet,0.876,-0.396,0.208
Sharpe,7.048,-6.476,2.787
Beta,1.589,0.602,1.0
TreyRatio,0.551,-0.658,0.208
JenAlpha,0.546,-0.521,0.0
AppRatio,14.57,-12.552,0.0
M2,0.318,-0.691,0.0
IndAlpha,0.668,-0.604,0.0
TE,0.058,0.051,0.0
InfoRatio,11.574,-11.827,


In [6]:
# Regressors for the market-timing regression: the SP5 return and its square
MktTime = data[['SP5']].assign(SqRm=data['SP5'] ** 2)

# The regressor matrix (with intercept) is the same for every series, so build it once
timing_regressors = sm.add_constant(MktTime)

for p in ('NAS', 'DJI'):
    RP = data[p]
    res = sm.OLS(RP, timing_regressors).fit()
    print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                    NAS   R-squared:                       0.909
Model:                            OLS   Adj. R-squared:                  0.891
Method:                 Least Squares   F-statistic:                     50.10
Date:                Sun, 15 Dec 2024   Prob (F-statistic):           6.15e-06
Time:                        12:39:27   Log-Likelihood:                 60.722
No. Observations:                  13   AIC:                            -115.4
Df Residuals:                      10   BIC:                            -113.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0021      0.001      1.935      0.0

In [None]:
# Display the market-timing regressors (benchmark return and its square).
# NOTE(review): the saved output of this cell starts at 2024-11-04, which is before
# `start` (2024-11-25) — it appears to come from an earlier run; re-run to refresh.
MktTime

Unnamed: 0_level_0,SP5,SqRm
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-04 00:00:00+00:00,-0.002812,7.907813e-06
2024-11-05 00:00:00+00:00,0.012266,0.000150446
2024-11-06 00:00:00+00:00,0.025296,0.000639884
2024-11-07 00:00:00+00:00,0.007431,5.522318e-05
2024-11-08 00:00:00+00:00,0.003757,1.41138e-05
2024-11-11 00:00:00+00:00,0.000969,9.390839e-07
2024-11-12 00:00:00+00:00,-0.002893,8.36748e-06
2024-11-13 00:00:00+00:00,0.000232,5.392972e-08
2024-11-14 00:00:00+00:00,-0.00605,3.659929e-05
2024-11-15 00:00:00+00:00,-0.013203,0.0001743321


# START FROM HERE !!! For Group 3 
- benchmark : SOX 

In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# First date requested from yfinance for the stock and SOX downloads in the cells below
start = "2024-11-25"

In [9]:

# Define portfolio stocks and their weights (one weight per ticker, in the same order).
# Weights are signed: positive entries are long positions, negative entries are short.
stocks = ['AVGO', 'NVDA', 'LRCX', 'TSM', 'AMKR', 'QCOM', 'ASML', 'AMD', 'INTC', 'AMAT']
weights = [1.528955, 1.190034, 0.991454, 0.105532, -0.011699, -0.227998, -0.399155, -0.506331, -0.767735, -0.903055]

# A length mismatch would otherwise silently drop tickers or weights
assert len(stocks) == len(weights), "stocks and weights must have the same length"

# Download each stock's daily returns, then join them on date in a single concat
# (growing a DataFrame with concat inside the loop re-copies it on every iteration)
stock_returns = []
for stock in stocks:
    temp = yf.download(stock, interval="1d", start=start)[['Adj Close']].pct_change()
    temp.columns = [stock]
    stock_returns.append(temp)

portfolio_data = pd.concat(stock_returns, axis=1)

# Calculate portfolio returns: weighted sum of the stock returns on each date.
# Weights are matched to columns by ticker label; skipna=False keeps a date NaN
# whenever any constituent return is missing (e.g. the first row after pct_change).
weight_by_stock = pd.Series(weights, index=stocks)
portfolio_data['Portfolio'] = (
    portfolio_data[stocks].mul(weight_by_stock, axis=1).sum(axis=1, skipna=False)
)



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [10]:
portfolio_data

Unnamed: 0_level_0,AVGO,NVDA,LRCX,TSM,AMKR,QCOM,ASML,AMD,INTC,AMAT,Portfolio
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-11-25,,,,,,,,,,,
2024-11-26,-0.000485,0.006617,-0.013838,-0.0067,-0.022685,-0.0119,-0.018262,-0.024162,-0.032972,-0.007734,0.047505
2024-11-27,-0.030776,-0.01154,-0.015408,-0.014415,-0.009893,-0.003377,-0.002217,-0.010746,-0.016632,-0.010854,-0.047802
2024-11-29,0.015094,0.021501,0.032276,0.019151,0.016141,0.013619,0.024057,0.006899,0.016913,0.019788,0.035442
2024-12-02,0.027332,0.002749,0.062669,0.052746,0.03177,0.028386,0.036207,0.035574,-0.00499,0.048995,0.033038
2024-12-03,0.009849,0.011758,-0.002165,0.023097,-0.010997,-0.00276,0.009263,-0.000563,-0.061011,-0.0006,0.07407
2024-12-04,0.014332,0.034793,-0.001021,0.00905,0.001812,0.004675,0.00259,0.014157,-0.022697,-0.010919,0.081258
2024-12-05,-0.000528,-0.000482,-0.038973,0.017988,-0.01997,-0.012924,-0.011696,-0.018265,-0.052823,-0.050397,0.065041
2024-12-06,0.053147,-0.018061,0.018349,-0.006265,0.010381,-0.005487,-0.003542,-0.019595,0.005769,0.005755,0.080135
2024-12-09,-0.003286,-0.025484,0.011229,-0.018964,0.019406,0.007711,-0.00347,-0.055704,-0.005258,-0.006704,0.011475


In [12]:
# Benchmark: daily percentage change of the ^SOX 'Adj Close' series since `start`
sox_prices = yf.download('^SOX', interval="1d", start=start)[['Adj Close']]
SOX = sox_prices.pct_change()
SOX.columns = ['SOX']

# Keep only the dates on which both the portfolio and the benchmark have a return
portfolio_returns = portfolio_data[['Portfolio']]
data = pd.concat([portfolio_returns, SOX], axis=1).dropna()
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Portfolio,SOX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-26,0.047505,-0.012101
2024-11-27,-0.047802,-0.015129
2024-11-29,0.035442,0.015156
2024-12-02,0.033038,0.026136
2024-12-03,0.07407,-0.003849
2024-12-04,0.081258,0.017062
2024-12-05,0.065041,-0.018628
2024-12-06,0.080135,0.006949
2024-12-09,0.011475,-0.008389
2024-12-10,-0.06394,-0.024735


In [13]:
# Baseline regression: portfolio daily return on SOX daily return, with an intercept
sox_with_const = sm.add_constant(data['SOX'])
res = sm.OLS(data['Portfolio'], sox_with_const).fit()

print("\nInitial Regression Results:")
print(res.summary())


Initial Regression Results:
                            OLS Regression Results                            
Dep. Variable:              Portfolio   R-squared:                       0.490
Model:                            OLS   Adj. R-squared:                  0.444
Method:                 Least Squares   F-statistic:                     10.58
Date:                Sun, 15 Dec 2024   Prob (F-statistic):            0.00770
Time:                        12:47:38   Log-Likelihood:                 14.200
No. Observations:                  13   AIC:                            -24.40
Df Residuals:                      11   BIC:                            -23.27
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0477  

### Analysis : 
- 整體模型顯著性:

1. R-squared = 0.490，表示約49%的投資組合報酬波動可以被SOX指數解釋
2. F-statistic的p值為0.00770 < 0.05，表示模型整體統計顯著


- Beta係數 (SOX的係數):

1. Beta = 4.2617，這是一個非常高的值
2. 統計顯著 (p值 = 0.008 < 0.05)
3. 意味著當SOX指數上升1%時，投資組合預期會上升4.26%
4. 表示這個投資組合比SOX指數具有更高的波動性和槓桿效果


- Alpha係數 (const):

1. Alpha = 0.0477（每日 4.77%；此為日報酬迴歸的截距，尚未年化）
2. 邊際顯著 (p值 = 0.080，接近但略高於0.05)
3. 顯示投資組合在控制市場因素後可能有正的超額報酬


- 模型診斷:

1. Durbin-Watson = 1.928，接近2，表示殘差沒有明顯的自相關
2. Jarque-Bera測試的p值 = 0.524 > 0.05，表示殘差接近常態分配
3. Skew = 0.743和Kurtosis = 3.421，表示殘差分配略微右偏但接近常態



- Overall:

1. 具有很高的市場敏感度（高Beta）
2. 可能有產生超額報酬的能力（正Alpha）
3. 但僅約一半的報酬可被市場解釋，表示還有相當大部分來自個股特質
4. 整體而言是一個高風險高報酬的積極投資組合

In [16]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Performance Evaluation: annualized measures for the portfolio against the SOX benchmark

# Trading days per year, used to annualize daily figures
TDays = 252
sqrt_days = (TDays)**0.5

RBenchmark = data['SOX']
RP = data['Portfolio']

# Annualized Sharpe ratio and volatility of the benchmark
SharpeBenchmark = RBenchmark.mean()/RBenchmark.std() * sqrt_days
VolaBenchmark = RBenchmark.std() * sqrt_days

# Regress portfolio returns on benchmark returns: first parameter is the
# intercept (alpha), second is the slope on the benchmark (beta)
res = sm.OLS(RP, sm.add_constant(RBenchmark)).fit()
intercept = res.params.iloc[0]
slope = res.params.iloc[1]

portfolio_sharpe = RP.mean()/RP.std() * sqrt_days

# Return difference versus the benchmark, its annualized mean and volatility
return_gap = RP - RBenchmark
ind_alpha = return_gap.mean() * TDays
tracking_error = return_gap.std() * sqrt_days

portfolio_metrics = {
    'AriRet': RP.mean() * TDays,
    'Sharpe': portfolio_sharpe,
    'Beta': slope,
    'TreyRatio': RP.mean()/slope * TDays,
    'JenAlpha': intercept * TDays,
    'AppRatio': round(intercept, 10)/res.resid.std() * sqrt_days,
    'M2': (portfolio_sharpe - SharpeBenchmark) * VolaBenchmark,
    'IndAlpha': ind_alpha,
    'TE': tracking_error,
    'InfoRatio': ind_alpha/tracking_error,
}

# Benchmark column: measured against itself, so most entries are fixed by definition
benchmark_metrics = {
    'AriRet': RBenchmark.mean() * TDays,
    'Sharpe': SharpeBenchmark,
    'Beta': 1.0,                              # beta with itself
    'TreyRatio': RBenchmark.mean() * TDays,   # same as AriRet because beta is 1
    'JenAlpha': 0.0,
    'AppRatio': 0.0,
    'M2': 0.0,
    'IndAlpha': 0.0,
    'TE': 0.0,
    'InfoRatio': np.nan,                      # not applicable for the benchmark
}

# Rows follow the metric order above; cells are filled one by one
perfEval = pd.DataFrame(index=list(portfolio_metrics), columns=['Portfolio', 'SOX'])
for column, metrics in (('Portfolio', portfolio_metrics), ('SOX', benchmark_metrics)):
    for metric, value in metrics.items():
        perfEval.loc[metric, column] = value

print("\nPerformance Metrics:")
print(np.round(perfEval.astype(float), 3))


Performance Metrics:
           Portfolio    SOX
AriRet        14.857  0.664
Sharpe         7.910  2.150
Beta           4.262  1.000
TreyRatio      3.486  0.664
JenAlpha      12.029  0.000
AppRatio       8.970  0.000
M2             1.777  0.000
IndAlpha      14.193  0.000
TE             1.677  0.000
InfoRatio      8.465    NaN


##  perf matrix analysis 
讓我們以更客觀的角度來分析這些數據：

1. **Sharpe Ratio**
- Portfolio: 7.910 vs Benchmark: 2.150
- 此處的夏普比率已由日報酬乘上 √252 年化（且未扣除無風險利率）；以極短樣本年化容易使數值被高估 (over-valued)
- 一般而言，好的基金年化夏普比率在1-2之間
- 基準的2.150是合理的數值，表示半導體產業本身就具有不錯的風險調整後報酬
- 投資組合的7.910雖然看似很高，但要考慮：
  - 這是短期數據（僅約13個交易日）
  - 短期數據容易受極端值影響
  - 波動度在短期內可能被低估

2. **Beta = 4.262**
- 這個槓桿效果相當高
- 意味著SOX每變動1%，投資組合預期變動約4.26%（對SOX的敏感度為4.26倍，並非總波動度恰為SOX的4.26倍）
- 在市場上漲時會有很好的表現
- 但在市場下跌時也會有更大的跌幅
- 這個Beta值反映了一個非常積極且風險很高的策略

3. **年化報酬率（AriRet）**
- Portfolio: 14.857（即 1,485.7%）vs Benchmark: 0.664（即 66.4%）；表中數值為小數形式的年化報酬，而非百分比
- 這個差距非常大
- 但要注意這是把短期報酬直接年化的結果
- 實際上很難在長期維持這樣的差距

4. **風險調整指標**
- Information Ratio = 8.465
  - 一般認為IR > 0.5就很好
  - IR > 1 是極其優秀
  - 8.465的IR在長期是幾乎不可能維持的
- Treynor Ratio = 3.486
  - 反映了不錯的風險調整後報酬
  - 但同樣要考慮短期效應

5. **追蹤誤差（TE）= 1.677（年化，約 167.7%）**
- 顯示與基準有顯著偏離
- 這是可以預期的，因為我們用了很高的槓桿效果

客觀建議：
1. 這個投資組合在短期內表現確實很好，但不應期待這樣的表現能持續
2. 需要更長的時間週期（至少3-6個月）來真實評估策略效果
3. 高Beta策略在市場反轉時風險很大，需要有適當的風險管理機制
4. 建議觀察更長期的表現，特別是在不同市場環境下的表現
5. 可以考慮降低一些槓桿效果以減少極端風險



In [17]:
# Market Timing Analysis

# Regress the portfolio return on the SOX return and its square (plus an intercept)
MktTime = data[['SOX']].assign(SqRm=data['SOX'] ** 2)
timing_regressors = sm.add_constant(MktTime)
timing_res = sm.OLS(data['Portfolio'], timing_regressors).fit()

print("\nMarket Timing Analysis:")
print(timing_res.summary())


Market Timing Analysis:
                            OLS Regression Results                            
Dep. Variable:              Portfolio   R-squared:                       0.580
Model:                            OLS   Adj. R-squared:                  0.496
Method:                 Least Squares   F-statistic:                     6.912
Date:                Sun, 15 Dec 2024   Prob (F-statistic):             0.0130
Time:                        13:01:56   Log-Likelihood:                 15.463
No. Observations:                  13   AIC:                            -24.93
Df Residuals:                      10   BIC:                            -23.23
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0057      

### Market timing analysis : 
- Model : 
    Portfolio Return = α + β1(Market Return) + β2(Market Return²) + ε
    - β1: normal market beta 
    - β2: timing ability 

以下係數在 95% 信賴水準（顯著水準 0.05）下皆不顯著：
- const: 0.0057 (p=0.880)
- SOX: 3.0156 (p=0.074)
- SqRm: 127.2638 (p=0.174)
    - β2（SqRm係數）為正，但不顯著（p=0.174）。若β2顯著為正，則：
        - 表示在市場上漲時，投資組合的Beta會提高
        - 在市場下跌時，Beta會降低
        - 這意味著管理者能夠正確預測市場走向並調整風險
- r squared : 
    - 從0.490上升到0.580，表示加入平方項後解釋力有所提升
    - 表示可能有一些擇時能力，但證據不夠強

