In [None]:
"""

This notebook is to manually reproduce portfolio returns and variance, to show the formulae we're using in Markowitz.

Just for learning purposes.

"""

In [75]:
import pandas as pd
import numpy as np

In [76]:
import os

gcp_project = os.environ['GCP_PROJECT']

In [4]:
from google.cloud import bigquery

PROJECT = "le-wagon-hedge-fund"
DATASET = "data_alpaca_20240604"
TABLE = "SP500_Historical_Weekly"

query = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE symbol IN ('AAL','AAP','AAPL','ABBV','ABC','ABT','ACN','ADBE','ADI','ADM')
    """

client = bigquery.Client(project=gcp_project)
query_job = client.query(query)
result = query_job.result()
df = result.to_dataframe()

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4351 entries, 0 to 4350
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   symbol       4351 non-null   object 
 1   timestamp    4351 non-null   object 
 2   open         4351 non-null   float64
 3   high         4351 non-null   float64
 4   low          4351 non-null   float64
 5   close        4351 non-null   float64
 6   volume       4351 non-null   float64
 7   trade_count  4351 non-null   float64
 8   vwap         4351 non-null   float64
dtypes: float64(7), object(2)
memory usage: 306.1+ KB


In [22]:
# Calculating weekly returns and creating a dataframe ordered by timestamp

df['symbol_shift']=df['symbol'].shift(-1)
df['close_shift']=df['close'].shift(-1)
df['close_returns']=df['symbol_shift'][df['symbol_shift']==df['symbol']]/df['close']-1
time_df = df.groupby(['timestamp','symbol']).agg({'close_returns':'sum'}).reset_index().pivot(index='timestamp',columns='symbol',values='close_returns')

In [29]:
# Removing the NaN values

short_time_df = time_df.dropna()

In [30]:
S = short_time_df.cov()
S

symbol,AAL,AAP,AAPL,ABBV,ABC,ABT,ACN,ADBE,ADI,ADM
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AAL,0.006302,0.001543,0.000899,0.000477,0.000875,0.000566,0.001015,0.000817,0.001395,0.000949
AAP,0.001543,0.003003,0.000727,0.000736,0.000716,0.000655,0.000796,0.000655,0.000775,0.000679
AAPL,0.000899,0.000727,0.002937,0.000507,0.000461,0.000701,0.000766,0.001045,0.000828,0.000353
ABBV,0.000477,0.000736,0.000507,0.001381,0.000617,0.000593,0.000458,0.000449,0.000382,0.00035
ABC,0.000875,0.000716,0.000461,0.000617,0.001453,0.000526,0.000456,0.000386,0.000458,0.000453
ABT,0.000566,0.000655,0.000701,0.000593,0.000526,0.001105,0.000528,0.000694,0.000554,0.000294
ACN,0.001015,0.000796,0.000766,0.000458,0.000456,0.000528,0.001153,0.000875,0.000701,0.000491
ADBE,0.000817,0.000655,0.001045,0.000449,0.000386,0.000694,0.000875,0.001848,0.000868,0.000333
ADI,0.001395,0.000775,0.000828,0.000382,0.000458,0.000554,0.000701,0.000868,0.001445,0.000513
ADM,0.000949,0.000679,0.000353,0.00035,0.000453,0.000294,0.000491,0.000333,0.000513,0.001249


In [37]:
weights=np.array([0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1])

In [45]:
portfolio_returns = np.dot(short_time_df.to_numpy(),weights)

In [46]:
short_time_df['portfolio']=portfolio_returns
short_time_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_time_df['portfolio']=portfolio_returns


symbol,AAL,AAP,AAPL,ABBV,ABC,ABT,ACN,ADBE,ADI,ADM,portfolio
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-04 05:00:00+00:00,-0.039881,-0.008725,0.001753,0.030368,-0.052981,-0.003196,0.010896,0.015026,-0.003201,-0.083745,-0.013369
2016-01-11 05:00:00+00:00,0.036120,0.000970,0.044168,0.025985,-0.019079,-0.012580,0.029918,0.005159,0.031313,0.054586,0.019656
2016-01-18 05:00:00+00:00,-0.029133,0.052759,-0.040229,-0.066803,-0.015824,-0.054459,0.032277,-0.005578,0.048268,0.063798,-0.001492
2016-01-25 05:00:00+00:00,-0.057451,-0.053338,-0.034107,-0.032423,-0.054265,-0.011625,-0.062251,-0.111410,-0.075009,-0.036209,-0.052809
2016-02-01 05:00:00+00:00,0.029116,-0.022023,-0.000319,-0.010166,0.010744,-0.007485,-0.043144,-0.031944,-0.003613,-0.047549,-0.012638
...,...,...,...,...,...,...,...,...,...,...,...
2023-07-31 04:00:00+00:00,-0.011995,0.017885,-0.023078,0.030123,0.011007,-0.021737,-0.009455,-0.034980,-0.027290,-0.008533,-0.007805
2023-08-07 04:00:00+00:00,-0.038978,-0.033896,-0.018561,-0.013405,-0.037199,-0.015193,-0.019770,-0.000629,-0.040718,-0.023461,-0.024181
2023-08-14 04:00:00+00:00,-0.030585,-0.082343,0.023612,-0.022979,-0.005100,0.004821,0.052222,0.033318,0.013713,-0.024146,-0.003747
2023-08-21 04:00:00+00:00,0.006859,0.055712,0.060747,0.010294,0.002786,-0.013051,0.028172,0.072658,0.023967,-0.012867,0.023528


In [50]:
avg_ret = np.array(short_time_df.mean(axis=0))

In [58]:
#CHECKING THAT RETURNS OF PORTFOLIO ARE WEIGHTED AVERAGE

weighted_avg = np.dot(weights.reshape(1,-1),avg_ret[:10])
portfolio = avg_ret[10]

In [63]:
portfolio

0.0027771014580969277

In [77]:
weighted_avg

array([0.0027771])

It works 🎉

In [69]:
cov_weights = np.dot(weights.reshape(-1,1),weights.reshape(1,-1))

In [72]:
np.multiply(cov_weights,S).sum().sum()

0.0008170876625930052

In [74]:
short_time_df['portfolio'].var()

0.0008170876625930051

#### It works 🎉