## Loading data from BigQuery

In [4]:
import os

gcp_project = os.environ['GCP_PROJECT']

In [5]:
from google.cloud import bigquery

PROJECT = "le-wagon-hedge-fund"
DATASET = "data_alpaca_20240604"
TABLE = "SP500_Historical_Weekly"

query = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    """

client = bigquery.Client(project=gcp_project)
query_job = client.query(query)
result = query_job.result()
df = result.to_dataframe()

In [6]:
df.head()

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
0,AAPL,2016-01-04 05:00:00+00:00,102.61,105.85,96.43,96.96,362768447.0,1964372.0,100.620442
1,AAPL,2016-01-11 05:00:00+00:00,98.97,101.19,95.36,97.13,321021271.0,1746672.0,98.299269
2,AAPL,2016-01-18 05:00:00+00:00,98.41,101.46,93.42,101.42,254535461.0,1399429.0,97.214443
3,AAPL,2016-01-25 05:00:00+00:00,101.52,101.53,92.39,97.34,399618939.0,2037460.0,96.674642
4,AAPL,2016-02-01 05:00:00+00:00,96.47,97.33,93.69,94.02,226792333.0,1321006.0,95.583457


In [12]:
first_ticks = list(df.symbol.unique())[:10]
df2 = df[df['symbol'].isin(first_ticks)]
df2['symbol_shift']=df2['symbol'].shift(-1)
df2['close_shift']=df2['close'].shift(-1)
df2['close_returns']=df2['close_shift'][df2['symbol_shift']==df2['symbol']]/df2['close']-1
expected_returns = df2.groupby('symbol').agg({'close_returns':['mean']})
expected_returns.columns = expected_returns.columns.map('_'.join)
expected_returns=expected_returns.reset_index()
mu = expected_returns

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['symbol_shift']=df2['symbol'].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['close_shift']=df2['close'].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['close_returns']=df2['close_shift'][df2['symbol_shift']==df2['symbol']]/df2['close']-1


In [13]:
time_df = df2.groupby(['timestamp','symbol']).agg({'close_returns':'sum'})
S = time_df.reset_index().pivot(index='timestamp',columns='symbol',values='close_returns')
S = S.cov()

In [14]:
S

symbol,AAL,AAP,AAPL,ABBV,ABC,ABT,ACN,ADBE,ADI,ADM
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AAL,0.00601,0.001519,0.000838,0.000479,0.000875,0.000544,0.001,0.000824,0.0013,0.000816
AAP,0.001519,0.003079,0.000641,0.000735,0.000716,0.000593,0.000768,0.000606,0.000704,0.000686
AAPL,0.000838,0.000641,0.002759,0.00046,0.000461,0.000633,0.000718,0.001008,0.000797,0.000306
ABBV,0.000479,0.000735,0.00046,0.001322,0.000617,0.000556,0.000433,0.00041,0.000335,0.000335
ABC,0.000875,0.000716,0.000461,0.000617,0.001453,0.000526,0.000456,0.000386,0.000458,0.000453
ABT,0.000544,0.000593,0.000633,0.000556,0.000526,0.001075,0.000522,0.000652,0.000526,0.000278
ACN,0.001,0.000768,0.000718,0.000433,0.000456,0.000522,0.001146,0.000863,0.000665,0.000439
ADBE,0.000824,0.000606,0.001008,0.00041,0.000386,0.000652,0.000863,0.001864,0.00083,0.00029
ADI,0.0013,0.000704,0.000797,0.000335,0.000458,0.000526,0.000665,0.00083,0.001423,0.000477
ADM,0.000816,0.000686,0.000306,0.000335,0.000453,0.000278,0.000439,0.00029,0.000477,0.001356


## Building a portfolio

In [15]:
import pandas as pd
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

# Read in price data
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")

# Calculate expected returns and sample covariance
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)

# Optimize for maximal Sharpe ratio
ef = EfficientFrontier(mu.close_returns_mean*100, S)
ef.tickers = list(mu.symbol.unique())
raw_weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
# ef.save_weights_to_file("weights.csv")  # saves to file
print(cleaned_weights)
ef.portfolio_performance(verbose=True)

OrderedDict([('AAL', 0.0), ('AAP', 0.0), ('AAPL', 0.02765), ('ABBV', 0.28493), ('ABC', 0.0), ('ABT', 0.0), ('ACN', 0.0), ('ADBE', 0.2808), ('ADI', 0.37089), ('ADM', 0.03574)])
Expected annual return: 39.4%
Annual volatility: 2.9%
Sharpe Ratio: 13.00


(0.39411045933379985, 0.028782218220591547, 12.997971750007492)