In [41]:
import pandas as pd
import numpy as np
import datetime as DT
import pandas as pd
from pypfopt import EfficientFrontier
from google.cloud import bigquery
# from pypfopt import risk_models
# from pypfopt import expected_returns

In [2]:
import os

gcp_project = os.environ['GCP_PROJECT']

In [4]:
# Simplified query for this backtesting exercise

PROJECT = "le-wagon-hedge-fund"
DATASET = "data_alpaca_20240604"
TABLE = "SP500_Historical_Weekly"

query = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE symbol IN ('AAL','AAP','AAPL','ABBV','ABC','ABT','ACN','ADBE','ADI','ADM')
    """

client = bigquery.Client(project=gcp_project)
query_job = client.query(query)
result = query_job.result()
df = result.to_dataframe()

In [9]:
def get_all_data():
    # start_date=datetime.strptime(start_date,'%Y-%m-%d')
    # end_date=datetime.strptime(end_date,'%Y-%m-%d')
    PROJECT = "le-wagon-hedge-fund"
    DATASET = "data_alpaca_20240604"
    TABLE = "SP500_Historical_Weekly"
    query = f"""

    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE (symbol IN ('AAL','AAP','AAPL','ABBV','ABC','ABT','ACN','ADBE','ADI','ADM'))
    """
    client = bigquery.Client(project=gcp_project)
    query_job = client.query(query)
    result = query_job.result()
    df = result.to_dataframe()
    return df

In [10]:
def get_data(start_date, end_date):
    # start_date=datetime.strptime(start_date,'%Y-%m-%d')
    # end_date=datetime.strptime(end_date,'%Y-%m-%d')
    PROJECT = "le-wagon-hedge-fund"
    DATASET = "data_alpaca_20240604"
    TABLE = "SP500_Historical_Weekly"
    query = f"""

    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE (symbol IN ('AAL','AAP','AAPL','ABBV','ABC','ABT','ACN','ADBE','ADI','ADM'))
    AND (DATE(timestamp) BETWEEN '{start_date}' AND '{end_date}')
    """
    client = bigquery.Client(project=gcp_project)
    query_job = client.query(query)
    result = query_job.result()
    df = result.to_dataframe()
    return df

In [5]:
def model_try(df):
    time_df = df.pivot(index='timestamp',columns='symbol',values='close')
    returns_df = time_df.pct_change().dropna()
    expected_returns=pd.DataFrame(returns_df.mean(), columns=['expected_return'])
    cov_df=returns_df.cov()
    return expected_returns, cov_df

In [6]:
def making_portfolio(tickers,expected_returns, cov_df):
    ef = EfficientFrontier(expected_returns,cov_df)
    ef.tickers = tickers
    raw_weights = ef.max_sharpe(risk_free_rate=0.0001)
    cleaned_weights = ef.clean_weights()
    return pd.DataFrame(list(cleaned_weights.items()), columns=['ticker','weight']).set_index('ticker')

In [81]:
def portfolio_returns(weights: pd.DataFrame, start_date: str, end_date: str):
    # Finding the returns for all stocks between start and end date
    df=get_data(start_date,end_date)
    time_df = df.pivot(index='timestamp',columns='symbol',values='close')

    #Resetting index for the time_df
    time_df['clean_date']=time_df.index
    time_df['clean_date']=time_df['clean_date'].apply(lambda x: DT.datetime.strptime(x, '%Y-%m-%d %H:%M:%S+00:00'))\
        .apply(lambda x: f'{x.year}-{x.month:02d}-{x.day:02d}')
    time_df = time_df.set_index('clean_date')

    ret = time_df.loc[f'{end_date}']/time_df.loc[f'{start_date}']-1

    # Calculating portfolio return
    port_return = (weights.weight * ret).sum()

    return port_return


In [82]:
#TODO: Make this code more efficient by not querying every time but rather saving data locally while running

def backtesting(as_of_date, n_periods, period_type='W'):
    as_of = DT.datetime.strptime(as_of_date, '%Y-%m-%d').date()
    starting_point = as_of - DT.timedelta(days=7 * n_periods)
    port_return = 1
    weekly_returns = []

    # Training the model with data until the starting point
    #TODO: when we have a model, we'll train it until that date. For now we're just using a simple average.

    # Calculating portfolio returns
    while starting_point < as_of:
        one_week_ahead = starting_point + DT.timedelta(days=7)
        week_start_str = f'{starting_point.year}-{starting_point.month:02d}-{starting_point.day:02d}'
        week_end_str = f'{one_week_ahead.year}-{one_week_ahead.month:02d}-{one_week_ahead.day:02d}'
        df = get_data('2016-01-04',week_start_str)
        expected_returns, cov_df = model_try(df)
        cleaned_weights = making_portfolio(list(expected_returns.index),expected_returns.expected_return, cov_df)
        weekly_return = portfolio_returns(cleaned_weights,week_start_str,week_end_str)
        weekly_returns.append(weekly_return)
        port_return *= (1+weekly_return)
        starting_point += DT.timedelta(days=7)

    return port_return, weekly_returns

In [83]:
backtesting('2024-05-27',4)

  returns_df = time_df.pct_change().dropna()
  returns_df = time_df.pct_change().dropna()
  returns_df = time_df.pct_change().dropna()
  returns_df = time_df.pct_change().dropna()


(1.0181043080605576,
 [0.00446498707147595,
  0.019300914537515847,
  0.0028309838168795935,
  -0.008421013484098823])