### Developing infrastructure for calculating rolling correlations over different periods of time for combinations of asset pairs: (asset 1, asset 2, corr_window, business_date)

In [32]:
import os
import sys

import pandas as pd
from datetime import date
from dateutil.relativedelta import relativedelta
from typing import Optional

# Required to import other modules from this project, in folders such as utils/ or notebooks/.
# The project root is taken to be the parent of the kernel's current working directory,
# so this only resolves correctly when the notebook runs from a first-level subfolder.
current_folder = os.getcwd()
project_root_folder = os.path.abspath(os.path.join(current_folder, ".."))
# Guard the append so re-running this cell does not pile duplicate entries onto sys.path
if project_root_folder not in sys.path:
    sys.path.append(project_root_folder)
# Import functionalities from other modules in this project
from utils.db_utils import *

# Load the .env file that sits in the project root folder; it holds the AWS RDS login
# credentials. This must run before the connection below is opened.
from dotenv import load_dotenv
dotenv_path = os.path.join(project_root_folder, ".env")
load_dotenv(dotenv_path)

# Global variables: notebook-wide database handles, reused by the query cells below.
# NOTE(review): connect_to_rds presumably comes from utils.db_utils (star import) and
# reads the credentials loaded above - confirm in that module.
conn, cursor = connect_to_rds()

✅ Connected successfully!


### Generate unique pairs of assets. I might not need all of these, though

In [24]:
# Select the ticker of every row flagged active in tbl_active_tickers.
# There is no ORDER BY, so the row order is whatever the database returns.
query = """
select ticker from tbl_active_tickers
where is_active = TRUE;
"""

# Run the query through the shared cursor; the result is indexed by its "ticker" column
# in the next cell. NOTE(review): sql_query_as_df presumably comes from utils.db_utils.
df_active_tickers = sql_query_as_df(query, cursor)

In [25]:
# Flatten the "ticker" column of the query result into a plain Python list
active_tickers = df_active_tickers["ticker"].tolist()

In [26]:
from itertools import combinations

In [28]:
# Every unordered pair of active tickers, kept in the order the tickers were returned
asset_pairs = [(first, second) for first, second in combinations(active_tickers, 2)]
asset_pairs

[('SPY', 'GLD'),
 ('SPY', 'NVDA'),
 ('SPY', 'TSLA'),
 ('SPY', 'TLT'),
 ('SPY', 'SLV'),
 ('GLD', 'NVDA'),
 ('GLD', 'TSLA'),
 ('GLD', 'TLT'),
 ('GLD', 'SLV'),
 ('NVDA', 'TSLA'),
 ('NVDA', 'TLT'),
 ('NVDA', 'SLV'),
 ('TSLA', 'TLT'),
 ('TSLA', 'SLV'),
 ('TLT', 'SLV')]

### Function to calculate rolling correlation given (asset 1, asset 2, roll_corr_window, business_date). The default business_date should be the most recent business_date. Should the rolling correlation window be passed in as "1M", "3M", or "6M"?

In [None]:
def calc_return_correlation(asset_1: str, asset_2: str, roll_corr_window: str, user_input_business_date_str: Optional[str] = None):
    """
    Resolve the date window over which the return correlation of two assets is measured.

    NOTE: work in progress. The function currently resolves the end date of the window
    and the calendar date n months before it; the correlation itself is not computed
    yet, so nothing is returned.

    Args:
        asset_1: Ticker of the first asset, matched against tbl_daily_prod.ticker.
        asset_2: Ticker of the second asset, matched against tbl_daily_prod.ticker.
        roll_corr_window: Length of the correlation window; one of "1M", "3M" or "6M".
        user_input_business_date_str: Desired end date of the window, in a format that
            pd.to_datetime can parse. When omitted (or empty), or when the date is not a
            business_date shared by both assets, the most recent shared business_date
            is used instead.

    Returns:
        None for now (see the TODOs at the end of the body).

    Raises:
        ValueError: If roll_corr_window is not a supported window, or if the two assets
            have no business_date in common.
    """
    # Validate the window up front so a bad value fails fast, before any database round trip,
    # instead of printing a message and crashing later on an unbound n_months.
    roll_corr_window_map = {"1M": 1, "3M": 3, "6M": 6}
    try:
        n_months = roll_corr_window_map[roll_corr_window]
    except KeyError:
        valid_keys = list(roll_corr_window_map.keys())
        raise ValueError(
            f"User input {roll_corr_window} is not a valid input. Needs to be among: {valid_keys}"
        ) from None

    # Get distinct business_date common to both asset_1 and asset_2
    # NOTE(security): the tickers are interpolated straight into the SQL text. That is only
    # safe while they are trusted values (e.g. read from tbl_active_tickers); switch to a
    # parameterized query if sql_query_as_df supports bind parameters.
    query = f"""
    (
        select business_date
        from tbl_daily_prod
        where ticker = '{asset_1}'
    )
    intersect
    (
        select business_date
        from tbl_daily_prod
        where ticker = '{asset_2}'
    )
    order by business_date;
    """

    df_business_dates = sql_query_as_df(query, cursor)
    business_dates_common_to_both_assets = set(pd.to_datetime(df_business_dates["business_date"]))

    # Without a shared business_date there is nothing to anchor the window on; fail with a
    # clear message instead of the bare "max() arg is an empty sequence" error.
    if not business_dates_common_to_both_assets:
        raise ValueError(f"No business_date common to both {asset_1} and {asset_2} in tbl_daily_prod")

    if user_input_business_date_str:
        user_input_business_date = pd.to_datetime(user_input_business_date_str)
    else:
        user_input_business_date = None

    # If the desired business_date from user is not provided, or if doesn't exist in the list of valid business_date, use max(business_dates)
    if (user_input_business_date is None) or (user_input_business_date not in business_dates_common_to_both_assets):
        end_date = max(business_dates_common_to_both_assets)
        print(f"User did not provide business_date or provided date not in business_dates of database, so defaulting to max(business_dates): {end_date}")
    else:
        end_date = user_input_business_date

    # Calendar arithmetic: step back n_months from end_date (relativedelta clamps to the last
    # valid day of the target month, e.g. Mar 31 minus 1 month lands on the last day of Feb)
    n_months_before_end_date = end_date - relativedelta(months = n_months)

    # TODO: If n_months_before_end_date does not fall within the list of business_dates_common_to_both_assets, do I go up or down, to the next business_date or to the prev business_date?

    # TODO: Finally, get the time series of daily returns from both asset 1 and 2 between start_date and end_date (by querying the prod table tbl_daily_prod), calculate correlation, and return that number
    

In [33]:
# TODO: Should I create a vectorized version of this, for generating a time series / history of rolling correlations?  Something named: calc_rolling_return_correlation or calc_return_correlation_time_series