## This notebook creates a BQ table with historical yield averages

9-15-2023

This notebook creates analytics for JR Rieger. The goal here was to create a "daily Schoonover" for a day in the past. Essentially, the notebook produces historical average yields for by coupon and maturity.  We do this both for one day and five day windows.

In [2]:
import os
import datetime
import pandas as pd
from google.cloud import bigquery
from pandas_gbq import to_gbq

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="../creds.json"
bq_client = bigquery.Client()

project = "eng-reactor-287421"

In [3]:
def sqltodf(sql,limit = ""):
    if limit != "": 
        limit = f" ORDER BY RAND() LIMIT {limit}"
    bqr = bq_client.query(sql + limit).result()
    return bqr.to_dataframe()

In [10]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Get the current date
current_date = pd.Timestamp.now().date()

# Define the number of calendar days in the past you want to find
days_in_past = [30, 60, 90]

# List of known US Federal Reserve holidays
us_holidays = [
    "2023-01-02", "2023-01-16", "2023-02-20", "2023-04-07", "2023-05-29",
    "2023-07-04", "2023-09-04", "2023-10-09", "2023-11-23", "2023-12-25",
    # Add more holidays as needed
]

# Function to find the previous business day, excluding weekends and holidays
def find_previous_business_day(start_date):
    while True:
        start_date -= timedelta(days=1)
        # Skip weekends (Saturday and Sunday)
        if start_date.weekday() >= 5:
            continue
        # Check if the date is a holiday
        if start_date.strftime('%Y-%m-%d') in us_holidays:
            continue
        return start_date

# Calculate business days in the past based on the specified calendar days
business_days_in_past = []
last_business_day = find_previous_business_day(current_date)
business_days_in_past.append(f"'{last_business_day.strftime('%Y-%m-%d')}'") 

for days in days_in_past:
    past_date = current_date - timedelta(days=days)
    past_business_day = find_previous_business_day(past_date)
    business_days_in_past.append(f"'{past_business_day.strftime('%Y-%m-%d')}'")  # Format and add as a string

# Find the last business day of the last calendar year
last_business_day_of_last_year = current_date.replace(year=current_date.year - 1, month=12, day=31)
last_business_day_of_last_year = find_previous_business_day(last_business_day_of_last_year)
business_days_in_past.append(f"'{last_business_day_of_last_year.strftime('%Y-%m-%d')}'")  # Format and add as a string

# Print the results
for i, days in enumerate(days_in_past):
    print(f"{days} calendar days in the past: {business_days_in_past[i]}")

# Save the list of formatted business days as strings
# business_days_in_past contains the list of dates as formatted strings


30 calendar days in the past: '2023-09-14'
60 calendar days in the past: '2023-08-15'
90 calendar days in the past: '2023-07-14'


In [11]:
business_days_in_past

["'2023-09-14'",
 "'2023-08-15'",
 "'2023-07-14'",
 "'2023-06-16'",
 "'2022-12-30'"]

In [12]:
# Define a list of valid dates
valid_dates = business_days_in_past

# Initialize an empty list to store DataFrames
dfs = []

# Execute the SQL query for each valid date and store the results in DataFrames
for valid_date in valid_dates:
    valid_date = valid_date
    query = query = f"""SELECT
  COUNT(DISTINCT rtrs_control_number) AS trade_count_A_AA_rated,
  -- ROUND(AVG(par_traded)) AS average_par_traded,
  -- SUM(par_traded) AS total_trade_volume,
  (
    SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date = {valid_date}
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 3
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 7
      AND coupon = 3
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
  ) AS avg_yield_for_3_percent_coupon_5_yr,

  (
    SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date =  {valid_date}
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 8
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 12
      AND coupon = 3
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
  ) AS avg_yield_for_3_percent_coupon_10_yr,

  (
    SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date = {valid_date}
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 18
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 22
      AND coupon = 3
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
  ) AS avg_yield_for_3_percent_coupon_20_yr,

  (
    SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date = {valid_date}
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 3
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 7
      AND coupon = 4
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
  ) AS avg_yield_for_4_percent_coupon_5_yr,

  (
    SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date = {valid_date}
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 8
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 12
      AND coupon = 4
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
  ) AS avg_yield_for_4_percent_coupon_10_yr,

  (
    SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date = {valid_date}
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 18
      AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 22
      AND coupon = 4
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
  ) AS avg_yield_for_4_percent_coupon_20_yr,

(
  SELECT ROUND(AVG(yield), 2) AS yield_5_coupon
  FROM auxiliary_views.materialized_trade_history
  WHERE trade_date = {valid_date}
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 3
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 7
    AND coupon = 5
    AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
) AS avg_yield_for_5_percent_coupon_5_yr_to_call,

(
  SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
  FROM auxiliary_views.materialized_trade_history
  WHERE trade_date = {valid_date}
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 6
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 8
    AND coupon = 5
    AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
) AS avg_yield_for_5_percent_coupon_7_yr_to_call,

(
  SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
  FROM auxiliary_views.materialized_trade_history
  WHERE trade_date = {valid_date}
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 8
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 12
    AND coupon = 5
    AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
) AS avg_yield_for_5_percent_coupon_10_yr_to_call,

(
  SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
  FROM auxiliary_views.materialized_trade_history
  WHERE trade_date = {valid_date}
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 18
    AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 22
    AND coupon = 5
    AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
) AS avg_yield_for_5_percent_coupon_20_yr_to_call

FROM auxiliary_views.materialized_trade_history
WHERE trade_date = {valid_date}
  AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))"""
    result_df = sqltodf(query)

    # Add a 'date' column with the current valid_date
    result_df['date'] = valid_date


    # Append the DataFrame to the list
    dfs.append(result_df)

# Concatenate all DataFrames into one
final_df = pd.concat(dfs, ignore_index=True)

# Display the final DataFrame


In [13]:
final_df

Unnamed: 0,trade_count_A_AA_rated,avg_yield_for_3_percent_coupon_5_yr,avg_yield_for_3_percent_coupon_10_yr,avg_yield_for_3_percent_coupon_20_yr,avg_yield_for_4_percent_coupon_5_yr,avg_yield_for_4_percent_coupon_10_yr,avg_yield_for_4_percent_coupon_20_yr,avg_yield_for_5_percent_coupon_5_yr_to_call,avg_yield_for_5_percent_coupon_7_yr_to_call,avg_yield_for_5_percent_coupon_10_yr_to_call,avg_yield_for_5_percent_coupon_20_yr_to_call,date
0,40743,3.73,4.08,5.09,3.43,3.7,4.25,3.58,3.53,3.84,,'2023-09-14'
1,37344,3.53,3.63,4.74,3.36,3.47,4.13,3.31,3.34,3.69,,'2023-08-15'
2,25486,3.3,3.52,4.47,3.13,3.35,4.05,3.13,3.18,3.45,,'2023-07-14'
3,28745,3.32,3.48,4.53,3.14,3.3,4.06,3.18,3.18,3.53,,'2023-06-16'
4,23379,3.22,3.62,4.75,3.1,3.37,4.23,3.38,3.43,3.65,,'2022-12-30'


In [14]:
reference_date = "'2022-12-30'"
df = final_df
# Iterate through columns and calculate the differences
for column in df.columns:
    if column != 'date':
        new_column_name = f'change_since_2022_for_{column}'
        df[new_column_name] = round(df[column] - df.loc[df['date'] == reference_date, column].values[0], 4)


In [17]:
df = df[['date'] + [col for col in df.columns if col != 'date']]
table_id = 'eng-reactor-287421.jesse_tests.jr_test'

to_gbq(df, destination_table=table_id, project_id='eng-reactor-287421', if_exists='replace')


# # Upload the DataFrame to BigQuery
# to_gbq(df, destination_table=table_id, project_id, if_exists='replace')


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 10131.17it/s]


In [46]:
valid_dates = business_days_in_past


# Remove the last item from the list
valid_dates.pop()

# Append the new item
valid_dates.append("'2022-12-25'")


# Initialize an empty list to store DataFrames
dfs = []

# Execute the SQL query for each valid date and store the results in DataFrames
for valid_date in valid_dates:
    valid_date = valid_date
    vd = pd.to_datetime(valid_date)
    ed = vd + timedelta(days=5)
    ed = ed.strftime('%Y-%m-%d')
    end_date = f"'{ed}'"
    query = f"""SELECT
      COUNT(DISTINCT rtrs_control_number) AS trade_count_A_AA_rated,
      -- ROUND(AVG(par_traded)) AS average_par_traded,
      -- SUM(par_traded) AS total_trade_volume,
      (
        SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
        FROM auxiliary_views.materialized_trade_history
        WHERE trade_date BETWEEN {valid_date} AND {end_date}
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 3
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 7
          AND coupon = 3
          AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
      ) AS avg_yield_for_3_percent_coupon_5_yr,

      (
        SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
        FROM auxiliary_views.materialized_trade_history
        WHERE trade_date BETWEEN {valid_date} AND {end_date}
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 8
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 12
          AND coupon = 3
          AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
      ) AS avg_yield_for_3_percent_coupon_10_yr,

      (
        SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
        FROM auxiliary_views.materialized_trade_history
        WHERE trade_date BETWEEN {valid_date} AND {end_date}
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 18
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 22
          AND coupon = 3
          AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
      ) AS avg_yield_for_3_percent_coupon_20_yr,

      (
        SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
        FROM auxiliary_views.materialized_trade_history
        WHERE trade_date BETWEEN {valid_date} AND {end_date}
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 3
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 7
          AND coupon = 4
          AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
      ) AS avg_yield_for_4_percent_coupon_5_yr,

      (
        SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
        FROM auxiliary_views.materialized_trade_history
        WHERE trade_date BETWEEN {valid_date} AND {end_date}
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 8
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 12
          AND coupon = 4
          AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
      ) AS avg_yield_for_4_percent_coupon_10_yr,

      (
        SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
        FROM auxiliary_views.materialized_trade_history
        WHERE trade_date BETWEEN {valid_date} AND {end_date}
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) > 18
          AND DATE_DIFF(maturity_date, {valid_date}, YEAR) < 22
          AND coupon = 4
          AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
      ) AS avg_yield_for_4_percent_coupon_20_yr,

    (
      SELECT ROUND(AVG(yield), 2) AS yield_5_coupon
      FROM auxiliary_views.materialized_trade_history
      WHERE trade_date BETWEEN {valid_date} AND {end_date}
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 3
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 7
        AND coupon = 5
        AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
    ) AS avg_yield_for_5_percent_coupon_5_yr_to_call,

    (
      SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
      FROM auxiliary_views.materialized_trade_history
      WHERE trade_date BETWEEN {valid_date} AND {end_date}
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 6
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 8
        AND coupon = 5
        AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
    ) AS avg_yield_for_5_percent_coupon_7_yr_to_call,

    (
      SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
      FROM auxiliary_views.materialized_trade_history
      WHERE trade_date BETWEEN {valid_date} AND {end_date}
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 8
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 12
        AND coupon = 5
        AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
    ) AS avg_yield_for_5_percent_coupon_10_yr_to_call,

    (
      SELECT ROUND(AVG(yield), 2) AS yield_0_coupon
      FROM auxiliary_views.materialized_trade_history
      WHERE trade_date BETWEEN {valid_date} AND {end_date}
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) > 18
        AND DATE_DIFF(next_call_date, {valid_date}, YEAR) < 22
        AND coupon = 5
        AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))
    ) AS avg_yield_for_5_percent_coupon_20_yr_to_call

    FROM auxiliary_views.materialized_trade_history
    WHERE trade_date BETWEEN {valid_date} AND {end_date}
      AND ((sp_long_integer > 1 AND sp_long_integer < 7) OR (moodys_long_integer > 1 AND moodys_long_integer < 7))"""

    result_df = sqltodf(query)

    # Add a 'date' column with the current valid_date
    result_df['date'] = valid_date


    # Append the DataFrame to the list
    dfs.append(result_df)

# Concatenate all DataFrames into one
final_df = pd.concat(dfs, ignore_index=True)

reference_date = "'2022-12-25'"
df = final_df
# Iterate through columns and calculate the differences
for column in df.columns:
    if column != 'date':
        new_column_name = f'change_since_2022_for_{column}'
        df[new_column_name] = round(df[column] - df.loc[df['date'] == reference_date, column].values[0], 4)


In [50]:
df = df[['date'] + [col for col in df.columns if col != 'date']]
table_id = 'eng-reactor-287421.jesse_tests.jr_test'

to_gbq(df, destination_table=table_id, project_id='eng-reactor-287421', if_exists='replace')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2832.08it/s]
