Step 1: Import necessary libraries and set up the environment

In [21]:
import pandas as pd
import numpy as np
import os
from google.cloud import bigquery
from ficc.utils.auxiliary_functions import sqltodf, function_timer
from ficc.pricing.auxiliary_functions import transform_reference_data
from ficc.pricing.price import compute_price
from ficc.pricing.yield_rate import compute_yield

# Point the Google client libraries at a service-account key file.
# setdefault() keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already
# exported in the environment (other machines, CI) and only falls back to the
# original developer-local path when nothing is set.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '/Users/gil/git/ficc/creds.json')

# Set up BigQuery client
client = bigquery.Client()


#### Step 2: Define the SQL query and retrieve the data

##### Explanation of restrictions:

- `DATE_DIFF(maturity_date, trade_date, day) > 400`: Remove bonds that are close to maturity or other key dates.
- `(next_call_date IS NULL OR DATE_DIFF(next_call_date, trade_date, day) > 400)`: Exclude bonds close to their next call date.
- `(refund_date IS NULL OR DATE_DIFF(refund_date, trade_date, day) > 400)`: Exclude bonds close to their refund date.
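- `trade_date > "2023-12-19" AND trade_date < "2024-05-27"`: Focus on trades within this date range.
- `par_traded > 100000`: Focus on larger trades.
- `federal_tax_status = 2`: Only include tax-exempt bonds.
- `TIME(time_of_trade) < "09:30:00"`: Only include trades executed before 09:30.

Note: the three `DATE_DIFF` restrictions listed above are not part of the SQL query below; the corresponding 400-day filters are implemented in `apply_exclusions` (Step 6).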


In [None]:
# Define the query: one row per trade in `historical_predictions`, joined on
# rtrs_control_number to the trade/reference data needed for pricing.
#
# Each column is selected exactly once. The original selected
# historical.new_ys_prediction twice, which produces a duplicate result column
# and makes data['new_ys_prediction'] ambiguous downstream.
#
# NOTE(review): the WHERE clause filters on trade_history columns, so rows of
# `historical` without a match are dropped and the LEFT JOIN behaves like an
# inner join.
query = """
SELECT
    historical.new_ys_prediction,
    historical.new_ficc_ycl,
    IFNULL(trade_history.settlement_date, trade_history.assumed_settlement_date) AS settlement_date,
    trade_history.trade_date,
    historical.cusip,
    trade_history.accrual_date,
    trade_history.dollar_price,
    trade_history.issue_price,
    trade_history.coupon,
    trade_history.interest_payment_frequency,
    trade_history.next_call_date,
    trade_history.par_call_date,
    trade_history.next_call_price,
    trade_history.par_call_price,
    trade_history.maturity_date,
    trade_history.previous_coupon_payment_date,
    trade_history.next_coupon_payment_date,
    trade_history.first_coupon_date,
    trade_history.coupon_type,
    trade_history.muni_security_type,
    trade_history.called_redemption_type,
    trade_history.refund_date,
    trade_history.refund_price,
    trade_history.is_callable,
    trade_history.is_called,
    trade_history.call_timing,
    trade_history.yield,
    trade_history.rtrs_control_number,
    trade_history.has_zero_coupons,
    trade_history.last_period_accrues_from_date,
    trade_history.par_traded,
    trade_history.federal_tax_status,
    trade_history.call_defeased,
    trade_history.sp_long,
    trade_history.incorporated_state_code,
    trade_history.purpose_class
FROM
    `historic_predictions.historical_predictions` historical
LEFT JOIN
    auxiliary_views_v2.trade_history_same_issue_5_yr_mat_bucket_1_materialized trade_history
ON
    historical.rtrs_control_number = trade_history.rtrs_control_number
WHERE
    historical.trade_date > "2023-12-19"
    AND historical.trade_date < "2024-05-27"
    AND trade_history.par_traded > 100000
    AND trade_history.federal_tax_status = 2
    AND TIME(trade_history.time_of_trade) < "09:30:00"
ORDER BY trade_history.coupon ASC

"""

# Retrieve the data and convert to a DataFrame
muni_df = sqltodf(query, client)
len(muni_df)


55821

Step 3: Calculate the Yield to Worst (YTW)

In [23]:
# Yield to Worst (YTW): predicted yield spread plus the ficc yield-curve level,
# both taken from historical_predictions, then divided by 100.
# NOTE(review): the /100 presumably converts basis points to percent - confirm.
# `data` is the same object as `muni_df` (no copy), so the new column appears on both.
data = muni_df
predicted_spread = data['new_ys_prediction']
curve_level = data['new_ficc_ycl']
data['ficc_ytw'] = (predicted_spread + curve_level) / 100
len(data)


55821

Step 4: Convert YTW to Dollar using compute_price function

In [24]:
# Fill NA values in called_redemption_type with 0
data['called_redemption_type'] = data['called_redemption_type'].fillna(0)

# Convert call_defeased to string, using '0' for missing values.
# The missing-value mask is taken from the raw column: astype(str) turns NaN/None
# into the strings 'nan'/'None', so a fillna() placed after it never fills anything.
call_defeased_raw = data['call_defeased']
data['call_defeased'] = call_defeased_raw.astype(str).where(call_defeased_raw.notna(), '0')

# switch = data.called_redemption_type.isin([1.0, 5.0]) & (data.call_defeased != "1")
# data.loc[switch, 'is_callable'] = True

# Price every trade from its ficc YTW. compute_price is called once per row and
# its full return value is kept in price_calc_from_yield; the first element of
# that result is used as the price.
# NOTE(review): the meaning of the remaining elements is not visible here - confirm.
data['price_calc_from_yield'] = data.apply(lambda x: compute_price(x, x.ficc_ytw), axis=1)
data["price_from_yield"] = [x[0] for x in data['price_calc_from_yield']]
len(data)

55821

Step 5: Compute the delta between predicted price and actual price

In [25]:
# Compute the delta between the predicted price and the actual price.
# The guard checks the two columns that are actually read below; the original
# tested 'price_calc_from_yield' and reported a non-existent 'price_pred' column.
if 'price_from_yield' in data.columns and 'dollar_price' in data.columns:
    data['price_delta'] = data['price_from_yield'] - data['dollar_price']
else:
    print("Necessary columns 'price_from_yield' and 'dollar_price' not found in the data.")

len(data)


55821

Step 6: Apply exclusions to the data

In [26]:
import pandas as pd
import numpy as np

# Whole days from settlement to maturity, next call and refund.
# A missing date produces 0, which apply_exclusions treats as "keep" (== 0).
for days_column, date_column in (
    ('days_to_maturity', 'maturity_date'),
    ('days_to_call', 'next_call_date'),
    ('days_to_refund', 'refund_date'),
):
    time_to_event = data[date_column] - data['settlement_date']
    data.loc[:, days_column] = time_to_event.dt.days.fillna(0).astype(int)

# Completed 365-day years until maturity (floor division).
data['years_to_maturity'] = (data['days_to_maturity'] // 365).astype(int)

def apply_exclusions(data: pd.DataFrame, dataset_name: str = None):
    """Drop trades that are close to a call, refund or maturity date.

    Filters are applied in order, each on the output of the previous one:

    1. keep rows whose ``days_to_call`` is 0 or greater than 400
    2. keep rows whose ``days_to_refund`` is 0 or greater than 400
    3. keep rows whose ``days_to_maturity`` is 0 or greater than 400
    4. keep rows whose ``days_to_maturity`` is below 30000

    Negative day counts fail filters 1-3 as well, so they are removed too.
    A line is printed for every filter that removed at least one row.

    Args:
        data: Frame with ``days_to_call``, ``days_to_refund`` and
            ``days_to_maturity`` columns.
        dataset_name: Optional label inserted into the printed messages.

    Returns:
        Tuple ``(filtered, before)``: the filtered frame and a ``data[:]``
        slice of the input taken before any filter ran.
    """
    origin_label = '' if dataset_name is None else f' from {dataset_name}'
    data_before_exclusions = data[:]

    def unset_or_beyond_window(frame, column):
        # True where the day count is exactly 0 or more than 400.
        return (frame[column] == 0) | (frame[column] > 400)

    # (mask builder, reason text reported when rows are removed), in execution order
    filters = [
        (lambda frame: unset_or_beyond_window(frame, 'days_to_call'),
         'having 0 < days_to_call <= 400'),
        (lambda frame: unset_or_beyond_window(frame, 'days_to_refund'),
         'having 0 < days_to_refund <= 400'),
        (lambda frame: unset_or_beyond_window(frame, 'days_to_maturity'),
         'having 0 < days_to_maturity <= 400'),
        (lambda frame: frame['days_to_maturity'] < 30000,
         'having days_to_maturity >= 30000'),
    ]

    for build_mask, reason in filters:
        rows_before = len(data)
        data = data[build_mask(data)]
        removed = rows_before - len(data)
        if removed != 0:
            print(f'Removed {removed} trades{origin_label} for {reason}')

    return data, data_before_exclusions

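# Apply exclusions (currently disabled: the call below is commented out, so the metrics that follow use the full, unfiltered sample)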
# data, data_before_exclusions = apply_exclusions(data)


Step 7: Calculate the average Mean Absolute Error (MAE) for dollar errors

In [27]:
# Mean absolute dollar error over every row currently in `data`
absolute_price_errors = data['price_delta'].abs()
average_mae = absolute_price_errors.mean()

# Report the MAE rounded to three decimals, with the sample size
print(f"Average MAE for dollar errors: {round(average_mae, 3)}, Sample: {len(data):,}")

Average MAE for dollar errors: 0.324, Sample: 55,821


In [28]:
# Purpose-class code -> description. Codes are the floats 1.0 through 53.0,
# assigned in the order the descriptions are listed below.
PURPOSE_CLASS_DICT = {
    float(code): description
    for code, description in enumerate(
        (
            'Authority',
            'Bond Anticipation Note',
            'Building',
            'Capital Loan Notes',
            'Certificates Of Participation',
            'Corporate Purpose',
            'Drainage District',
            'Economic Development Revenue',
            'Education',
            'Environment',
            'Equipment',
            'Federal Aid Note',
            'Fire District',
            'Flood Water Sewer Disposal',
            'General Purpose',
            'Grant Anticipation Note',
            'Health',
            'Housing',
            'Improvement',
            'Industrial Development Revenue',
            'Library District',
            'Metropolitan District',
            'Miscellaneous',
            'Municipal Bond Bank',
            'Municipal Utilities District',
            'Park District',
            'Parking Authority',
            'Pollution Control Revenue',
            'Pool Financing Authority',
            'Promissory Note',
            'Public Housing Authority',
            'Public Utility District',
            'Racetrack/Casino',
            'Recreation Authority',
            'Revenue Anticipation Note',
            'Sanitation District',
            'School District',
            'Special Assessment',
            'Special Obligation',
            'State and Federal Anticipation Note',
            'State and Grant Anticipation Note',
            'State Anticipation Notes',
            'State Store',
            'Tax and Revenue Anticipation',
            'Tax Anticipation Note',
            'Tax Revenue',
            'Temporary Notes',
            'Transportation',
            'Tribal Bonds',
            'Utility',
            'Various Purpose',
            'Veterans',
            'Warrants',
        ),
        start=1,
    )
}

# Add a human-readable purpose description column; purpose_class values that are
# not keys of PURPOSE_CLASS_DICT (including missing values) map to NaN.
data['purpose_class_desc'] = data['purpose_class'].map(PURPOSE_CLASS_DICT)


In [29]:
def calculate_mae_and_count(group):
    """Summarise one group of trades.

    Args:
        group: DataFrame with a ``price_delta`` column.

    Returns:
        pd.Series indexed by ``mae_price_delta`` (mean of the absolute
        ``price_delta`` values) and ``count`` (number of rows in the group).
    """
    absolute_errors = group['price_delta'].abs()
    return pd.Series(
        [absolute_errors.mean(), len(group)],
        index=['mae_price_delta', 'count'],
    )

threshold = 100  # Define a threshold for minimum sample count


def _mae_table(group_column):
    """MAE and row count per value of `group_column`, keeping groups with at least `threshold` rows."""
    per_group = data.groupby(group_column).apply(calculate_mae_and_count).reset_index()
    return per_group[per_group['count'] >= threshold]


mae_by_sp_long = _mae_table('sp_long')
mae_by_state_code = _mae_table('incorporated_state_code')
mae_by_purpose_class = _mae_table('purpose_class_desc')
mae_by_years_to_maturity = _mae_table('years_to_maturity')

# Print each table under its heading, in the same order as they were computed
reports = [
    ("MAE by sp_long:", mae_by_sp_long),
    ("\nMAE by incorporated_state_code:", mae_by_state_code),
    ("\nMAE by purpose_class:", mae_by_purpose_class),
    ("\nMAE by years_to_maturity:", mae_by_years_to_maturity),
]
for heading, table in reports:
    print(heading)
    print(table)




MAE by sp_long:
   sp_long  mae_price_delta    count
0        A         0.307090   1405.0
1       A+         0.278575   2454.0
2       A-         0.357096   1171.0
3       AA         0.352610  11932.0
4      AA+         0.283552   9635.0
5      AA-         0.302834   6705.0
6      AAA         0.282379  10513.0
12     BBB         2.739591    110.0
13    BBB+         0.537223    291.0
18      NR         0.243289    453.0

MAE by incorporated_state_code:
   incorporated_state_code  mae_price_delta   count
0                       AK         0.180612   103.0
1                       AL         1.060975   403.0
2                       AR         0.350031   130.0
3                       AZ         0.254857   898.0
4                       CA         0.331328  6725.0
5                       CO         0.319683   921.0
6                       CT         0.228945   968.0
7                       DC         0.258115   410.0
8                       DE         0.277531   179.0
9                       