In [1]:
#!pip install pyarrow

In [1]:
import pandas as pd
import pytz
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
import scipy.optimize as optimize
import math
import locale
from common import (MUNI_SECURITY_TYPE_DICT, COUPON_TYPE_DICT, COUPON_FREQUENCY_DICT, 
                    transform_to_datetime, diff_in_days, actual_diff_in_days, 
                    get_next_coupon_date, get_previous_coupon_date, 
                    get_time_delta_from_interest_frequency, get_ytw_and_price_data, 
                    transform_ytw_and_price_data, get_end_date,
                    is_one_coupon_or_less_till_redemption, REDEMPTION_TYPE, ARD)
from google.cloud import bigquery
from google.api_core.exceptions import BadRequest
from time import sleep
from datetime import datetime,timedelta,date,time
from tqdm import tqdm, tqdm_notebook
from pandas import NaT
sns.set();

locale.setlocale( locale.LC_ALL, 'en_US' )

tqdm_notebook().pandas()

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../creds.json"

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tqdm_notebook().pandas()


0it [00:00, ?it/s]

### we only support Fixed rate, Original issue discount and Zero coupon, for now. Need to expand this:

In [51]:
bqclient = bigquery.Client()
muni_df = get_ytw_and_price_data(bqclient, '2021-01-13')
transform_ytw_and_price_data(muni_df)

len(muni_df)

19538

In [52]:
n = 0
c = 0
#muni_df = muni_df.loc[1:10]
#len(muni_df)
#len(muni_df[muni_df['interest_payment_frequency']==0])
#len(muni_df[(muni_df['interest_payment_frequency'] == 0) & (muni_df['coupon_rate']!=0)])
#muni_df[((muni_df['coupon_type'] == 'Zero coupon') | (muni_df['coupon_rate']==0)) & (muni_df['is_called'] == True)]

In [53]:
def get_ard(dollar_price, yld, coupon_rate, interest_payment_frequency,
            redemption_value, start_date, settlement_date, max_num_interest_payments):
    
    if pd.isnull(redemption_value):
        return date.max
    A = diff_in_days(settlement_date,start_date)
    if (A < 0):
        A = 0
    B = 360
    P = dollar_price
    Y = yld/100
    R = coupon_rate/100
    RV = redemption_value
    M = interest_payment_frequency
    if (interest_payment_frequency != 0):
        E = B/M

    # formular for all interest at redemption. Notice that this includes ZCBs, i.e. R = 0
    if (interest_payment_frequency == 0):
        num = B*RV + A*Y*P + A*A*Y*R/B - B*P - A*R
        denom = Y*P + A*R*Y/B - R
        # start_date should be the dated_date
        return get_end_date(start_date, num/denom)
    
    # periodic interest payments
    time_delta = get_time_delta_from_interest_frequency(interest_payment_frequency)
    num_interest_payments = 0
    min_diff = float('inf')
    for N in range(1, max_num_interest_payments+1):
        q = abs(RV/(1+Y/M)**(N-1 + (E-A)/E) + (100*R/M)*sum([1/(1+Y/M)**(K+(E-A)/E) for K in range(N)]) - (P + 100*R*A/B))
        if (q < min_diff):
            min_diff = q
            num_interest_payments = N
            
    ard = settlement_date
    for i in range(num_interest_payments):
        ard += time_delta
    return ard
    

In [54]:
def compute_ard(trade):
    ard = ARD(None,date.max,None)
    redemption_options = [(trade.maturity_date,100.0,REDEMPTION_TYPE.MATURITY),
                          (trade.refund_date,trade.refund_price,REDEMPTION_TYPE.CALLED),
                          (trade.par_call_date,trade.par_call_price,REDEMPTION_TYPE.PAR_CALLABLE),
                          (trade.next_call_date,trade.next_call_price,REDEMPTION_TYPE.NEXT_CALLABLE)]
    
    time_delta = get_time_delta_from_interest_frequency(trade.interest_payment_frequency)
    max_num_interest_payments = 0
    prev_coupon_date = trade.dated_date
    if (trade.interest_payment_frequency != 0):
        prev_coupon_date = pd.to_datetime(trade.previous_coupon_payment_date)
        if (pd.isnull(trade.previous_coupon_payment_date)):
            prev_coupon_date = get_previous_coupon_date(trade.first_coupon_date,trade.settlement_date,time_delta)
        
        # multiple coupon payments
        start_date = pd.to_datetime(trade.settlement_date)
        end_date =  pd.to_datetime(trade.maturity_date)
        while start_date < end_date:
            start_date += time_delta
            max_num_interest_payments += 1
        
        
    min_diff = float('inf')
    for o in redemption_options:
        
        # If there is one coupon or less till redemption, the redemption is earliest possible date
        if is_one_coupon_or_less_till_redemption(trade.settlement_date,
                                                 prev_coupon_date,trade.dollar_price,
                                                 trade.interest_payment_frequency,trade.yld,
                                                 trade.coupon_rate,o[1]):
                one_coupon_or_less = True
                if (o[0] < ard.actual_date):
                    # since there is once coupon or less for the case when the redemption
                    # date is at or earlier the next coupon date, let's put the estimated 
                    # date to be the next coupon date (to be revisited later)
                    ard.actual_date = o[0]
                    ard.redemption_type = o[2]
                elif (o[0] == ard.actual_date):
                    ard.add(ARD(None,o[0],o[2]))
        else:
            # ard_o is an estimated ard for this redemption option
            ard_o = get_ard(trade.dollar_price,trade.yld,trade.coupon_rate,
                        trade.interest_payment_frequency,o[1],prev_coupon_date,
                        trade.settlement_date,max_num_interest_payments)
            if (pd.isnull(o[0])):
                continue
            diff = abs(diff_in_days(ard_o, o[0]))
            if (diff < min_diff):
                min_diff = diff
                ard.estimated_date = ard_o
                ard.actual_date = o[0]
                ard.redemption_type = o[2]
            elif (diff == min_diff):
                ard.add(ARD(ard_o, o[0], o[2]))
    
    if trade.cusip == '602737QH4':
        print(locals())
        print("----------------")
    return ard.actual_date;
        

In [56]:
muni_df['yld'] = muni_df.apply(lambda x: getattr(x, 'yield'), axis=1)
muni_df['computed_ard'] = muni_df.progress_apply(lambda x: compute_ard(x),axis=1)

  0%|          | 0/19538 [00:00<?, ?it/s]

{'trade': settlement_date                       2021-01-26 00:00:00
trade_date                                     2021-01-13
cusip                                           602737QH4
dated_date                                     2021-01-26
dollar_price                                       96.347
issue_price                                        97.347
coupon_rate                                           2.0
interest_payment_frequency                            2.0
next_call_date                                 2029-01-15
par_call_date                                  2029-01-15
next_call_price                                     100.0
par_call_price                                      100.0
basic_assumed_maturity_date                    2029-01-15
advanced_assumed_maturity_date                 2029-01-15
maturity_date                         2051-01-15 00:00:00
previous_coupon_payment_date                          NaT
next_coupon_payment_date              2022-01-15 00:00:00
firs

In [48]:
muni_df = muni_df[(muni_df['advanced_assumed_maturity_date'] != muni_df['computed_ard']) & (muni_df['computed_ard'] != muni_df['basic_assumed_maturity_date'] ) ]
muni_df

Unnamed: 0,settlement_date,trade_date,cusip,dated_date,dollar_price,issue_price,coupon_rate,interest_payment_frequency,next_call_date,par_call_date,...,muni_security_type,called_redemption_type,refund_date,refund_price,is_callable,is_called,call_timing,yield,yld,computed_ard
284,2021-01-15,2021-01-13,585488UT4,2019-10-01,124.840,120.226000,4.000,2.0,2029-08-01,2029-08-01,...,Unlimited G.O.,,NaT,,True,False,1.0,0.964,0.964,2030-02-01
296,2021-01-15,2021-01-13,302644DY2,2012-03-14,88.772,97.564734,3.125,2.0,2022-10-01,2022-10-01,...,Revenue,,NaT,,True,False,1.0,5.887,5.887,2025-10-01
297,2021-01-15,2021-01-13,302644DY2,2012-03-14,91.272,97.564734,3.125,2.0,2022-10-01,2022-10-01,...,Revenue,,NaT,,True,False,1.0,5.238,5.238,2025-10-01
298,2021-01-15,2021-01-13,302644DY2,2012-03-14,93.772,97.564734,3.125,2.0,2022-10-01,2022-10-01,...,Revenue,,NaT,,True,False,1.0,4.610,4.610,2025-10-01
299,2021-01-15,2021-01-13,302644DY2,2012-03-14,89.772,97.564734,3.125,2.0,2022-10-01,2022-10-01,...,Revenue,,NaT,,True,False,1.0,5.625,5.625,2025-10-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19267,2021-01-15,2021-01-13,27627MYL5,2020-12-17,100.000,98.077218,2.875,2.0,2027-09-01,2030-09-01,...,Special assessment,,NaT,,True,False,1.0,2.875,2.875,2030-09-01
19433,2021-01-15,2021-01-13,61370PKG2,2016-09-01,99.573,97.110323,2.750,2.0,2023-09-01,2023-09-01,...,Unlimited G.O.,,NaT,,True,False,1.0,2.786,2.786,2035-09-01
19443,2021-02-01,2021-01-13,689900T65,2021-02-01,99.500,100.000000,1.600,2.0,2030-09-01,2030-09-01,...,Unlimited G.O.,,NaT,,True,False,1.0,1.641,1.641,2034-09-01
19468,2021-01-14,2021-01-13,198041CC7,2021-01-14,99.749,101.477829,1.250,2.0,2028-10-01,2028-10-01,...,Lease-rental,,NaT,,True,False,1.0,1.275,1.275,2031-10-01


In [33]:
#ax = muni_df.plot.scatter(x='advanced_assumed_maturity_date',y='computed_ard', c='DarkBlue')

In [34]:
muni_df['ard_delta'] = muni_df.apply(lambda x: abs(diff_in_days(x.computed_ard,x.advanced_assumed_maturity_date)), axis=1)
#muni_df[muni_df['advanced_assumed_maturity_date'] != muni_df['computed_ard']]

In [35]:
delta = muni_df.sort_values(by=['ard_delta'],ascending=False)
delta[1:10]

Unnamed: 0,settlement_date,trade_date,cusip,dated_date,dollar_price,issue_price,coupon_rate,interest_payment_frequency,next_call_date,par_call_date,...,called_redemption_type,refund_date,refund_price,is_callable,is_called,call_timing,yield,yld,computed_ard,ard_delta
13989,2021-01-15,2021-01-13,64542UDY7,2018-09-04,105.55,96.213841,5.5,2.0,2024-07-01,2027-07-01,...,,NaT,,True,False,1.0,4.5,4.5,2027-07-01,9720
4100,2021-01-14,2021-01-13,74446GAC5,2019-09-04,101.25,100.0,5.75,2.0,2027-05-01,2029-05-01,...,,NaT,,True,False,1.0,5.558,5.558,2029-05-01,9000
4099,2021-01-14,2021-01-13,74446GAC5,2019-09-04,100.75,100.0,5.75,2.0,2027-05-01,2029-05-01,...,,NaT,,True,False,1.0,5.634,5.634,2029-05-01,9000
7501,2021-01-15,2021-01-13,97712DWU6,2017-05-31,97.8,100.0,5.25,2.0,2023-05-01,2024-05-01,...,,NaT,,True,False,1.0,5.407,5.407,2047-05-01,8280
7503,2021-01-15,2021-01-13,97712DWU6,2017-05-31,95.85,100.0,5.25,2.0,2023-05-01,2024-05-01,...,,NaT,,True,False,1.0,5.551,5.551,2047-05-01,8280
7502,2021-01-15,2021-01-13,97712DWU6,2017-05-31,97.55,100.0,5.25,2.0,2023-05-01,2024-05-01,...,,NaT,,True,False,1.0,5.425,5.425,2047-05-01,8280
1603,2021-01-26,2021-01-13,602737QH4,2021-01-26,96.347,97.347,2.0,2.0,2029-01-15,2029-01-15,...,,NaT,,True,False,1.0,2.166,2.166,2051-01-15,7920
4138,2021-01-15,2021-01-13,546589P55,2020-07-30,99.414,97.38336,2.375,2.0,2028-05-15,2028-05-15,...,,NaT,,True,False,1.0,2.403,2.403,2050-05-15,7920
18177,2021-01-15,2021-01-13,928104ND6,2020-10-15,102.114,96.769416,3.375,2.0,2027-07-01,2030-07-01,...,,NaT,,True,False,1.0,3.115,3.115,2030-07-01,7380


In [20]:
#how many did we get right: 
print(len(muni_df[muni_df['computed_ytw'] == muni_df['yield']])/len(muni_df))
print(len(muni_df[abs(muni_df['computed_ytw'] - muni_df['yield']) <= 0.001])/len(muni_df))

0.9835704780427884
0.9910942778175862
