In [1]:
###Calc_date_and_price notebook 3.15.2022

In [2]:
import os
from google.cloud import bigquery

# Point the Google client libraries at a credentials file. The path is relative,
# so it is resolved against the directory the kernel was started in.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="../creds.json"
# BigQuery client; presumably the `bq_client` argument handed to `sqltodf` — TODO confirm.
bqclient = bigquery.Client()

# Google Cloud project id.
# NOTE(review): not referenced in the visible part of the notebook — confirm it is still needed.
project = "eng-reactor-287421"

In [3]:
import pandas

In [4]:
 NUM_OF_DAYS_IN_YEAR = 360

# Maps the integer interest-payment-frequency identifier found in the bond
# reference data to its descriptive name.
COUPON_FREQUENCY_DICT = {
    0: "Unknown",
    1: "Semiannually",
    2: "Monthly",
    3: "Annually",
    4: "Weekly",
    5: "Quarterly",
    6: "Every 2 years",
    7: "Every 3 years",
    8: "Every 4 years",
    9: "Every 5 years",
    10: "Every 7 years",
    11: "Every 8 years",
    12: "Biweekly",
    13: "Changeable",
    14: "Daily",
    15: "Term mode",
    16: "Interest at maturity",
    17: "Bimonthly",
    18: "Every 13 weeks",
    19: "Irregular",
    20: "Every 28 days",
    21: "Every 35 days",
    22: "Every 26 weeks",
    23: "Not Applicable",
    24: "Tied to prime",
    25: "One time",
    26: "Every 10 years",
    27: "Frequency to be determined",
    28: "Mandatory put",
    29: "Every 52 weeks",
    30: "When interest adjusts-commercial paper",
    31: "Zero coupon",
    32: "Certain years only",
    33: "Under certain circumstances",
    34: "Every 15 years",
    35: "Custom",
    36: "Single Interest Payment",
}

# Placeholder frequency assigned to the "Unknown" and "Not Applicable" names.
LARGE_NUMBER = 1e6

# Maps a frequency name to a number of interest payments per year; values
# below 1 stand for one payment every several years, and 0 marks interest
# paid only at maturity.
# NOTE(review): COUPON_FREQUENCY_DICT contains names with no entry here
# (e.g. "Term mode", "Bimonthly", "Zero coupon"); looking one of those up
# raises KeyError. The value 44 for "Changeable" also looks arbitrary — confirm.
COUPON_FREQUENCY_TYPE = {
    "Unknown": LARGE_NUMBER,
    "Semiannually": 2,
    "Monthly": 12,
    "Annually": 1,
    "Weekly": 52,
    "Quarterly": 4,
    "Every 2 years": 0.5,
    "Every 3 years": 1/3,
    "Every 4 years": 0.25,
    "Every 5 years": 0.2,
    "Every 7 years": 1/7,
    "Every 8 years": 1/8,
    "Biweekly": 26,
    "Changeable": 44,
    "Daily": 360,
    "Interest at maturity": 0,
    "Not Applicable": LARGE_NUMBER,
}


In [5]:
# Days in a year under the 360/30 day-count convention; the pricing helpers divide by it.
# NOTE(review): the same constant is already assigned in an earlier cell of this notebook.
NUM_OF_DAYS_IN_YEAR = 360

def compare_dates(date1, date2):
    """Compare two calendar dates, ignoring any time-of-day component.

    Each argument may be a `datetime.date`, a `datetime.datetime` or a
    `pd.Timestamp`, in any combination. Datetime-like values are reduced to
    their calendar date first: Python refuses to order a datetime against a
    plain date, and the mixed Timestamp/date comparison causes a FutureWarning.

    Returns:
        1 if `date1` is after `date2`, 0 if both fall on the same day,
        -1 if `date1` is before `date2`, and None when none of the three
        comparisons holds.
    """
    import datetime    # local import: the notebook has no top-level `import datetime`

    # pd.Timestamp subclasses datetime.datetime, so this one check covers both.
    if isinstance(date1, datetime.datetime):
        date1 = date1.date()
    if isinstance(date2, datetime.datetime):
        date2 = date2.date()

    if date1 > date2:
        return 1
    if date1 == date2:
        return 0
    if date1 < date2:
        return -1
    return None

'''
 # @ Create Time: 2021-12-20 10:00:17
 # @ Modified by: Developer
 # @ Modified time: 2022-01-20 09:33:30
 # @ Description: This file implements a function to calculate the difference in 
 # days between two dates in accordance with the provisions of MSRB Rule G-33
 '''

import pandas as pd

def diff_in_days_two_dates(end_date, start_date, convention="360/30"):
    """Count the days from `start_date` to `end_date` under the 360/30 convention.

    Implements the day-count formula of MSRB Rule Book G-33, rule (e): every
    month counts as 30 days and every year as 360. Only the "360/30"
    convention is handled; for any other `convention` a message is printed
    and None is returned.
    """
    if convention != "360/30":
        print("unknown convention", convention)
        return None

    start_day = min(start_date.day, 30)    # a start day of 31 counts as 30
    end_day = end_date.day
    if start_day == 30:
        end_day = min(end_day, 30)    # an end day of 31 counts as 30 only when the start day is 30 or 31

    year_span = end_date.year - start_date.year
    month_span = end_date.month - start_date.month
    return year_span * 360 + month_span * 30 + (end_day - start_day)

def diff_in_days(trade, convention="360/30", **kwargs):
    """Return the 360/30 day count leading up to a trade's settlement date.

    Without a `calc_type` keyword the count starts at `trade.dated_date`.
    With `calc_type='accrual'` and a non-null `trade.accrual_date` it starts
    at the accrual date. See MSRB Rule G-33 for details.

    Raises:
        ValueError: if `calc_type` is given but is not 'accrual', or the
            trade's accrual date is null.
    """
    if 'calc_type' not in kwargs:
        start_date = trade.dated_date
    elif kwargs['calc_type'] == 'accrual' and not pd.isnull(trade.accrual_date):
        start_date = trade.accrual_date
    else:
        raise ValueError('Invalid arguments')

    return diff_in_days_two_dates(trade.settlement_date, start_date, convention)

'''
 # @ Create Time: 2022-01-13 18:02:00
 # @ Description: This file implements functions for truncating final outputs.
 '''

def trunc(x, decimal_places):
    """Cut `x` down to `decimal_places` decimal places without rounding.

    The value is scaled up by a power of ten, floored with `// 1` and scaled
    back down. Because flooring goes toward negative infinity, a negative
    `x` moves away from zero.

    >>> trunc(3.33333, 3)
    3.333
    >>> trunc(3.99499, 3)
    3.994
    >>> trunc(30.99499, 3)
    30.994
    """
    scale = 10 ** decimal_places
    floored = (x * scale) // 1
    return floored / scale

def trunc_and_round_price(price):
    """Truncate the final price to three decimal places, following MSRB Rule Book G-33, rule (d)."""
    price_decimal_places = 3
    return trunc(price, price_decimal_places)

def trunc_and_round_yield(yield_rate):
    """Truncate the final yield to four decimal places, then round it to three.

    Follows MSRB Rule Book G-33, rule (d).
    """
    truncated = trunc(yield_rate, 4)
    return round(truncated, 3)

'''
 # @ Author: Developer 
 # @ Create Time: 2021-12-15 13:59:54
 # @ Modified by: Developer
 # @ Modified time: 2022-02-10 10:49:09
 # @ Description: This file contains function to help the functions 
 # to process training data
 '''
import pandas as pd

def sqltodf(sql, bq_client):
    """Run `sql` through `bq_client` and return the finished result as a DataFrame."""
    query_job = bq_client.query(sql)
    return query_job.result().to_dataframe()


def drop_extra_columns(df):
    """Remove a fixed set of columns from `df` in place and return the same frame.

    Every listed column must be present: pandas raises KeyError if any of
    them is missing.
    """
    columns_to_drop = [
        'sp_stand_alone',
        'sp_icr_school',
        'sp_watch_long',
        'sp_outlook_long',
        'sp_prelim_long',
        'MSRB_maturity_date',
        'MSRB_INST_ORDR_DESC',
        'MSRB_valid_from_date',
        'MSRB_valid_to_date',
        'upload_date',
        'sequence_number',
        'ref_valid_from_date',
        'ref_valid_to_date',
        'additional_next_sink_date',
        'last_period_accrues_from_date',
        'primary_market_settlement_date',
        'assumed_settlement_date',
        'sale_date',
        'q',
        'd',
    ]
    # In-place drop is kept on purpose: callers may rely on `df` itself being modified.
    df.drop(columns=columns_to_drop, inplace=True)
    return df


def convert_dates(df):
    """Parse every column whose name contains "date" (in any letter case) with `pd.to_datetime`.

    The columns are overwritten on `df` itself, which is also returned.
    """
    date_like_columns = list(filter(lambda name: 'DATE' in name.upper(), df.columns))
    for name in date_like_columns:
        df[name] = pd.to_datetime(df[name])

    return df

def process_ratings(df):
    """Keep only rows with an accepted `sp_long` rating and add a `rating` column.

    Rows whose `sp_long` is not one of BBB+ through AAA or NR are removed,
    and `rating` is set to the `sp_long` value of the remaining rows. The
    input frame is left unchanged; a new frame is returned.
    """
    accepted_ratings = ['BBB+','A-','A','A+','AA-','AA','AA+','AAA','NR']
    # .copy() makes the filtered frame independent of `df`, so adding a
    # column below does not trigger pandas' SettingWithCopyWarning.
    rated = df[df.sp_long.isin(accepted_ratings)].copy()
    rated['rating'] = rated['sp_long']
    return rated
    
def get_latest_trade_feature(x, feature):
    """Extract one feature of the latest trade from the trade history array.

    `x[0]` is treated as the latest trade. Within that record, 'yield_spread'
    is read from position 0, 'par_traded' from position 1 and 'seconds_ago'
    from the last position. Any other `feature` yields None.
    """
    latest_trade = x[0]
    position_by_feature = {'yield_spread': 0, 'par_traded': 1, 'seconds_ago': -1}
    if feature not in position_by_feature:
        return None
    return latest_trade[position_by_feature[feature]]

def compare_dates(date1, date2):
    """Compare two calendar dates, ignoring any time-of-day component.

    Each argument may be a `datetime.date`, a `datetime.datetime` or a
    `pd.Timestamp`, in any combination. Datetime-like values are reduced to
    their calendar date first: Python refuses to order a datetime against a
    plain date, and the mixed Timestamp/date comparison causes a FutureWarning.

    Returns:
        1 if `date1` is after `date2`, 0 if both fall on the same day,
        -1 if `date1` is before `date2`, and None when none of the three
        comparisons holds.
    """
    import datetime    # local import: the notebook has no top-level `import datetime`

    # pd.Timestamp subclasses datetime.datetime, so this one check covers both.
    if isinstance(date1, datetime.datetime):
        date1 = date1.date()
    if isinstance(date2, datetime.datetime):
        date2 = date2.date()

    if date1 > date2:
        return 1
    if date1 == date2:
        return 0
    if date1 < date2:
        return -1
    return None

def dates_are_equal(date1, date2):
    """Return True when `compare_dates` reports the two dates as equal."""
    ordering = compare_dates(date1, date2)
    return ordering == 0

def convert_object_to_category(df):
    """Convert object-dtype columns of `df` to the category dtype, in place.

    Columns whose name ends in "event", "redemption", "history" or "date"
    are skipped, as are a handful of free-text columns listed below. The
    modified `df` is returned.
    """
    print("Converting object data type to categorical data type")
    skipped_suffixes = ("event", "redemption", "history", "date")
    free_text_columns = ['organization_primary_name','security_description','recent','issue_text','series_name']
    for col_name in df.columns:
        if col_name.endswith(skipped_suffixes):
            continue

        is_object = df[col_name].dtype == "object"
        if is_object and col_name not in free_text_columns:
            df[col_name] = df[col_name].astype("category")
    return df

'''
 # @ Create Time: 2022-01-13 17:58:00
 # @ Modified by: Developer
 # @ Modified time: 2022-01-24 12:19:00
 # @ Description: This file implements functions for bonds that have been called.
 '''

import pandas as pd

def end_date_for_called_bond(trade):
    """Return the end date of a called bond, which is its refund date.

    Raises:
        ValueError: if `trade.refund_date` is null.
    """
    if pd.isnull(trade.refund_date):
        raise ValueError(f"Bond (CUSIP: {trade.cusip}, RTRS: {trade.rtrs_control_number}) is called, but no refund date.")
    return trade.refund_date

def refund_price_for_called_bond(trade):
    """Return the refund price of a called bond.

    Raises:
        ValueError: if `trade.refund_price` is null.
    """
    if pd.isnull(trade.refund_price):
        raise ValueError(f"Bond (CUSIP: {trade.cusip}, RTRS: {trade.rtrs_control_number}) is called, but no refund price.")
    return trade.refund_price

'''
 # @ Create Time: 2022-01-13 23:04:00
 # @ Modified by: Developer
 # @ Modified time: 2022-02-25 14:04:00
 # @ Description: This file implements functions to compute the price of a trade
 # given the yield.
 '''




In [6]:
'''
 # @ Author: Developer 
 # @ Create Time: 2021-12-15 13:59:54
 # @ Modified by: Developer
 # @ Modified time: 2022-02-10 10:49:09
 # @ Description: This file contains function to help the functions 
 # to process training data
 '''
import pandas as pd

def sqltodf(sql, bq_client):
    """Run `sql` through `bq_client` and return the finished result as a DataFrame."""
    query_job = bq_client.query(sql)
    return query_job.result().to_dataframe()


def drop_extra_columns(df):
    """Remove a fixed set of columns from `df` in place and return the same frame.

    Every listed column must be present: pandas raises KeyError if any of
    them is missing.
    """
    columns_to_drop = [
        'sp_stand_alone',
        'sp_icr_school',
        'sp_watch_long',
        'sp_outlook_long',
        'sp_prelim_long',
        'MSRB_maturity_date',
        'MSRB_INST_ORDR_DESC',
        'MSRB_valid_from_date',
        'MSRB_valid_to_date',
        'upload_date',
        'sequence_number',
        'ref_valid_from_date',
        'ref_valid_to_date',
        'additional_next_sink_date',
        'last_period_accrues_from_date',
        'primary_market_settlement_date',
        'assumed_settlement_date',
        'sale_date',
        'q',
        'd',
    ]
    # In-place drop is kept on purpose: callers may rely on `df` itself being modified.
    df.drop(columns=columns_to_drop, inplace=True)
    return df


def convert_dates(df):
    """Parse every column whose name contains "date" (in any letter case) with `pd.to_datetime`.

    The columns are overwritten on `df` itself, which is also returned.
    """
    date_like_columns = list(filter(lambda name: 'DATE' in name.upper(), df.columns))
    for name in date_like_columns:
        df[name] = pd.to_datetime(df[name])

    return df

def process_ratings(df):
    """Keep only rows with an accepted `sp_long` rating and add a `rating` column.

    Rows whose `sp_long` is not one of BBB+ through AAA or NR are removed,
    and `rating` is set to the `sp_long` value of the remaining rows. The
    input frame is left unchanged; a new frame is returned.
    """
    accepted_ratings = ['BBB+','A-','A','A+','AA-','AA','AA+','AAA','NR']
    # .copy() makes the filtered frame independent of `df`, so adding a
    # column below does not trigger pandas' SettingWithCopyWarning.
    rated = df[df.sp_long.isin(accepted_ratings)].copy()
    rated['rating'] = rated['sp_long']
    return rated
    
def get_latest_trade_feature(x, feature):
    """Extract one feature of the latest trade from the trade history array.

    `x[0]` is treated as the latest trade. Within that record, 'yield_spread'
    is read from position 0, 'par_traded' from position 1 and 'seconds_ago'
    from the last position. Any other `feature` yields None.
    """
    latest_trade = x[0]
    position_by_feature = {'yield_spread': 0, 'par_traded': 1, 'seconds_ago': -1}
    if feature not in position_by_feature:
        return None
    return latest_trade[position_by_feature[feature]]

def compare_dates(date1, date2):
    """Compare two calendar dates, ignoring any time-of-day component.

    Each argument may be a `datetime.date`, a `datetime.datetime` or a
    `pd.Timestamp`, in any combination. Datetime-like values are reduced to
    their calendar date first: Python refuses to order a datetime against a
    plain date, and the mixed Timestamp/date comparison causes a FutureWarning.

    Returns:
        1 if `date1` is after `date2`, 0 if both fall on the same day,
        -1 if `date1` is before `date2`, and None when none of the three
        comparisons holds.
    """
    import datetime    # local import: the notebook has no top-level `import datetime`

    # pd.Timestamp subclasses datetime.datetime, so this one check covers both.
    if isinstance(date1, datetime.datetime):
        date1 = date1.date()
    if isinstance(date2, datetime.datetime):
        date2 = date2.date()

    if date1 > date2:
        return 1
    if date1 == date2:
        return 0
    if date1 < date2:
        return -1
    return None

def dates_are_equal(date1, date2):
    """Return True when `compare_dates` reports the two dates as equal."""
    ordering = compare_dates(date1, date2)
    return ordering == 0

def convert_object_to_category(df):
    """Convert object-dtype columns of `df` to the category dtype, in place.

    Columns whose name ends in "event", "redemption", "history" or "date"
    are skipped, as are a handful of free-text columns listed below. The
    modified `df` is returned.
    """
    print("Converting object data type to categorical data type")
    skipped_suffixes = ("event", "redemption", "history", "date")
    free_text_columns = ['organization_primary_name','security_description','recent','issue_text','series_name']
    for col_name in df.columns:
        if col_name.endswith(skipped_suffixes):
            continue

        is_object = df[col_name].dtype == "object"
        if is_object and col_name not in free_text_columns:
            df[col_name] = df[col_name].astype("category")
    return df

In [7]:
'''
 # @ Create Time: 2022-01-13 17:54:00
 # @ Description: This file implements functions to handle interest payment
 # frequency information of a bond.
 '''

from dateutil.relativedelta import relativedelta



def get_frequency(identifier):
    """Return the coupon payment frequency for an interest payment frequency identifier.

    `identifier` is either the integer code from the bond reference data or,
    if `COUPON_FREQUENCY_DICT` has already been applied, the frequency name.

    Raises:
        KeyError: if the code is not in `COUPON_FREQUENCY_DICT` or the name
            is not in `COUPON_FREQUENCY_TYPE`.
    """
    # isinstance rather than an exact type check, so that str subclasses
    # (e.g. numpy.str_) are also recognised as names.
    if isinstance(identifier, str):
        return COUPON_FREQUENCY_TYPE[identifier]
    return COUPON_FREQUENCY_TYPE[COUPON_FREQUENCY_DICT[identifier]]

def get_time_delta_from_interest_frequency(interest_payment_frequency):
    """Return the gap between interest payments as a `relativedelta`.

    The frequency is a number of payments per year. Depending on its size
    the gap is expressed in years (at most 1 payment a year), months (up to
    12), weeks (up to 52) or days (up to 360). A frequency of 0, or one
    above 360, yields the integer 0 instead of a `relativedelta`.

    Raises:
        ValueError: if a frequency above 1 does not divide the number of
            months, weeks or days in the year that applies to it.
    """
    if interest_payment_frequency == 0:
        return 0

    if interest_payment_frequency <= 1:
        return relativedelta(years=1 / interest_payment_frequency)

    # (number of units in a year, relativedelta keyword), tried from coarsest to finest
    for units_in_year, unit_name in ((12, "months"), (52, "weeks"), (360, "days")):
        if interest_payment_frequency <= units_in_year:
            if units_in_year % interest_payment_frequency != 0:
                raise ValueError(f"The interest payment frequency of {interest_payment_frequency} is invalid, since it must divide {units_in_year}")
            return relativedelta(**{unit_name: units_in_year / interest_payment_frequency})

    return 0

In [8]:
'''
 # @ Create Time: 2022-01-13 17:44:00
 # @ Description: This file implements functions to help with pricing bonds
 # and computing yields.
 '''

import pandas as pd



def transform_reference_data(df):
    """Coerce fields of the BigQuery dataframe to the right types, in place.

    - `interest_payment_frequency` is replaced by the value `get_frequency`
      returns for it.
    - `coupon`, `yield` and `next_call_price` are cast to float.
    - `deferred` is set to True where the payment frequency is 0 or the
      coupon is 0.

    The columns are written on the dataframe that is passed in, which is
    also returned.
    """
    df['interest_payment_frequency'] = df.apply(lambda trade: get_frequency(trade["interest_payment_frequency"]), axis=1)
    df['coupon'] = df['coupon'].astype(float)
    df['yield'] = df['yield'].astype(float)
    # Each comparison needs its own parentheses: `|` binds tighter than `==`,
    # so `a | b == 0` would be evaluated as `(a | b) == 0`.
    df['deferred'] = (df.interest_payment_frequency == 0) | (df.coupon == 0)

    df['next_call_price'] = df['next_call_price'].astype(float)
    return df

def get_next_coupon_date(first_coupon_date, start_date, time_delta):
    """Return the first coupon date that is on or after `start_date`.

    Starts at `first_coupon_date` and steps forward one `time_delta` at a
    time. The result can fall after the bond's end date; that is not a
    problem because the final coupon is handled separately in
    `price_of_bond_with_multiple_periodic_interest_payments`.

    Note: this function may be unnecessary if the `next_coupon_date` field is
    never null when there is a next coupon date. This should be confirmed.
    """
    candidate = first_coupon_date
    while True:
        if compare_dates(candidate, start_date) >= 0:
            return candidate
        candidate = candidate + time_delta
#     cannot use the below code since division is not valid between datetime.timedelta and relativedelta, and converting between types introduces potential for errors
#     num_of_time_periods = int(np.ceil((start_date - first_coupon_date) / time_delta))    # `int` wraps the `ceil` function because the `ceil` function returns a float
#     return first_coupon_date + time_delta * num_of_time_periods

def get_previous_coupon_date(first_coupon_date, start_date, accrual_date, time_delta, next_coupon_date=None):
    """Return the coupon date that precedes the next coupon date.

    When `next_coupon_date` is not supplied it is derived with
    `get_next_coupon_date`. If the next coupon is the bond's first coupon,
    `accrual_date` is returned; otherwise the result is one `time_delta`
    before the next coupon date.

    Note: this function may be unnecessary if the `previous_coupon_date`
    field is never null when `next_coupon_date` exists. This should be confirmed.
    """
    if next_coupon_date is None:
        next_coupon_date = get_next_coupon_date(first_coupon_date, start_date, time_delta)

    next_is_first_coupon = dates_are_equal(next_coupon_date, first_coupon_date)
    return accrual_date if next_is_first_coupon else next_coupon_date - time_delta

def get_prev_coupon_date_and_next_coupon_date(trade, frequency, time_delta):
    """Return `(prev_coupon_date, next_coupon_date)` for a trade.

    Unlike `get_next_coupon_date` and `get_previous_coupon_date`, this also
    covers bonds that pay no periodic coupons: for a frequency of 0 the pair
    is the accrual date and the maturity date. Otherwise the trade's
    `next_coupon_payment_date` / `previous_coupon_payment_date` fields are
    used when present and computed from the first coupon date when null.
    If both the next coupon payment date and the first coupon date are null,
    a message is printed and `(None, None)` is returned.

    Note: the variables here drop the word `payment` from the field names
    (`next_coupon_payment_date` -> `next_coupon_date`) for conciseness.
    """
    if frequency == 0:
        next_coupon_date = trade.maturity_date
        prev_coupon_date = trade.accrual_date
        return prev_coupon_date, next_coupon_date

    if not pd.isnull(trade.next_coupon_payment_date):
        next_coupon_date = pd.to_datetime(trade.next_coupon_payment_date)
    elif pd.isnull(trade.first_coupon_date):
        print(f'no coupon date for {trade.rtrs_control_number, trade.cusip}')
        return None, None
    else:
        next_coupon_date = get_next_coupon_date(trade.first_coupon_date, trade.settlement_date, time_delta)

    if not pd.isnull(trade.previous_coupon_payment_date):
        prev_coupon_date = pd.to_datetime(trade.previous_coupon_payment_date)
    else:
        prev_coupon_date = get_previous_coupon_date(trade.first_coupon_date, trade.settlement_date, trade.accrual_date, time_delta, next_coupon_date)

    return prev_coupon_date, next_coupon_date

def get_num_of_interest_payments_and_final_coupon_date(next_coupon_date, end_date, time_delta): 
    """Return the number of interest payments and the final coupon date.

    Counts the coupon dates from `next_coupon_date` up to and including
    `end_date`, stepping by `time_delta`. Both values are returned together
    because each is a byproduct of computing the other. If the next coupon
    already falls after `end_date`, the result is `(0, next_coupon_date)`.
    The special case of an odd final coupon is handled in
    `price_of_bond_with_multiple_periodic_interest_payments`.
    """
    if compare_dates(next_coupon_date, end_date) > 0:
        return 0, next_coupon_date    # return 1, end_date (would be valid in isolation)

    payment_count = 1
    last_coupon_date = next_coupon_date
    following_coupon_date = last_coupon_date + time_delta
    while compare_dates(following_coupon_date, end_date) <= 0:
        payment_count += 1
        last_coupon_date = following_coupon_date
        following_coupon_date = last_coupon_date + time_delta
    return payment_count, last_coupon_date

'''
This function is called when the interest is only paid at maturity (which is represented 
in the transformed dataframe as interest payment frequency equaling 0). There are two 
cases when interest is paid at maturity. The first case is for short term bonds where 
there is a single coupon payment at maturity, and this logic will reduce to the logic 
in MSRB Rule Book G-33, rule (b)(i)(A). The second case is when there is a compounding 
accreted value (i.e., capital appreciation bonds) which accrues semiannually. Then, to get 
the price of this bond, we need to account for the accrued interest. This can be thought 
of as a bond that pays a coupon semiannually through the duration of the bond, but all the 
coupon payments are made as a single payment at the time the bond is called / maturity. 
For more info and an example, see the link: 
https://www.investopedia.com/terms/c/cav.asp#:~:text=Compound%20accreted%20value%20(CAV)%20is,useful%20metric%20for%20bond%20investors.
'''
def price_of_bond_with_interest_at_maturity(cusip,    # can be used for debugging purposes
                                            settlement_date, 
                                            accrual_date, 
                                            end_date, 
                                            yield_rate, 
                                            coupon, 
                                            RV):
    """Price a bond whose interest is paid in one amount at `end_date`.

    The redemption value plus the simple interest accrued from `accrual_date`
    to `end_date` is discounted back to `settlement_date` with semiannual
    compounding, and the interest accrued up to settlement is subtracted.
    All day counts use `diff_in_days_two_dates`.

    Args:
        cusip: not used in the calculation.
        settlement_date: date the discounting runs back to.
        accrual_date: date interest starts accruing.
        end_date: date the bond is redeemed.
        yield_rate: yield as a fraction (`get_price` divides by 100 before calling).
        coupon: annual coupon — presumably in the same units as `RV`, TODO confirm.
        RV: redemption value.

    Returns:
        The discounted value minus accrued interest (not truncated here).
    """
    NOMINAL_FREQUENCY = 2    # semiannual interest payment frequency
    accrual_date_to_settlement_date = diff_in_days_two_dates(settlement_date, accrual_date)
    settlement_date_to_end_date = diff_in_days_two_dates(end_date, settlement_date)
    # Simple interest earned between the accrual date and settlement.
    accrued = coupon * accrual_date_to_settlement_date / NUM_OF_DAYS_IN_YEAR
    # Remaining life measured in (possibly fractional) half-year periods.
    num_of_periods_from_settlement_date_to_end_date = settlement_date_to_end_date / (NUM_OF_DAYS_IN_YEAR / NOMINAL_FREQUENCY)
    denom = (1 + yield_rate / NOMINAL_FREQUENCY) ** num_of_periods_from_settlement_date_to_end_date
    accrual_date_to_end_date = diff_in_days_two_dates(end_date, accrual_date)
    # Redemption value plus all interest from accrual date to end date, discounted to settlement.
    base = (RV + coupon * accrual_date_to_end_date / NUM_OF_DAYS_IN_YEAR) / denom
    return base - accrued

'''
This function computes the price of a bond with multiple periodic interest 
payments using MSRB Rule Book G-33, rule (b)(i)(B)(2). Comments with capital 
letter symbols represent those same symbols seen in formula in MSRB rule book.
'''
def price_of_bond_with_multiple_periodic_interest_payments(cusip,    # can be used for debugging purposes
                                                           settlement_date, 
                                                           accrual_date,
                                                           first_coupon_date, 
                                                           prev_coupon_date, 
                                                           next_coupon_date,    
                                                           final_coupon_date, 
                                                           end_date, 
                                                           frequency,
                                                           num_of_interest_payments, 
                                                           yield_rate,
                                                           coupon, 
                                                           RV, 
                                                           time_delta, 
                                                           last_period_accrues_from_date):
    """Price a bond with periodic coupons remaining before `end_date`.

    The result is the discounted redemption value (plus any interest due
    between the final coupon date and `end_date`), plus the discounted coupon
    payments, minus the interest accrued from the previous coupon date to
    settlement. Day counts use `diff_in_days_two_dates`.

    Args:
        cusip: not used in the calculation.
        settlement_date, accrual_date, first_coupon_date, prev_coupon_date,
            next_coupon_date, final_coupon_date, end_date: dates of the trade
            and of the coupon schedule.
        frequency: number of interest payments per year (must be non-zero).
        num_of_interest_payments: number of coupon dates from
            `next_coupon_date` through `final_coupon_date`.
        yield_rate: yield as a fraction (`get_price` divides by 100 before calling).
        coupon: annual coupon — presumably in the same units as `RV`, TODO confirm.
        RV: redemption value.
        time_delta: gap between coupon payments.
        last_period_accrues_from_date: used only for the odd-final-coupon check.

    Returns:
        The price before truncation.
    """
    num_of_days_in_period = NUM_OF_DAYS_IN_YEAR / frequency
    discount_rate = 1 + yield_rate / frequency    # 1 + Y / M
    final_coupon_date_to_end_date = diff_in_days_two_dates(end_date, final_coupon_date)
    prev_coupon_date_to_settlement_date = diff_in_days_two_dates(settlement_date, prev_coupon_date)    # A
    interest_due_at_end_date = coupon * final_coupon_date_to_end_date / NUM_OF_DAYS_IN_YEAR
    
    RV_and_interest_due_at_end_date = RV + interest_due_at_end_date
    settlement_date_to_next_coupon_date = diff_in_days_two_dates(next_coupon_date, settlement_date)    # E - A
    settlement_date_to_next_coupon_date_frac = settlement_date_to_next_coupon_date / num_of_days_in_period    # (E - A) / E
    final_coupon_date_to_end_date_frac = final_coupon_date_to_end_date / num_of_days_in_period
    # Total number of (fractional) coupon periods between settlement and the end date.
    num_of_periods_from_settlement_date_to_end_date = num_of_interest_payments - 1 + settlement_date_to_next_coupon_date_frac + final_coupon_date_to_end_date_frac
    
    RV_and_interest_due_at_end_date_discounted = RV_and_interest_due_at_end_date / (discount_rate ** num_of_periods_from_settlement_date_to_end_date)
    
    # The following logic statements are necessary to address odd first and final coupons
    if dates_are_equal(next_coupon_date, first_coupon_date):
        # The next coupon is the first one: its period runs from the accrual date.
        num_of_days_in_current_interest_payment_period = diff_in_days_two_dates(first_coupon_date, accrual_date)
    elif not pd.isna(last_period_accrues_from_date) and compare_dates(settlement_date, last_period_accrues_from_date + time_delta) > 0:    # this logic has not been tested
        num_of_days_in_current_interest_payment_period = 0
    else:
        num_of_days_in_current_interest_payment_period = num_of_days_in_period

    # The coupon paid on the next coupon date, sized by the length of the current period.
    coupon_payments_discounted_total = (coupon * num_of_days_in_current_interest_payment_period / NUM_OF_DAYS_IN_YEAR) / \
                                       (discount_rate ** settlement_date_to_next_coupon_date_frac)
    coupon_payment = coupon / frequency
    # The remaining regular coupons, each discounted k whole periods further out.
    for k in range(1, num_of_interest_payments):
        coupon_payment_discounted = coupon_payment / (discount_rate ** (settlement_date_to_next_coupon_date_frac + k))
        coupon_payments_discounted_total += coupon_payment_discounted
        
    accrued = coupon * prev_coupon_date_to_settlement_date / NUM_OF_DAYS_IN_YEAR    # R * A / B
    return RV_and_interest_due_at_end_date_discounted + coupon_payments_discounted_total - accrued

In [9]:
import numpy as np
import pandas as pd

'''
This function is a helper function for `compute_price`. This function calculates the price of a trade, where `yield_rate` 
is a specific yield and `end_date` is a fixed repayment date. All dates must be valid relative to the settlement 
date, as opposed to the trade date. Note that "yield" is a reserved word in Python and should not be used as the name 
of a variable or column.
Formulas are from https://www.msrb.org/pdf.aspx?url=https%3A%2F%2Fwww.msrb.org%2FRules-and-Interpretations%2FMSRB-Rules%2FGeneral%2FRule-G-33.aspx.
For all bonds, `base` is the present value of future cashflows to the buyer. 
The clean price is this price minus the accumulated amount of simple interest that the buyer must pay to the seller, which is called `accrued`.
Zero-coupon bonds are handled first. For these, the yield is assumed to be compounded semi-annually, i.e., once every six months.
For bonds with non-zero coupon, the first and last interest payment periods may have a non-standard length,
so they must be handled separately.
When referring to the formulas in the MSRB handbook (link above), the below variables map to the code.
A: prev_coupon_date_to_settlement_date
B: NUM_OF_DAYS_IN_YEAR
Y: yield_rate
N: num_of_interest_payments
E: num_of_days_in_period
F: settlement_date_to_next_coupon_date
P: price
D: settlement_date_to_end_date
H: prev_coupon_date_to_end_date
R: coupon
'''
def get_price(cusip, 
              prev_coupon_date, 
              first_coupon_date, 
              next_coupon_date, 
              end_date, 
              settlement_date, 
              accrual_date, 
              frequency, 
              yield_rate, 
              coupon, 
              RV, 
              time_delta, 
              last_period_accrues_from_date):
    """Price a trade to one redemption date at a given yield.

    Args:
        cusip: forwarded to the pricing helpers.
        prev_coupon_date, first_coupon_date, next_coupon_date: coupon dates
            around the settlement date.
        end_date: redemption date being priced to.
        settlement_date, accrual_date: dates of the trade and of the start of accrual.
        frequency: interest payments per year; 0 selects the
            interest-at-maturity formula.
        yield_rate: yield in percent (divided by 100 on entry).
        coupon: annual coupon — presumably in the same units as `RV`, TODO confirm.
        RV: redemption value.
        time_delta: gap between coupon payments.
        last_period_accrues_from_date: forwarded to the multi-period helper.

    Returns:
        The price after `trunc_and_round_price`.

    Raises:
        AssertionError: if 360 / `frequency` is not a whole number of days.
    """
    yield_rate = yield_rate / 100
    
    # Right now we do not disambiguate zero coupon from interest at maturity. More specifically, 
    # we should add logic that separates the cases of MSRB Rule Book G-33, rule (b) and rule (c)
    if frequency == 0:
        # See description for `price_of_bond_with_interest_at_maturity`
        price = price_of_bond_with_interest_at_maturity(cusip, 
                                                        settlement_date, 
                                                        accrual_date, 
                                                        end_date, 
                                                        yield_rate, 
                                                        coupon, 
                                                        RV)
    else:
        num_of_interest_payments, final_coupon_date = get_num_of_interest_payments_and_final_coupon_date(next_coupon_date, 
                                                                                                         end_date, 
                                                                                                         time_delta)
        prev_coupon_date_to_settlement_date = diff_in_days_two_dates(settlement_date, prev_coupon_date)
            
        num_of_days_in_period = NUM_OF_DAYS_IN_YEAR / frequency    # number of days in interest payment period 
        assert num_of_days_in_period == round(num_of_days_in_period)
         
        # The end date is on or before the next coupon date: simple-interest discounting.
        if compare_dates(end_date, next_coupon_date) <= 0:
            # MSRB Rule Book G-33, rule (b)(i)(B)(1)
            settlement_date_to_end_date = diff_in_days_two_dates(end_date, settlement_date)
            final_coupon_date_to_end_date = diff_in_days_two_dates(end_date, final_coupon_date)
            interest_due_at_end_date = coupon * final_coupon_date_to_end_date / NUM_OF_DAYS_IN_YEAR
            base = (RV + coupon / frequency + interest_due_at_end_date) / \
                   (1 + (yield_rate / frequency) * settlement_date_to_end_date / num_of_days_in_period)
            accrued = coupon * prev_coupon_date_to_settlement_date / NUM_OF_DAYS_IN_YEAR
            price = base - accrued
        else:
            # MSRB Rule Book G-33, rule (b)(i)(B)(2)
            price = price_of_bond_with_multiple_periodic_interest_payments(cusip, 
                                                                           settlement_date, 
                                                                           accrual_date, 
                                                                           first_coupon_date, 
                                                                           prev_coupon_date, 
                                                                           next_coupon_date, 
                                                                           final_coupon_date, 
                                                                           end_date,  
                                                                           frequency,
                                                                           num_of_interest_payments, 
                                                                           yield_rate,
                                                                           coupon, 
                                                                           RV, 
                                                                           time_delta, 
                                                                           last_period_accrues_from_date)              
    return trunc_and_round_price(price)

def compute_price(trade, yield_rate=None):
    '''
    Compute the price of a trade together with the date that price is calculated to.

    For bonds that have not been called, the price is the lowest of three present values:
    to the next call date (which may be above par), to the next par call date, and to maturity.

    Parameters
    ----------
    trade : row-like object
        Exposes the trade / reference-data fields read below as attributes and the
        yield as trade['yield'] (the driver passes DataFrame rows).
    yield_rate : number, optional
        Yield to price at. When omitted, trade['yield'] is used.

    Returns
    -------
    tuple
        (calc_price, calc_date, price_to_next_call, price_to_par_call, price_to_maturity,
        calc_date_selection).
        * Bond with a non-zero interest payment frequency but no first coupon date:
          the sentinel (-100, Timestamp('2000-01-01'), -100, -100, -100, -1).
        * Neither called nor callable: price to maturity, with the three component
          prices reported as 0 and calc_date_selection 2.
        * Called: price to the called end date, with the three component prices
          reported as 0 and calc_date_selection 3.
        * Callable and not called: the lowest of the three candidate prices; a candidate
          whose date is null stays at +inf. calc_date_selection is 0 (next call),
          1 (par call), 2 (maturity), 3 (refund date) or 4 (no match).

    Raises
    ------
    ValueError
        If yield_rate is passed as a string.
    '''
    # A bond with a coupon frequency but no first coupon date cannot be priced: warn and
    # hand back sentinel values.
    if trade.interest_payment_frequency != 0 and pd.isnull(trade.first_coupon_date):
        print(f"Bond (CUSIP: {trade.cusip}, RTRS: {trade.rtrs_control_number}) has a coupon but no first coupon date.")    # printing instead of raising an error to not disrupt processing large quantities of trades
        old_date = pd.to_datetime('2000-01-01')
        return -100, old_date, -100,-100,-100, -1

    if yield_rate is None:
        yield_rate = trade['yield']
    elif isinstance(yield_rate, str):
        raise ValueError('Yield rate argument cannot be a string. It must be a numerical value.')

    frequency = trade.interest_payment_frequency
    time_delta = get_time_delta_from_interest_frequency(frequency)
    my_prev_coupon_date, my_next_coupon_date = get_prev_coupon_date_and_next_coupon_date(trade, frequency, time_delta)

    def get_price_caller(end_date, redemption_value):
        # Everything except the end date and the redemption value is fixed for this trade.
        return get_price(trade.cusip,
                         my_prev_coupon_date,
                         trade.first_coupon_date,
                         my_next_coupon_date,
                         end_date,
                         trade.settlement_date,
                         trade.accrual_date,
                         frequency,
                         yield_rate,
                         trade.coupon,
                         redemption_value,
                         time_delta,
                         trade.last_period_accrues_from_date)

    redemption_value_at_maturity = 100
    if (not trade.is_called) and (not trade.is_callable):
        price_to_maturity = get_price_caller(trade.maturity_date, redemption_value_at_maturity)
        return price_to_maturity, trade.maturity_date, 0, 0 ,0 ,  2
    elif trade.is_called:
        end_date = end_date_for_called_bond(trade)

        # NOTE(review): the message below says "after", but a negative compare_dates result
        # presumably means end_date is *before* the settlement date - confirm against
        # compare_dates and correct the wording if so.
        if compare_dates(end_date, trade.settlement_date) < 0:
            print(f"Bond (CUSIP: {trade.cusip}, RTRS: {trade.rtrs_control_number}) has an end date ({end_date}) which is after the settlement date ({trade.settlement_date}).")    # printing instead of raising an error to not disrupt processing large quantities of trades

        redemption_value_at_refund = refund_price_for_called_bond(trade)
        return get_price_caller(end_date, redemption_value_at_refund), end_date, 0, 0 ,0 , 3
    else:
        # Candidates without a date keep +inf so they can never be the minimum.
        next_price, to_par_price, maturity_price = float('inf'), float('inf'), float('inf')

        if not pd.isnull(trade.par_call_date):
            to_par_price = get_price_caller(trade.par_call_date, trade.par_call_price)
        if not pd.isnull(trade.next_call_date):
            next_price = get_price_caller(trade.next_call_date, trade.next_call_price)
        maturity_price = get_price_caller(trade.maturity_date, redemption_value_at_maturity)

        prices_and_dates = [(next_price, trade.next_call_date),
                            (to_par_price, trade.par_call_date),
                            (maturity_price, trade.maturity_date)]
        calc_price, calc_date = min(prices_and_dates, key=lambda pair: pair[0]) # this function is stable and will choose the pair which appears first in the case of ties for the lowest price

    # The selection code is derived by comparing dates, not by remembering which candidate
    # won: when two candidate dates coincide, the first matching branch below is reported.
    if calc_date == trade.next_call_date:
        calc_date_selection = 0
    elif calc_date == trade.par_call_date:
        calc_date_selection = 1
    elif calc_date == trade.maturity_date:
        calc_date_selection = 2
    elif calc_date == trade.refund_date:
        calc_date_selection = 3
    else:
        calc_date_selection = 4
    return calc_price, calc_date, next_price,to_par_price,maturity_price,calc_date_selection


In [10]:
def get_trade_data(bqclient,begin_date,next_date):
    '''
    Load trades joined with reference data for one publish_datetime window.

    bqclient   -- client object whose query(sql).result().to_dataframe() chain runs the query.
    begin_date -- lower bound, interpolated verbatim into the SQL as a quoted literal.
    next_date  -- upper bound, interpolated the same way.

    The filter uses BETWEEN, so both bounds are inclusive: a row whose publish_datetime
    equals a shared boundary is returned for both of two back-to-back windows.
    Four hard-coded rtrs_control_number values are excluded. settlement_date falls back to
    assumed_settlement_date, and series_id is issue_key (as a string) with series_name
    appended when series_name is present.

    Returns the query result as a DataFrame.
    '''
    sql = f'''
    SELECT
                IFNULL(settlement_date, assumed_settlement_date) AS settlement_date,
                trade_date,
                cusip,
                accrual_date,
                dollar_price,
                issue_price,
                current_coupon_rate as coupon,
                interest_payment_frequency,
                next_call_date,
                par_call_date,
                next_call_price,
                par_call_price,
                maturity_date,
                previous_coupon_payment_date,
                next_coupon_payment_date,
                first_coupon_date,
                coupon_type,
                muni_security_type,
                called_redemption_type,
                refund_date,
                refund_price,
                is_callable,
                is_called,
                call_timing,
                yield,
                called_redemption_date,
                rtrs_control_number,
                has_zero_coupons,
                last_period_accrues_from_date,
                is_lop_or_takedown,
                when_issued,
                is_non_transaction_based_compensation,
                brokers_broker,
                trade_datetime,
                publish_datetime,
                issue_key,
                sequence_number,
                par_traded,
                series_name,
                case when series_name is null then cast(issue_key as string) else
                concat(issue_key,series_name) end as series_id
                FROM `eng-reactor-287421.auxiliary_views.trades_with_ref_data_pd`
                WHERE
                publish_datetime BETWEEN '{begin_date}' AND '{next_date}'
                and rtrs_control_number <> 2021030407690800 and rtrs_control_number <> 2022042012330500 
                and rtrs_control_number <> 2022042012332500 and rtrs_control_number <> 2022032507376100
                '''
    return bqclient.query(sql).result().to_dataframe()


In [11]:

import datetime

def get_latest_publish_datetime():
    '''
    Return the most recent publish_datetime stored in calculated_price_with_accrual_date.

    Runs the query through the module-level bqclient and converts the single returned
    value to a datetime.datetime. Nanosecond-resolution numpy values are converted with
    integer arithmetic (no float round trip, no deprecated utcfromtimestamp); values that
    already are datetime objects are returned unchanged.

    Raises
    ------
    IndexError
        If the table has no rows.
    '''
    query = '''
    SELECT
    publish_datetime
    FROM
    `eng-reactor-287421.auxiliary_views.calculated_price_with_accrual_date`
    order by publish_datetime desc  limit 1
    '''
    latest_rows = bqclient.query(query).result().to_dataframe()
    if latest_rows.empty:
        raise IndexError("calculated_price_with_accrual_date returned no rows; there is no latest publish_datetime")

    latest = latest_rows.values[0][0]
    if isinstance(latest, datetime.datetime):
        # Object-dtype column: the cell already holds a datetime.
        return latest

    raw = latest.tolist()
    if isinstance(raw, datetime.datetime):
        # numpy returns a datetime directly when the resolution is coarser than nanoseconds.
        return raw

    # Nanosecond resolution: tolist() yields integer nanoseconds since the Unix epoch.
    epoch = datetime.datetime(1970, 1, 1)
    return epoch + datetime.timedelta(microseconds=raw // 1000)



from google.cloud import bigquery
import pandas
import pytz

def getSchema():
    '''
    Build the BigQuery schema for the calculated-price upload.

    Returns a list of bigquery.SchemaField objects, one per uploaded column, in the
    order listed below.
    '''
    column_types = (
        ("rtrs_control_number", "INTEGER"),
        ("trade_datetime", "DATETIME"),
        ("cusip", "STRING"),
        ('calc_price', "FLOAT"),
        ('price_to_next_call', "FLOAT"),
        ('price_to_par_call', "FLOAT"),
        ('price_to_maturity', "FLOAT"),
        ('calc_date', "DATE"),
        ("calc_date_selection", "INTEGER"),
        ('price_delta', "FLOAT"),
        ('publish_datetime', "DATETIME"),
        ('when_issued', "BOOLEAN"),
        ("issue_key", "INTEGER"),
        ("sequence_number", "INTEGER"),
        ("par_traded", "INTEGER"),
        ("series_name", "STRING"),
        ("series_id", "STRING"),
    )
    return [bigquery.SchemaField(column, field_type) for column, field_type in column_types]


def uploadData(vanilla):
    '''
    Append the calculated-price columns of vanilla to the BigQuery table TABLE_ID.

    A client is created for PROJECT_ID in location "US", the columns listed below are
    selected from vanilla, and a load job with the schema from getSchema() and
    WRITE_APPEND disposition is started and waited on. Prints "Upload Successful" on
    success; on failure prints "Failed to Upload" and re-raises the exception.
    '''
    bq = bigquery.Client(project=PROJECT_ID, location="US")

    upload_columns = [
        "rtrs_control_number", "trade_datetime", "cusip",
        'calc_price', 'price_to_next_call', 'price_to_par_call', 'price_to_maturity',
        'calc_date', 'price_delta', 'publish_datetime',
        "when_issued", "calc_date_selection",
        "issue_key", "sequence_number", "par_traded",
        "series_name", "series_id",
    ]
    payload = vanilla[upload_columns]

    load_config = bigquery.LoadJobConfig(schema = getSchema(),
                                         write_disposition="WRITE_APPEND")

    load_job = bq.load_table_from_dataframe(payload, TABLE_ID,job_config=load_config)

    try:
        # Block until the load job finishes; any failure surfaces here.
        load_job.result()
        print("Upload Successful")
    except Exception as e:
        print("Failed to Upload")
        raise e


In [13]:
import datetime
begin_date = datetime.datetime(2019,1,1) 
next_date = datetime.datetime(2019,1,1)
end_date = datetime.datetime(2021,1,1)

PROJECT_ID = "eng-reactor-287421"
TABLE_ID = "eng-reactor-287421.auxiliary_views.calc_date_and_price"

while begin_date <= end_date:
    next_date = begin_date + datetime.timedelta(days=5)
    print(begin_date.strftime('%Y-%m-%d'),next_date.strftime('%Y-%m-%d'))
    %time df = get_trade_data(bqclient,begin_date.strftime('%Y-%m-%d'),next_date.strftime('%Y-%m-%d'))
    vanilla = transform_reference_data(df) 
    vanilla['anomaly'] = (vanilla.par_traded < 5000) | (vanilla['yield'] < 0) 
    vanilla['anomaly'] = vanilla['anomaly'] | pd.isnull(vanilla.settlement_date) | pd.isnull(vanilla.first_coupon_date) & (vanilla.coupon > 0)
    vanilla['alert'] = False # vanilla.cusip == "89386FAD5" # vanilla.redemption_type == 5 # vanilla.cusip == "803093AM5" # 
    #print (vanilla.head()) 
    if df.empty:
        print ("Dataframe is empty!")
        begin_date = next_date
    else:
        print ("Dataframe is not empty")
        print(len(vanilla))
        vanilla['calc_price'], vanilla['calc_date'],vanilla['price_to_next_call'],vanilla['price_to_par_call'],vanilla['price_to_maturity'],vanilla['calc_date_selection'] = zip(*vanilla.apply(lambda x: compute_price(x),axis=1))
        vanilla['price_delta'] = abs(vanilla.calc_price - vanilla.dollar_price)
        
#         for key in vanilla["issue_key"]:
#             if pd.isnull(key):
#                     print(vanilla["cusip"], vanilla["rtrs_control_number"])
#         for key in vanilla["sequence_number"]:
#             if pd.isnull(key):
#                     print(vanilla["cusip"], vanilla["rtrs_control_number"])
        uploadData(vanilla)
#         print("Data uploaded")
        begin_date = next_date
        begin_date = next_date
        

2019-01-01 2019-01-06
CPU times: user 345 ms, sys: 224 ms, total: 569 ms
Wall time: 46.7 s
Dataframe is not empty
91102
Upload Successful
2019-01-06 2019-01-11
CPU times: user 380 ms, sys: 238 ms, total: 618 ms
Wall time: 40.3 s
Dataframe is not empty
131152
Upload Successful
2019-01-11 2019-01-16
CPU times: user 300 ms, sys: 227 ms, total: 527 ms
Wall time: 53.3 s
Dataframe is not empty
90778
Upload Successful
2019-01-16 2019-01-21
CPU times: user 298 ms, sys: 227 ms, total: 525 ms
Wall time: 45.8 s
Dataframe is not empty
90073
Upload Successful
2019-01-21 2019-01-26
CPU times: user 400 ms, sys: 311 ms, total: 711 ms
Wall time: 1min 7s
Dataframe is not empty
120057
Upload Successful
2019-01-26 2019-01-31
CPU times: user 290 ms, sys: 200 ms, total: 490 ms
Wall time: 57.7 s
Dataframe is not empty
89515
Upload Successful
2019-01-31 2019-02-05
CPU times: user 274 ms, sys: 241 ms, total: 516 ms
Wall time: 44.8 s
Dataframe is not empty
84707
Upload Successful
2019-02-05 2019-02-10
CPU times

In [None]:
# Show the identifying columns of the trades whose issue_key is missing.
# (Previously every null key printed the whole cusip / rtrs_control_number columns and
# every non-null key printed "hi".)
missing_issue_key = vanilla[pd.isnull(vanilla["issue_key"])]
print(missing_issue_key[["cusip", "rtrs_control_number"]])
    

### vanilla.plot.scatter(x='dollar_price', y='calc_price', c='DarkBlue')

In [None]:
# Total and average absolute difference between calculated and reported dollar price.
print("Sum of errors: {}".format(vanilla['price_delta'].sum()))
print("Mean of errors: {}".format(vanilla['price_delta'].mean()))

In [None]:
# Trades whose calculated price is more than 1 away from the reported dollar price.
x = vanilla.loc[vanilla["price_delta"] > 1]

In [None]:
# Display the rtrs_control_number column of x.
x['rtrs_control_number']

In [118]:
# Scratch check: evaluates the same expression compute_price uses for its sentinel date.
# Note that this rebinds x, which an earlier cell set to the filtered trades frame.
x = pd.to_datetime('2000-01-01')

In [119]:
# Display the current value of x.
x

Timestamp('2000-01-01 00:00:00')