In [1]:
import pandas as pd
from pandas import Timestamp
from pandas.tseries.offsets import BDay
import numpy as np
from scipy.interpolate import interp1d
import os
import re
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.tseries.offsets import BDay

# 1. Change directory to raw data folder

In [2]:
# Resolve the raw data folder, which sits at <parent of the working directory>/data/raw.
current_directory = os.getcwd()
parent_directory = os.path.split(current_directory)[0]
data_raw_folder = os.path.join(parent_directory, 'data/raw')

# Switch into the raw data folder so the cells below can use bare file names.
# NOTE: the path is derived from the *current* working directory, so running this
# cell a second time in the same kernel resolves a different (likely missing) folder.
os.chdir(data_raw_folder)

# 2. Import dataset

In [3]:
# Load the new-issues dataset from the current working directory; the first CSV
# column is used as the DataFrame index.
new_issues = pd.read_csv('new_issues.csv', index_col=0)

In [4]:
# Discard the index that came from the CSV and replace it with a fresh 0..n-1 index.
new_issues = new_issues.reset_index(drop=True)

In [5]:
# Keep only the issues whose coupon is below 10 (the coupon is handled as a
# percentage by calculate_duration further down). The explicit .copy() gives an
# independent frame, so the column assignments in later cells do not touch new_issues.
coupon_below_cap = new_issues['coupon'].lt(10)
new_issues_filt = new_issues.loc[coupon_below_cap].copy()

# 3. Calculate duration

In [6]:
# Fill missing values in the 'frequency' column with 'Annually'.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the column selection: that chained in-place form
# raises a FutureWarning on recent pandas 2.x and leaves the DataFrame unchanged
# once Copy-on-Write is active (the default behaviour in pandas 3.0).
new_issues_filt['frequency'] = new_issues_filt['frequency'].fillna('Annually')

In [7]:
# Number of coupon payments per year for each frequency label.
frequency_mapping = {'Annually': 1, 'Semi-Annually': 2, 'Quarterly': 4}
new_issues_filt['coupon_frequency'] = new_issues_filt['frequency'].map(frequency_mapping)

# Series.map turns every label that is missing from the mapping into NaN. Left
# unchecked, that only surfaces later as an obscure "cannot convert float NaN to
# integer" error inside calculate_duration, so fail here with the offending labels.
unmapped_frequencies = new_issues_filt.loc[
    new_issues_filt['coupon_frequency'].isna(), 'frequency'
].unique()
if len(unmapped_frequencies) > 0:
    raise ValueError(
        f"Unmapped coupon frequency labels: {sorted(map(str, unmapped_frequencies))}"
    )

In [8]:
def calculate_duration(coupon_rate, time_to_maturity, coupon_frequency):
    """Return the duration, in years, of a fixed-coupon bond priced at par.

    The bond is assumed to trade at par, so its annual yield is taken to be
    equal to its coupon rate. Cash flows are laid out on whole coupon periods
    for a face value of 1, with the principal repaid together with the last
    coupon.

    Parameters
    ----------
    coupon_rate : float
        Annual coupon rate in percent (e.g. ``5`` for 5 %).
    time_to_maturity : float
        Time to maturity in years. It is truncated to a whole number of
        coupon periods.
    coupon_frequency : int or float
        Number of coupon payments per year (e.g. 1, 2 or 4).

    Returns
    -------
    float
        Present-value-weighted average time of the cash flows, expressed in
        years. No division by ``1 + periodic yield`` is applied, so this is
        the Macaulay duration rather than the modified duration.

    Raises
    ------
    ValueError
        If ``time_to_maturity * coupon_frequency`` is NaN (raised by ``int``).
    ZeroDivisionError
        If ``coupon_frequency`` is zero and at least one period remains.
    """
    coupon_rate_decimal = coupon_rate / 100

    # Number of whole coupon periods left until maturity.
    periods = int(time_to_maturity * coupon_frequency)
    if periods < 1:
        # Less than one full coupon period remains: the only cash flow is paid
        # at maturity, so the duration is simply the (non-negative) time left.
        # Previously this case crashed with IndexError on an empty cash-flow list.
        return max(float(time_to_maturity), 0.0)

    coupon_payment = coupon_rate_decimal / coupon_frequency

    # Cash flows are indexed by coupon period, so they must be discounted with
    # the per-period yield. Using the full annual yield per period over-discounts
    # every bond that pays more than once a year and breaks the par-price assumption.
    period_yield = coupon_rate_decimal / coupon_frequency

    total_pv = 0.0
    time_weighted_pv = 0.0
    for period in range(1, periods + 1):
        # The final payment also returns the principal (face value 1).
        cash_flow = coupon_payment + 1 if period == periods else coupon_payment
        present_value = cash_flow / (1 + period_yield) ** period
        total_pv += present_value
        time_weighted_pv += present_value * period

    # The weighted average is measured in coupon periods; convert it to years.
    return (time_weighted_pv / total_pv) / coupon_frequency

In [9]:
# Compute the duration of every issue row by row: calculate_duration receives the
# row's 'coupon' as the coupon rate, 'maturityTerm' as the time to maturity and
# 'coupon_frequency' as the number of coupon payments per year.
new_issues_filt['duration'] = new_issues_filt.apply(lambda row: calculate_duration(row['coupon'], row['maturityTerm'], row['coupon_frequency']), axis=1)

In [13]:
# Persist the filtered frame, including the added 'coupon_frequency' and 'duration' columns.
# NOTE(review): this writes to the same relative path, 'new_issues.csv', that was read
# at the start of the notebook, so the input file is replaced by the filtered, augmented
# data (assuming the working directory has not changed in between) — confirm this is
# intended, since rows with coupon >= 10 are lost from the file.
new_issues_filt.to_csv('new_issues.csv')