In [1]:
import pandas as pd

In [2]:
# File locations for this notebook, relative to its working directory.
# NOTE(review): the load cell reads 'data/us10.csv' directly instead of
# `data_filepath` — confirm which file is the intended source.
full_data_filepath = 'data/data.csv'
data_filepath = 'data/10year.csv'

In [3]:
# Load the raw quote history. Per the rendered output, the frame carries
# Date, Price, Open, High, Low and "Change %" columns, newest row first.
df = pd.read_csv(filepath_or_buffer='data/us10.csv')
df

Unnamed: 0,Date,Price,Open,High,Low,Change %
0,04/17/2025,4.332,4.288,4.344,4.278,1.22%
1,04/16/2025,4.280,4.335,4.352,4.263,-1.36%
2,04/15/2025,4.339,4.374,4.407,4.306,-1.00%
3,04/14/2025,4.383,4.482,4.490,4.360,-2.51%
4,04/11/2025,4.495,4.464,4.592,4.384,1.62%
...,...,...,...,...,...,...
2753,09/24/2014,2.566,2.531,2.569,2.527,1.54%
2754,09/23/2014,2.527,2.556,2.556,2.527,-1.52%
2755,09/22/2014,2.566,2.569,2.578,2.547,-0.47%
2756,09/19/2014,2.578,2.629,2.655,2.575,-1.53%


In [4]:
# Parse the Date strings into datetime64 values. The export writes dates as
# month/day/year (e.g. 04/17/2025), hence '%m/%d/%Y'; the explicit format
# rules out any day/month guessing.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Make sure the four quote columns are floats. One cast driven by a column
# list replaces four copy-pasted assignments. "Change %" is left as text.
quote_columns = ['Price', 'Open', 'High', 'Low']
df = df.astype({column: float for column in quote_columns})
df

Unnamed: 0,Date,Price,Open,High,Low,Change %
0,2025-04-17,4.332,4.288,4.344,4.278,1.22%
1,2025-04-16,4.280,4.335,4.352,4.263,-1.36%
2,2025-04-15,4.339,4.374,4.407,4.306,-1.00%
3,2025-04-14,4.383,4.482,4.490,4.360,-2.51%
4,2025-04-11,4.495,4.464,4.592,4.384,1.62%
...,...,...,...,...,...,...
2753,2014-09-24,2.566,2.531,2.569,2.527,1.54%
2754,2014-09-23,2.527,2.556,2.556,2.527,-1.52%
2755,2014-09-22,2.566,2.569,2.578,2.547,-0.47%
2756,2014-09-19,2.578,2.629,2.655,2.575,-1.53%


In [5]:
# The export lists the newest quote first; put the rows in chronological
# order (oldest first). Row labels are kept, so the index now counts down.
df = df.sort_values('Date')
df

Unnamed: 0,Date,Price,Open,High,Low,Change %
2757,2014-09-18,2.618,2.629,2.642,2.594,-0.15%
2756,2014-09-19,2.578,2.629,2.655,2.575,-1.53%
2755,2014-09-22,2.566,2.569,2.578,2.547,-0.47%
2754,2014-09-23,2.527,2.556,2.556,2.527,-1.52%
2753,2014-09-24,2.566,2.531,2.569,2.527,1.54%
...,...,...,...,...,...,...
4,2025-04-11,4.495,4.464,4.592,4.384,1.62%
3,2025-04-14,4.383,4.482,4.490,4.360,-2.51%
2,2025-04-15,4.339,4.374,4.407,4.306,-1.00%
1,2025-04-16,4.280,4.335,4.352,4.263,-1.36%


In [6]:
# Stretch the quote history onto a full daily calendar, so days without a
# quote (weekends, holidays) get a row as well.
start_date = pd.Timestamp('2014-09-18')
end_date = pd.Timestamp('2025-04-20')
date_range = pd.date_range(start_date, end_date)

# Each calendar day takes the row of the closest quoted day.
# NOTE(review): "closest" can be the *following* day — in the output below
# 2014-09-21 carries the 2014-09-22 values. If this series feeds a model that
# is look-ahead; confirm 'nearest' is intended rather than a forward fill.
df = (
    df.set_index('Date')
    .reindex(date_range, method='nearest')
    .rename_axis('Date')  # name the new index so reset_index() yields a 'Date' column
    .reset_index()
)
df

Unnamed: 0,Date,Price,Open,High,Low,Change %
0,2014-09-18,2.618,2.629,2.642,2.594,-0.15%
1,2014-09-19,2.578,2.629,2.655,2.575,-1.53%
2,2014-09-20,2.578,2.629,2.655,2.575,-1.53%
3,2014-09-21,2.566,2.569,2.578,2.547,-0.47%
4,2014-09-22,2.566,2.569,2.578,2.547,-0.47%
...,...,...,...,...,...,...
3863,2025-04-16,4.280,4.335,4.352,4.263,-1.36%
3864,2025-04-17,4.332,4.288,4.344,4.278,1.22%
3865,2025-04-18,4.332,4.288,4.344,4.278,1.22%
3866,2025-04-19,4.332,4.288,4.344,4.278,1.22%


In [7]:
# Merge the daily US10 open into the combined dataset stored at
# `full_data_filepath` ('data/data.csv', defined in the config cell) and
# write the result back to the same file.
df_data = pd.read_csv(full_data_filepath)

# A file without a Date column is given the daily calendar from df.
# NOTE(review): this assignment pairs rows by position (row label), so it
# assumes row i of the file corresponds to the i-th day of df — confirm.
if 'Date' not in df_data.columns:
    df_data['Date'] = df['Date']

# Parse the dates before anything is matched on them, so both frames hold
# datetime64 values rather than a mix of strings and timestamps.
df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y-%m-%d')

# Add the 10-year open as 'US10 Open', looked up by date. Assigning
# df['Open'] directly would pair rows by position and silently attach quotes
# to the wrong days whenever the file's dates differ from df's.
if 'US10 Open' not in df_data.columns:
    us10_open_by_date = df.set_index('Date')['Open']
    df_data['US10 Open'] = df_data['Date'].map(us10_open_by_date)

# When df reaches past the file's latest date, append every df date the file
# does not contain yet. Only Date and 'US10 Open' are filled for those rows;
# any other columns of the file are left empty (NaN).
if df_data['Date'].max() < df['Date'].max():
    missing_dates = (
        df.loc[~df['Date'].isin(df_data['Date']), ['Date', 'Open']]
        .rename(columns={'Open': 'US10 Open'})
    )
    df_data = pd.concat([df_data, missing_dates], ignore_index=True)

# Persist the updated dataset, overwriting the file that was read.
df_data.to_csv(full_data_filepath, index=False)