In [1]:
import os
import sqlite3
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import quandl

# Read the Quandl API key from the environment instead of embedding it in the
# notebook, so the credential is not stored or shared with the file.
# NOTE(review): a key that was previously committed here should be rotated.
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")
if not quandl.ApiConfig.api_key:
    warnings.warn("QUANDL_API_KEY is not set; Quandl requests will be sent without an API key.")

# Create an SQLite connection and cursor
conn = sqlite3.connect('Treasury_Yields_data.db')
cursor = conn.cursor()

# Map each Quandl series code to the table-name stem used in the database
series_codes = {"YIELD": 'YIELD', "REALYIELD": 'REALYIELD'}

# Prefix to be added to each series code to form the full Quandl dataset code
prefix = "USTREASURY/"

# Today's date; used to exclude observations dated in the future
# (e.g. a month-end row for a month that has not finished yet)
current_datetime = datetime.now()
current_date = current_datetime.date()

# Function to update a table with new data
def update_table(series_code, periodicity):
    """Append newly published Quandl observations for one series and frequency.

    The function looks up the most recent dates stored in the SQLite table
    ``<series>_<periodicity>``, downloads the series again starting at the
    third-latest stored date (so differences and percent changes of the new
    rows can be computed), runs a few sanity checks, and appends every row
    dated after the latest stored date and not after ``current_date`` to the
    level table and to its ``_diff`` and ``_percent_change`` companion tables.

    Uses the module-level names ``prefix``, ``series_codes``, ``conn`` and
    ``current_date``.

    Args:
        series_code: Key of ``series_codes`` (also the Quandl code suffix).
        periodicity: Value passed to Quandl's ``collapse`` option; also the
            table-name suffix.

    Returns:
        None. Progress and warnings are reported via ``print`` / ``warnings``.
    """
    full_series_code = prefix + series_code
    table_name = series_codes[series_code] + '_' + periodicity

    # Only the three most recent dates are needed, so let SQLite do the
    # ordering and truncation instead of loading the whole Date column.
    recent_dates = pd.read_sql_query(
        f'SELECT Date FROM "{table_name}" ORDER BY Date DESC LIMIT 3', conn
    ).iloc[:, 0]

    latest_date = recent_dates.iloc[0] if len(recent_dates) >= 1 else None
    if pd.isna(latest_date):
        latest_date = None

    # Start the download at the third-latest stored date when there is one;
    # otherwise request the full history (start_date=None).
    start_date = None
    if len(recent_dates) >= 3 and not pd.isna(recent_dates.iloc[2]):
        start_date = pd.to_datetime(recent_dates.iloc[2]).strftime('%Y-%m-%d')

    # Existing values are the reference distribution for the outlier check.
    # Coercing to numeric turns the Date text into NaN, so drop columns that
    # are entirely NaN rather than rows: dropping rows with any NaN would
    # discard every row and leave the IQR bounds undefined.
    existing_data = pd.read_sql_query(f'SELECT * FROM "{table_name}"', conn)
    existing_data_numeric = (
        existing_data
        .apply(pd.to_numeric, errors='coerce')
        .dropna(axis="columns", how="all")
    )

    # Fetch new data from Quandl
    new_data = quandl.get(full_series_code, collapse=periodicity, start_date=start_date)

    if new_data.empty:
        print(f"No new data available for {table_name}.")
        return

    # Safety check: refuse to store incomplete rows
    if new_data.isnull().values.any():
        print(f"Warning: Missing values found in {table_name}. Skipping update.")
        return

    # Safety check: report (but tolerate) non-numeric columns
    expected_data_types = [np.float64]
    if not all(pd.api.types.is_numeric_dtype(dtype) for dtype in new_data.dtypes):
        print(f"Warning: Incorrect data types found in {table_name}.")
        print(f"Expected data types: {expected_data_types}")
        print(f"Actual data types: {new_data.dtypes}")
        print("Proceeding with the update.")

    # Safety check: flag values outside 1.5 * IQR of the stored history,
    # column by column, for the columns both frames share.
    Q1_existing = existing_data_numeric.quantile(0.25)
    Q3_existing = existing_data_numeric.quantile(0.75)
    IQR_existing = Q3_existing - Q1_existing
    common_columns = new_data.columns.intersection(existing_data_numeric.columns)
    lower_bound = Q1_existing[common_columns] - 1.5 * IQR_existing[common_columns]
    upper_bound = Q3_existing[common_columns] + 1.5 * IQR_existing[common_columns]
    outliers = (
        (new_data[common_columns] < lower_bound) |
        (new_data[common_columns] > upper_bound)
    ).any(axis=1)
    if outliers.any():
        warnings.warn(f"Warning: Outliers found in {table_name}. Proceeding with the update.")

    latest_ts = pd.to_datetime(latest_date) if latest_date is not None else None
    cutoff_ts = pd.to_datetime(current_date)

    # Nothing to do when the download holds no date beyond what is stored
    if latest_ts is not None and new_data.index.max() <= latest_ts:
        print(f"No new data available for {table_name}.")
        return

    # Keep rows that are new (after the latest stored date, if any) and not
    # "accrued", i.e. not dated after today.
    is_new = new_data.index <= cutoff_ts
    if latest_ts is not None:
        is_new = is_new & (new_data.index > latest_ts)
    new_observations = new_data[is_new]

    if new_observations.empty:
        print(f"No new finalized data available for {table_name}.")
        return

    new_observations.to_sql(table_name, conn, if_exists='append', index=True)

    # Period-to-period difference, restricted to the same dates that were
    # appended to the level table so the three tables stay in step.
    diff_data = new_data.diff().dropna()
    new_diff = diff_data[diff_data.index.isin(new_observations.index)]
    diff_table_name = table_name + '_diff'
    new_diff.to_sql(diff_table_name, conn, if_exists='append', index=True)

    # Percent change for the same dates. Infinite values (previous value of
    # zero) are not filtered here and are stored as computed.
    percent_change_data = new_data.pct_change().dropna()
    new_percent_change = percent_change_data[
        percent_change_data.index.isin(new_observations.index)
    ]
    percent_change_table_name = table_name + '_percent_change'
    new_percent_change.to_sql(percent_change_table_name, conn, if_exists='append', index=True)

    print(f"Updated {table_name} with new data.")


# Ordered tuple (rather than a set) so tables are processed in the same
# order on every run.
periodicities = ('daily', 'weekly', 'monthly')

# Update tables for each series code; always release the connection, even if
# one of the updates raises.
try:
    for series_code in series_codes:
        for periodicity in periodicities:
            update_table(series_code, periodicity)

    # Commit changes; only report success when every update completed
    conn.commit()
    print("Database updated successfully.")
finally:
    conn.close()


Updated YIELD_monthly with new data.
Updated YIELD_daily with new data.
Updated YIELD_weekly with new data.
Updated REALYIELD_monthly with new data.
Updated REALYIELD_daily with new data.
Updated REALYIELD_weekly with new data.
Database updated successfully.


In [2]:
# Debug lookup: up to this point `latest_date` is only assigned inside
# update_table(), so evaluating it at notebook level raises NameError.
latest_date

NameError: name 'latest_date' is not defined

In [3]:
# Debug lookup: up to this point `new_data` is only assigned inside
# update_table(), so evaluating it at notebook level raises NameError.
new_data.index.max()


NameError: name 'new_data' is not defined

In [4]:
import sqlite3
import pandas as pd
import quandl
import numpy as np
from datetime import datetime
import warnings

# Scratch setup for inspecting a single table (YIELD, monthly) at notebook
# level, using the same queries update_table() runs.

# Create an SQLite connection and cursor
conn = sqlite3.connect('Treasury_Yields_data.db')
cursor = conn.cursor()

# Series under inspection.
# NOTE: this rebinds `series_codes` (a dict in the first cell) to a string.
series_codes = "YIELD"

# Prefix to be added to each series code
prefix = "USTREASURY/"
full_series_code = prefix + series_codes
table_name = series_codes + '_' + 'monthly'

# List of existing tables (kept for ad-hoc inspection)
existing_tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
existing_tables = cursor.execute(existing_tables_query).fetchall()
# print("Existing Tables:", [table[0] for table in existing_tables])

# Latest date in the existing table
query_latest_date = f'SELECT MAX(Date) FROM "{table_name}"'  # Use double quotes for table name
latest_date = pd.read_sql_query(query_latest_date, conn).iloc[0, 0]

# Second-latest date: the largest date strictly below the latest one
query_second_latest_date = f'SELECT MAX(Date) FROM "{table_name}" WHERE Date < (SELECT MAX(Date) FROM "{table_name}");'
second_to_last_date = pd.read_sql_query(query_second_latest_date, conn).iloc[0, 0]

# Third-latest date as a scalar: let SQLite skip two rows and return one,
# instead of loading every date and taking row 2 (which yields a Series and
# fails when the table has fewer than three rows). None when unavailable.
query_third_latest_date = f'SELECT Date FROM "{table_name}" ORDER BY Date DESC LIMIT 1 OFFSET 2;'
third_latest_rows = pd.read_sql_query(query_third_latest_date, conn)
third_to_last_date = third_latest_rows.iloc[0, 0] if not third_latest_rows.empty else None

In [5]:

# Download the monthly-collapsed YIELD series, starting at the third-latest
# date found in the database.
new_data = quandl.get(
    "USTREASURY/YIELD",
    collapse="monthly",
    start_date=third_to_last_date,
)


In [10]:
# Download the weekly-collapsed YIELD series for a fixed date window.
new_data = quandl.get(
    "USTREASURY/YIELD",
    collapse="weekly",
    start_date='2023-10-22',
    end_date='2023-11-06',
)

In [47]:
# Show the last rows of the frame currently bound to `new_data`.
new_data.tail()

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-08-31,5.52,5.55,5.56,5.48,5.37,4.85,4.54,4.23,4.19,4.09,4.39,4.2
2023-09-30,5.55,5.6,5.55,5.53,5.46,5.03,4.8,4.6,4.61,4.59,4.92,4.73
2023-10-31,5.56,5.57,5.59,5.54,5.44,5.07,4.9,4.82,4.89,4.88,5.21,5.04
2023-11-30,5.55,5.56,5.54,5.45,5.27,4.92,4.67,4.49,4.51,4.47,4.79,4.6


In [48]:
# Dry run of update_table()'s append logic for the table under inspection:
# the same filters are applied, but nothing is written to the database.
# Relies on `latest_date`, `table_name`, `new_data` and `current_date` from
# earlier cells.

# Check for duplicate date observations
if latest_date is not None and new_data.index.max() <= pd.to_datetime(latest_date):
    print(f"No new data available for {table_name}.")
else:

    # Observations dated after the latest stored date
    new_observations = new_data[new_data.index > pd.to_datetime(latest_date)]
    # Remove any observations that represent "accrued" observations.
    # The two masks must be combined element-wise with `&` (and parenthesised):
    # Python's `and` tries to reduce each array to one bool, which raises
    # ValueError as soon as there is more than one candidate row.
    new_observations = new_observations[
        (new_observations.index > pd.to_datetime(latest_date))
        & (new_observations.index <= pd.to_datetime(current_date))
    ]
    if len(new_observations) > 0:

        # Period-to-period difference for the new dates
        diff_data = new_data.diff().dropna()
        new_observations = diff_data[diff_data.index > pd.to_datetime(latest_date)]
        diff_table_name = table_name + '_diff'

        # Percent change for the new dates (infinite values are not filtered)
        percent_change_data = new_data.pct_change().dropna()
        new_observations = percent_change_data[percent_change_data.index > pd.to_datetime(latest_date)]
        percent_change_table_name = table_name + '_percent_change'

        print(f"Updated {table_name} with new data.")
    else:
        print(f"No new data available for {table_name}.")


No new data available for YIELD_monthly.


In [49]:
# Most recent date present in the downloaded frame.
new_data.index.max()

Timestamp('2023-11-30 00:00:00')

In [50]:
# Latest date read from the database table, for comparison with the
# downloaded frame's most recent date.
print(latest_date)

2023-10-31 00:00:00


In [51]:
# Show the last rows of the frame currently bound to `new_data`.
new_data.tail()

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-08-31,5.52,5.55,5.56,5.48,5.37,4.85,4.54,4.23,4.19,4.09,4.39,4.2
2023-09-30,5.55,5.6,5.55,5.53,5.46,5.03,4.8,4.6,4.61,4.59,4.92,4.73
2023-10-31,5.56,5.57,5.59,5.54,5.44,5.07,4.9,4.82,4.89,4.88,5.21,5.04
2023-11-30,5.55,5.56,5.54,5.45,5.27,4.92,4.67,4.49,4.51,4.47,4.79,4.6


In [52]:
# Show the last rows of `new_observations`, i.e. what the filtering cell left.
new_observations.tail()

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [54]:
# Period-to-period differences of the downloaded frame, then a label slice
# from `latest_date` through `current_date`. Label slices include both end
# points, so the row dated exactly `latest_date` is part of the result.
diff_data = new_data.diff().dropna()
new_observations_diff = diff_data.loc[latest_date:current_date]

In [57]:
# Show the last rows of the sliced difference frame.
new_observations_diff.tail()

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-10-31,0.01,-0.03,0.04,0.01,-0.02,0.04,0.1,0.22,0.28,0.29,0.29,0.31


In [58]:
# Rows strictly after `latest_date` and not after `current_date`. The masks
# are combined element-wise with `&`; Python's `and` would try to reduce each
# array to a single bool and raises ValueError for more than one row.
new_observations_diff[
    (new_observations_diff.index > pd.to_datetime(latest_date))
    & (new_observations_diff.index <= pd.to_datetime(current_date))
]

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
