In [1]:
import sqlite3
import pandas as pd
import quandl

import os

# Read the Quandl API key from the environment so the credential is never
# stored in the notebook (or in version control alongside it).
api_key = os.environ.get("QUANDL_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the QUANDL_API_KEY environment variable before running this cell."
    )
quandl.ApiConfig.api_key = api_key

# Define the series codes
series_codes = ["YIELD"]

# Prefix to be added to each series code
prefix = "USTREASURY/"

# Frequencies to download (daily, weekly, monthly)
frequencies = ['daily', 'weekly', 'monthly']

# Create an SQLite connection; try/finally guarantees it is closed even when
# a download or a write fails part-way through.
conn = sqlite3.connect('Treasury_Yields_data.db')
try:
    # Fetch data and (re)create three tables per series/frequency pair:
    #   <series>_<frequency>                 original levels
    #   <series>_<frequency>_diff            period-to-period difference
    #   <series>_<frequency>_percent_change  period-to-period percent change
    for series_code in series_codes:
        # The Quandl code does not depend on the frequency, so build it once.
        full_series_code = prefix + series_code

        for frequency in frequencies:
            # Adjust the frequency in the Quandl request
            data = quandl.get(full_series_code, collapse=frequency)

            # Create a table for the original series
            table_name = f"{series_code}_{frequency}"
            data.to_sql(table_name, conn, if_exists='replace', index=True)

            # Create a table for the period-to-period difference
            diff_data = data.diff()
            diff_table_name = f"{table_name}_diff"
            diff_data.to_sql(diff_table_name, conn, if_exists='replace', index=True)

            # Create a table for the percent change
            percent_change_data = data.pct_change()
            percent_change_table_name = f"{table_name}_percent_change"
            percent_change_data.to_sql(percent_change_table_name, conn, if_exists='replace', index=True)

    # Commit changes
    conn.commit()
finally:
    # Always release the database file handle
    conn.close()

print("Databases created successfully.")


Databases created successfully.


In [36]:
import sqlite3
import pandas as pd
import quandl
import numpy as np
from datetime import datetime
import warnings

import os

# Read the Quandl API key from the environment so the credential is never
# stored in the notebook (or in version control alongside it). This runs
# before the database is opened, so a missing key leaves no open connection.
api_key = os.environ.get("QUANDL_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the QUANDL_API_KEY environment variable before running this cell."
    )
quandl.ApiConfig.api_key = api_key

# Create an SQLite connection and cursor
conn = sqlite3.connect('Treasury_Yields_data.db')
cursor = conn.cursor()

# Define the new series codes
series_codes = ["YIELD"]

# Prefix to be added to each series code
prefix = "USTREASURY/"

# Maps each stored table name to the Quandl `collapse` value used to fetch it
frequencies = {
    'YIELD_daily': 'daily',
    'YIELD_weekly': 'weekly',
    'YIELD_monthly': 'monthly'
}

# Function to update a table with new data
def update_table(series_code, periodicity, table_name=None):
    """Append newly published observations for one series/frequency to SQLite.

    The stored table is read back, Quandl is asked for data starting at the
    latest stored date, and only rows strictly newer than that date are kept.
    After the sanity checks below, the new rows are appended to the level
    table and their period-to-period difference and percent change are
    appended to ``<table_name>_diff`` and ``<table_name>_percent_change``.

    Parameters
    ----------
    series_code : str
        Code appended to the module-level ``prefix`` to form the Quandl code.
    periodicity : str
        Value passed to ``quandl.get`` as ``collapse``.
    table_name : str, optional
        Table holding the levels. Defaults to
        ``f"{series_code}_{periodicity}"``.

    Returns
    -------
    None
        Results are written through the module-level ``conn``; progress and
        warnings are printed.

    Notes
    -----
    NOTE(review): for weekly/monthly collapses the most recent stored row may
    describe a period that was still open when it was saved; such a row is not
    refreshed here because only strictly newer dates are appended -- confirm
    whether that matters for this dataset.
    """
    if table_name is None:
        table_name = f"{series_code}_{periodicity}"
    full_series_code = prefix + series_code

    # Load the stored observations with Date as a real datetime index, so the
    # date column is not mistaken for a numeric series in the checks below.
    existing_data = pd.read_sql_query(
        f'SELECT * FROM "{table_name}"',  # Use double quotes for table name
        conn,
        index_col='Date',
        parse_dates=['Date'],
    ).sort_index()
    # Ensure numeric data types in existing data (unparsable cells become NaN)
    existing_data_numeric = existing_data.apply(pd.to_numeric, errors='coerce')
    latest_date = None if existing_data.empty else existing_data.index.max()

    # Fetch data from Quandl, starting at the latest stored date if there is one
    request_kwargs = {'collapse': periodicity}
    if latest_date is not None:
        request_kwargs['start_date'] = latest_date.strftime('%Y-%m-%d')
    fetched_data = quandl.get(full_series_code, **request_kwargs)

    # start_date is inclusive, so the response repeats the row already stored
    # for latest_date; keep only strictly newer observations.
    new_data = fetched_data
    if latest_date is not None and not fetched_data.empty:
        new_data = fetched_data.loc[fetched_data.index > latest_date]

    if new_data.empty:
        print(f"No new data available for {table_name}.")
        return

    # Safety checks
    if new_data.isnull().values.any():
        print(f"Warning: Missing values found in {table_name}. Skipping update.")
        return

    expected_data_types = [np.float64]
    if not all(pd.api.types.is_numeric_dtype(dtype) for dtype in new_data.dtypes):
        print(f"Warning: Incorrect data types found in {table_name}.")
        print(f"Expected data types: {expected_data_types}")
        print(f"Actual data types: {new_data.dtypes}")
        print("Proceeding with the update.")

    # Calculate per-column IQR from existing data. quantile() skips NaN, so
    # columns with gaps still contribute their available history.
    Q1_existing = existing_data_numeric.quantile(0.25)
    Q3_existing = existing_data_numeric.quantile(0.75)
    IQR_existing = Q3_existing - Q1_existing

    # Get the intersection of columns
    common_columns = new_data.columns.intersection(existing_data_numeric.columns)

    # Identify outliers using the IQR method on common columns
    lower_bound = Q1_existing[common_columns] - 1.5 * IQR_existing[common_columns]
    upper_bound = Q3_existing[common_columns] + 1.5 * IQR_existing[common_columns]
    outliers = (
        (new_data[common_columns] < lower_bound) |
        (new_data[common_columns] > upper_bound)
    ).any(axis=1)
    if outliers.any():
        warnings.warn(f"Warning: Outliers found in {table_name}. Proceeding with the update.")

    # Append new observations to the original table
    new_data.to_sql(table_name, conn, if_exists='append', index=True)

    # Seed the change calculations with the last stored row so the first new
    # observation gets a difference / percent change against its predecessor.
    if latest_date is not None:
        seed_row = existing_data_numeric.iloc[[-1]].reindex(columns=new_data.columns)
        change_base = pd.concat([seed_row, new_data])
    else:
        change_base = new_data
    index_name = new_data.index.name

    # Append the period-to-period difference for the new rows only
    diff_data = change_base.diff().loc[new_data.index].rename_axis(index_name)
    diff_table_name = table_name + '_diff'
    diff_data.to_sql(diff_table_name, conn, if_exists='append', index=True)

    # Append the percent change for the new rows only. A previous value of 0
    # yields +/-inf here; it is stored as computed.
    percent_change_data = change_base.pct_change().loc[new_data.index].rename_axis(index_name)
    percent_change_table_name = table_name + '_percent_change'
    percent_change_data.to_sql(percent_change_table_name, conn, if_exists='append', index=True)

    print(f"Updated {table_name} with new data.")

table_names = ['YIELD_daily','YIELD_weekly','YIELD_monthly']

# try/finally guarantees the connection is closed even if one update fails.
try:
    # Update tables for each series code
    for table_name in table_names:
        for series_code in series_codes:
            update_table(series_code, frequencies[table_name])

    # Commit changes
    conn.commit()
finally:
    # Always release the database file handle
    conn.close()

print("Database updated successfully.")


No new data available for YIELD_daily.
No new data available for YIELD_weekly.
No new data available for YIELD_monthly.
Database updated successfully.
