In [9]:
import wrds
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cvxpy as cp
from arch import arch_model

# Connect to WRDS
db = wrds.Connection(wrds_username='simengut')

Loading library list...
Done


In [11]:
# Define bond tickers
bond_tickers = ['SHY']

# SQL query to fetch bond data with a smaller date range and more efficient query
bond_sql_query = """
SELECT 
    a.permno,
    a.date,
    b.ticker,
    b.comnam,
    b.cusip,
    a.prc,
    a.vol,
    a.ret
FROM 
    crsp.dsf AS a
JOIN 
    crsp.dsenames AS b
ON 
    a.permno = b.permno
WHERE 
    b.ticker IN ('TLT', 'IEF', 'SHY', 'AGG', 'BND')
    AND a.date BETWEEN '2007-01-01' AND '2023-12-31'  -- Reduced date range
    AND a.date >= b.namedt
    AND a.date <= b.nameendt
ORDER BY 
    b.ticker,
    a.date;
"""

# Execute query with timeout and chunking
try:
    # Set a longer timeout and use chunking
    bond_df = db.raw_sql(bond_sql_query, 
                        coerce_float=True,
                        date_cols=['date'],
                        chunksize=10000)  # Process in chunks of 10,000 rows
    
    # If the result is a generator (from chunksize), convert to DataFrame
    if hasattr(bond_df, '__iter__'):
        bond_df = pd.concat(bond_df)
    
    # Display the first few rows
    print("First few rows of bond data:")
    print(bond_df.head())
    
    # Create a pivot table of returns
    bond_returns = bond_df.pivot(index='date', columns='ticker', values='ret')
    
    # Display summary statistics
    print("\nSummary statistics of bond returns:")
    print(bond_returns.describe())
    
    # Save to CSV
    bond_returns.to_csv('bond_returns.csv')
    
    print("\nBond ETFs included:")
    print("TLT: 20+ Year Treasury Bond ETF")
    print("IEF: 7-10 Year Treasury Bond ETF")
    print("SHY: 1-3 Year Treasury Bond ETF")
    print("AGG: US Aggregate Bond ETF")
    print("BND: Vanguard Total Bond Market ETF")

except Exception as e:
    print(f"Error occurred: {str(e)}")
    
    # Try alternative approach with smaller chunks
    print("\nTrying alternative approach with smaller chunks...")
    
    # Process each ticker separately
    all_dfs = []
    for ticker in bond_tickers:
        try:
            ticker_query = f"""
            SELECT 
                a.permno,
                a.date,
                b.ticker,
                b.comnam,
                b.cusip,
                a.prc,
                a.vol,
                a.ret
            FROM 
                crsp.dsf AS a
            JOIN 
                crsp.dsenames AS b
            ON 
                a.permno = b.permno
            WHERE 
                b.ticker = '{ticker}'
                AND a.date BETWEEN '2007-01-01' AND '2024-12-31'
                AND a.date >= b.namedt
                AND a.date <= b.nameendt
            ORDER BY 
                a.date;
            """
            
            ticker_df = db.raw_sql(ticker_query, 
                                 coerce_float=True,
                                 date_cols=['date'])
            all_dfs.append(ticker_df)
            print(f"Successfully retrieved data for {ticker}")
            
        except Exception as e:
            print(f"Error retrieving data for {ticker}: {str(e)}")
    
    if all_dfs:
        # Combine all DataFrames
        bond_df = pd.concat(all_dfs, ignore_index=True)
        
        # Create returns pivot table
        bond_returns = bond_df.pivot(index='date', columns='ticker', values='ret')
        
        # Save to CSV
        bond_returns.to_csv('bond_returns.csv')
        
        print("\nSuccessfully saved bond returns to CSV")
    else:
        print("No data was retrieved for any ticker")

Error occurred: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

Trying alternative approach with smaller chunks...
Successfully retrieved data for SHY

Successfully saved bond returns to CSV


In [7]:
# Define bond tickers
bond_tickers = ['TLT', 'IEF', 'SHY', 'AGG', 'BND']

# SQL query to fetch bond data with a smaller date range and more efficient query
bond_sql_query = """
SELECT 
    a.permno,
    a.date,
    b.ticker,
    b.comnam,
    b.cusip,
    a.prc,
    a.vol,
    a.ret
FROM 
    crsp.dsf AS a
JOIN 
    crsp.dsenames AS b
ON 
    a.permno = b.permno
WHERE 
    b.ticker IN ('TLT', 'IEF', 'SHY', 'AGG', 'BND')
    AND a.date BETWEEN '2007-01-01' AND '2023-12-31'  -- Reduced date range
    AND a.date >= b.namedt
    AND a.date <= b.nameendt
ORDER BY 
    b.ticker,
    a.date;
"""

# Execute query with timeout and chunking
try:
    # Set a longer timeout and use chunking
    bond_df = db.raw_sql(bond_sql_query, 
                        coerce_float=True,
                        date_cols=['date'],
                        chunksize=10000)  # Process in chunks of 10,000 rows
    
    # If the result is a generator (from chunksize), convert to DataFrame
    if hasattr(bond_df, '__iter__'):
        bond_df = pd.concat(bond_df)
    
    # Display the first few rows
    print("First few rows of bond data:")
    print(bond_df.head())
    
    # Create a pivot table of returns
    bond_returns = bond_df.pivot(index='date', columns='ticker', values='ret')
    
    # Display summary statistics
    print("\nSummary statistics of bond returns:")
    print(bond_returns.describe())
    
    # Save to CSV
    bond_returns.to_csv('bond_returns.csv')
    
    print("\nBond ETFs included:")
    print("TLT: 20+ Year Treasury Bond ETF")
    print("IEF: 7-10 Year Treasury Bond ETF")
    print("SHY: 1-3 Year Treasury Bond ETF")
    print("AGG: US Aggregate Bond ETF")
    print("BND: Vanguard Total Bond Market ETF")

except Exception as e:
    print(f"Error occurred: {str(e)}")
    
    # Try alternative approach with smaller chunks
    print("\nTrying alternative approach with smaller chunks...")
    
    # Process each ticker separately
    all_dfs = []
    for ticker in bond_tickers:
        try:
            ticker_query = f"""
            SELECT 
                a.permno,
                a.date,
                b.ticker,
                b.comnam,
                b.cusip,
                a.prc,
                a.vol,
                a.ret
            FROM 
                crsp.dsf AS a
            JOIN 
                crsp.dsenames AS b
            ON 
                a.permno = b.permno
            WHERE 
                b.ticker = '{ticker}'
                AND a.date BETWEEN '2007-01-01' AND '2023-12-31'
                AND a.date >= b.namedt
                AND a.date <= b.nameendt
            ORDER BY 
                a.date;
            """
            
            ticker_df = db.raw_sql(ticker_query, 
                                 coerce_float=True,
                                 date_cols=['date'])
            all_dfs.append(ticker_df)
            print(f"Successfully retrieved data for {ticker}")
            
        except Exception as e:
            print(f"Error retrieving data for {ticker}: {str(e)}")
    
    if all_dfs:
        # Combine all DataFrames
        bond_df = pd.concat(all_dfs, ignore_index=True)
        
        # Create returns pivot table
        bond_returns = bond_df.pivot(index='date', columns='ticker', values='ret')
        
        # Save to CSV
        bond_returns.to_csv('bond_returns.csv')
        
        print("\nSuccessfully saved bond returns to CSV")
    else:
        print("No data was retrieved for any ticker")

Error occurred: Can't reconnect until invalid transaction is rolled back.  Please rollback() fully before proceeding (Background on this error at: https://sqlalche.me/e/20/8s2b)

Trying alternative approach with smaller chunks...
Error retrieving data for TLT: Can't reconnect until invalid transaction is rolled back.  Please rollback() fully before proceeding (Background on this error at: https://sqlalche.me/e/20/8s2b)
Error retrieving data for IEF: Can't reconnect until invalid transaction is rolled back.  Please rollback() fully before proceeding (Background on this error at: https://sqlalche.me/e/20/8s2b)
Error retrieving data for SHY: Can't reconnect until invalid transaction is rolled back.  Please rollback() fully before proceeding (Background on this error at: https://sqlalche.me/e/20/8s2b)
Error retrieving data for AGG: Can't reconnect until invalid transaction is rolled back.  Please rollback() fully before proceeding (Background on this error at: https://sqlalche.me/e/20/8s2b