# Data Debugging and Verification

Let's check what data is actually in the database.

In [None]:
import sys
import os
import pandas as pd
import pyodbc
import warnings

# Make the project root importable so `config.database_config` resolves.
# A notebook has no `__file__`, so the root is taken to be the parent of the
# current working directory -- the same directory the previous
# dirname(dirname(abspath('__file__'))) expression evaluated to.
project_root = os.path.abspath(os.pardir)
sys.path.insert(0, project_root)

from config.database_config import get_connection_string

# Silence only the pandas notice about non-SQLAlchemy DBAPI connections,
# which pd.read_sql emits for a raw pyodbc connection. Every other warning
# stays visible, which matters in a notebook whose purpose is debugging.
warnings.filterwarnings(
    'ignore',
    message='pandas only supports SQLAlchemy',
    category=UserWarning,
)

# Open the connection that the query cells below share.
conn = pyodbc.connect(get_connection_string())
print("Connected to database")

## 1. Check Available Metal Codes

In [ ]:
# List every metal defined in the M_Metal master table together with the
# exchange it belongs to. Record counts per metal and tenor are reported
# separately in section 3.
query = """
SELECT MetalID, MetalCode, ExchangeCode
FROM M_Metal
ORDER BY MetalCode, ExchangeCode
"""

metals_df = pd.read_sql(query, conn)
print("Available metal codes:")
print(metals_df)

## 2. Check Available Tenor Types

In [None]:
# Tenor master list: each TenorTypeID with its name, in ID order.
query = """
SELECT TenorTypeID, TenorTypeName
FROM M_TenorType
ORDER BY TenorTypeID
"""

tenors_df = pd.read_sql(sql=query, con=conn)
print("Available tenor types:", tenors_df, sep="\n")

## 3. Check Data Count by Metal and Tenor

In [None]:
# Row count and first/last trade date for every (metal, exchange, tenor)
# combination present in T_CommodityPrice.
#
# The rows are sorted by tenor ID so tenors appear in their natural order
# rather than alphabetically. TenorTypeID is not a grouping column, and SQL
# Server only allows grouped or aggregated columns in the ORDER BY of a
# GROUP BY query, so it is wrapped in MIN(); this leaves the groups unchanged.
query = """
SELECT 
    m.MetalCode,
    m.ExchangeCode,
    t.TenorTypeName,
    COUNT(*) as RecordCount,
    MIN(p.TradeDate) as FirstDate,
    MAX(p.TradeDate) as LastDate
FROM T_CommodityPrice p
INNER JOIN M_Metal m ON p.MetalID = m.MetalID
INNER JOIN M_TenorType t ON p.TenorTypeID = t.TenorTypeID
GROUP BY m.MetalCode, m.ExchangeCode, t.TenorTypeName
ORDER BY m.MetalCode, MIN(t.TenorTypeID)
"""

data_summary = pd.read_sql(query, conn)
print("Data summary by metal and tenor:")
print(data_summary)

## 4. Check Recent Price Data

In [None]:
# Twenty most recent rows that carry a settlement price, newest trade date
# first, then by metal code and tenor ID.
query = """
SELECT TOP 20
    p.TradeDate,
    m.MetalCode,
    m.ExchangeCode,
    t.TenorTypeName,
    p.SettlementPrice,
    p.Volume,
    p.OpenInterest
FROM T_CommodityPrice p
INNER JOIN M_Metal m ON p.MetalID = m.MetalID
INNER JOIN M_TenorType t ON p.TenorTypeID = t.TenorTypeID
WHERE p.SettlementPrice IS NOT NULL
ORDER BY p.TradeDate DESC, m.MetalCode, t.TenorTypeID
"""

recent_data = pd.read_sql(sql=query, con=conn)
print("\nRecent price data (top 20):", recent_data, sep="\n")

## 5. Check Data for Each Exchange

In [None]:
# Show the ten most recent settled prices for each exchange in turn.
#
# The statement is written once with a `?` placeholder (pyodbc's qmark
# parameter style) and the exchange code is bound on each iteration, rather
# than splicing the value into the SQL text inside the loop.
query = """
SELECT TOP 10
    p.TradeDate,
    m.MetalCode,
    t.TenorTypeName,
    p.SettlementPrice
FROM T_CommodityPrice p
INNER JOIN M_Metal m ON p.MetalID = m.MetalID
INNER JOIN M_TenorType t ON p.TenorTypeID = t.TenorTypeID
WHERE m.ExchangeCode = ?
    AND p.SettlementPrice IS NOT NULL
ORDER BY p.TradeDate DESC
"""

for exchange in ['LME', 'SHFE', 'CMX']:
    exchange_data = pd.read_sql(query, conn, params=[exchange])
    print(f"\n{exchange} Data (latest 10 records):")
    if exchange_data.empty:
        # An empty frame means no settled prices are stored for the exchange.
        print(f"No data found for {exchange}")
    else:
        print(exchange_data)

## 6. Check Generic Futures Data

In [None]:
# Per exchange and tenor: row count and average settlement price, limited to
# tenors whose name matches one of the generic-future patterns below
# (contains "Generic...Future", "1st", "2nd", "3rd" or "th Future").
query = """
SELECT 
    m.ExchangeCode,
    t.TenorTypeName,
    COUNT(*) as Count,
    AVG(p.SettlementPrice) as AvgPrice
FROM T_CommodityPrice p
INNER JOIN M_Metal m ON p.MetalID = m.MetalID
INNER JOIN M_TenorType t ON p.TenorTypeID = t.TenorTypeID
WHERE t.TenorTypeName LIKE '%Generic%Future%'
    OR t.TenorTypeName LIKE '%1st%'
    OR t.TenorTypeName LIKE '%2nd%'
    OR t.TenorTypeName LIKE '%3rd%'
    OR t.TenorTypeName LIKE '%th Future%'
GROUP BY m.ExchangeCode, t.TenorTypeName
ORDER BY m.ExchangeCode, t.TenorTypeName
"""

generic_data = pd.read_sql(sql=query, con=conn)
print("\nGeneric futures data summary:", generic_data, sep="\n")

## 7. Test the Exact Query Used in Visualization

In [None]:
# Run the generic-futures query used by the visualization notebook over the
# last `days` days and report how many rows fall into each TenorNumber.
#
# CASE returns the first branch that matches, and LIKE 'Generic 1%' also
# matches names beginning 'Generic 10', 'Generic 11' and 'Generic 12'. The
# two-digit tenors are therefore tested first; otherwise they would all be
# labelled TenorNumber 1 and their own branches would never be reached.
# NOTE(review): if the visualization notebook still tests 'Generic 1%' first,
# it has the same mislabelling -- confirm and apply the same ordering there.
days = 90
query = f"""
SELECT 
    p.TradeDate,
    m.MetalCode,
    m.ExchangeCode,
    t.TenorTypeName,
    p.SettlementPrice,
    p.Volume,
    p.OpenInterest,
    CASE 
        WHEN t.TenorTypeName LIKE 'Generic 10%' THEN 10
        WHEN t.TenorTypeName LIKE 'Generic 11%' THEN 11
        WHEN t.TenorTypeName LIKE 'Generic 12%' THEN 12
        WHEN t.TenorTypeName LIKE 'Generic 1%' THEN 1
        WHEN t.TenorTypeName LIKE 'Generic 2%' THEN 2
        WHEN t.TenorTypeName LIKE 'Generic 3%' THEN 3
        WHEN t.TenorTypeName LIKE 'Generic 4%' THEN 4
        WHEN t.TenorTypeName LIKE 'Generic 5%' THEN 5
        WHEN t.TenorTypeName LIKE 'Generic 6%' THEN 6
        WHEN t.TenorTypeName LIKE 'Generic 7%' THEN 7
        WHEN t.TenorTypeName LIKE 'Generic 8%' THEN 8
        WHEN t.TenorTypeName LIKE 'Generic 9%' THEN 9
        ELSE 0
    END as TenorNumber
FROM T_CommodityPrice p
INNER JOIN M_Metal m ON p.MetalID = m.MetalID
INNER JOIN M_TenorType t ON p.TenorTypeID = t.TenorTypeID
WHERE 
    t.TenorTypeName LIKE 'Generic%Future%'
    AND p.TradeDate >= DATEADD(day, -{days}, GETDATE())
    AND p.SettlementPrice IS NOT NULL
ORDER BY p.TradeDate DESC, m.ExchangeCode, t.TenorTypeID
"""

test_df = pd.read_sql(query, conn)
print(f"\nQuery results for last {days} days:")
print(f"Total records: {len(test_df)}")
if test_df.empty:
    print("No data found with this query!")
else:
    print("\nFirst 10 records:")
    print(test_df.head(10))
    # A count under TenorNumber 0 means a name matched 'Generic%Future%'
    # but none of the numbered patterns above.
    print("\nTenorNumber distribution:")
    print(test_df['TenorNumber'].value_counts().sort_index())

## 8. Alternative Query Test

In [None]:
# Fallback probe with no tenor-name filtering: the 50 newest settled prices
# dated 2025-01-01 or later, so the tenor names actually stored can be
# inspected directly.
query = """
SELECT TOP 50
    p.TradeDate,
    m.MetalCode,
    m.ExchangeCode,
    t.TenorTypeName,
    t.TenorTypeID,
    p.SettlementPrice
FROM T_CommodityPrice p
INNER JOIN M_Metal m ON p.MetalID = m.MetalID
INNER JOIN M_TenorType t ON p.TenorTypeID = t.TenorTypeID
WHERE p.SettlementPrice IS NOT NULL
    AND p.TradeDate >= '2025-01-01'
ORDER BY p.TradeDate DESC, m.ExchangeCode
"""

simple_df = pd.read_sql(sql=query, con=conn)
print("\nSimple query results:", simple_df, sep="\n")

# Distinct tenor names among the returned rows, in order of first appearance.
tenor_names = simple_df['TenorTypeName'].unique()
print("\nUnique TenorTypeNames in the data:", tenor_names, sep="\n")

## Close Connection

In [None]:
# Release the pyodbc connection opened in the setup cell; later queries
# need the setup cell to be run again.
conn.close()
print("Database connection closed")