In [3]:
import sqlite3
import pandas as pd
import os
from datetime import datetime

def connect_to_database(db_path):
    """
    Open a SQLite connection to an existing database file.

    Prints a status line describing the outcome.

    Args:
        db_path: Filesystem path of the SQLite database.

    Returns:
        The open ``sqlite3.Connection``, or ``None`` when the file does not
        exist or sqlite3 raises an error while connecting.
    """
    try:
        # Only connect to files that already exist: sqlite3.connect() would
        # otherwise create a new, empty database at the given path.
        if os.path.exists(db_path):
            connection = sqlite3.connect(db_path)
            print(f"✅ Successfully connected to database: {db_path}")
            return connection

        print(f"❌ Database file not found: {db_path}")
        return None
    except sqlite3.Error as exc:
        print(f"❌ Error connecting to database: {exc}")
        return None

def analyze_database_structure(conn):
    """
    Find tables that have date-like, buy-like and instrument-like columns.

    Column roles are guessed by case-insensitive substring matching on the
    column name:
      * date:       'date', 'time', 'timestamp', 'day'
      * buy:        'buy', 'purchase', 'bid'
      * instrument: 'symbol', 'instrument', 'ticker', 'asset', 'code'

    Progress is printed for every table inspected.

    Args:
        conn: An open ``sqlite3.Connection``.

    Returns:
        A dict mapping each suitable table name to a dict with the keys
        'date_columns', 'buy_columns', 'instrument_columns', 'row_count'
        and 'all_columns'; the dict is empty when no table qualifies.
        Returns ``None`` if sqlite3 raises an error during the analysis.
    """
    def quote_identifier(name):
        # Double-quote the identifier (doubling embedded quotes) so table
        # names containing spaces, punctuation or SQL keywords stay valid.
        return '"' + name.replace('"', '""') + '"'

    def columns_matching(columns, keywords):
        # Keep the original spelling of every column whose lower-cased name
        # contains at least one keyword.
        return [col for col in columns
                if any(keyword in col.lower() for keyword in keywords)]

    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        print(f"📊 Found {len(tables)} table(s) in the database")

        suitable_tables = {}

        for table in tables:
            table_name = table[0]
            quoted_table = quote_identifier(table_name)
            print(f"\n🔸 Analyzing table: {table_name}")

            # Get column info; index 1 of each PRAGMA row is the column name.
            cursor.execute(f"PRAGMA table_info({quoted_table});")
            original_columns = [col[1] for col in cursor.fetchall()]

            date_columns = columns_matching(
                original_columns, ['date', 'time', 'timestamp', 'day'])
            buy_columns = columns_matching(
                original_columns, ['buy', 'purchase', 'bid'])
            instrument_columns = columns_matching(
                original_columns, ['symbol', 'instrument', 'ticker', 'asset', 'code'])

            print(f"   Date columns found: {date_columns}")
            print(f"   Buy columns found: {buy_columns}")
            print(f"   Instrument columns found: {instrument_columns}")

            if date_columns and buy_columns and instrument_columns:
                # Get row count
                cursor.execute(f"SELECT COUNT(*) FROM {quoted_table};")
                row_count = cursor.fetchone()[0]

                suitable_tables[table_name] = {
                    'date_columns': date_columns,
                    'buy_columns': buy_columns,
                    'instrument_columns': instrument_columns,
                    'row_count': row_count,
                    'all_columns': original_columns
                }
                print(f"   ✅ Suitable table with {row_count:,} rows")
            else:
                print(f"   ⚠️  Missing required columns")

        return suitable_tables

    except sqlite3.Error as e:
        print(f"❌ Error analyzing database: {e}")
        return None

def get_instruments_and_create_csvs(conn, suitable_tables):
    """
    Export one CSV file per instrument for every suitable table.

    For each table the first detected date, buy and instrument columns are
    used. Every distinct non-NULL instrument value gets a CSV with the
    columns ``date`` and ``buy`` (rows ordered by date, NULL dates/buys
    skipped), written into a new ``instrument_csvs_<YYYYmmdd_HHMMSS>``
    directory under the current working directory. Progress and a final
    summary are printed.

    Args:
        conn: An open ``sqlite3.Connection``.
        suitable_tables: Mapping of table name to a dict providing the
            'date_columns', 'buy_columns' and 'instrument_columns' lists.

    Returns:
        The name of the output directory that was created, or ``None`` when
        ``suitable_tables`` is empty/None and nothing was exported.
    """
    if not suitable_tables:
        print("❌ No suitable tables found")
        return None

    def quote_identifier(name):
        # Double-quote identifiers so names containing spaces, punctuation
        # or SQL keywords do not break the generated statements.
        return '"' + name.replace('"', '""') + '"'

    def filename_safe(value):
        # Keep only characters that are safe in a filename on any platform.
        return "".join(c for c in str(value) if c.isalnum() or c in ('-', '_'))

    # Create output directory
    output_dir = f"instrument_csvs_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    os.makedirs(output_dir, exist_ok=True)
    print(f"📁 Created output directory: {output_dir}")

    total_files_created = 0
    # Lower-cased names already written, so two instruments whose sanitised
    # names coincide (also on case-insensitive filesystems) do not overwrite
    # each other's file.
    used_filenames = set()

    for table_name, table_info in suitable_tables.items():
        print(f"\n📊 Processing table: {table_name}")

        # Use first available columns
        date_col = table_info['date_columns'][0]
        buy_col = table_info['buy_columns'][0]
        instrument_col = table_info['instrument_columns'][0]

        print(f"   Using columns: {instrument_col} (instrument), {date_col} (date), {buy_col} (buy)")

        q_table = quote_identifier(table_name)
        q_date = quote_identifier(date_col)
        q_buy = quote_identifier(buy_col)
        q_instrument = quote_identifier(instrument_col)
        safe_table = filename_safe(table_name)

        # The statement is identical for every instrument of this table; only
        # the bound parameter changes, so build it once.
        query = f"""
        SELECT {q_date} as date, {q_buy} as buy
        FROM {q_table}
        WHERE {q_instrument} = ?
        AND {q_date} IS NOT NULL
        AND {q_buy} IS NOT NULL
        ORDER BY {q_date}
        """

        try:
            # Get unique instruments
            cursor = conn.cursor()
            cursor.execute(
                f"SELECT DISTINCT {q_instrument} FROM {q_table} "
                f"WHERE {q_instrument} IS NOT NULL ORDER BY {q_instrument};"
            )
            instruments = [row[0] for row in cursor.fetchall()]

            print(f"   Found {len(instruments)} unique instruments")

            for instrument in instruments:
                try:
                    df = pd.read_sql_query(query, conn, params=[instrument])

                    if df.empty:
                        print(f"   ⚠️  {instrument}: No data found")
                        continue

                    # Build a unique filename; append _2, _3, ... on collision.
                    base_name = f"{filename_safe(instrument)}_{safe_table}"
                    filename = f"{base_name}.csv"
                    suffix = 2
                    while filename.lower() in used_filenames:
                        filename = f"{base_name}_{suffix}.csv"
                        suffix += 1
                    used_filenames.add(filename.lower())

                    filepath = os.path.join(output_dir, filename)

                    # Save to CSV
                    df.to_csv(filepath, index=False)

                    print(f"   ✅ {instrument}: {len(df)} rows → {filename}")
                    total_files_created += 1

                    # Show the first and last date of the exported range
                    print(f"      Sample: {df.iloc[0]['date']} to {df.iloc[-1]['date']}")

                except Exception as e:
                    # Best effort: one failing instrument must not abort the rest.
                    print(f"   ❌ Error processing {instrument}: {e}")

        except sqlite3.Error as e:
            print(f"❌ Error processing table {table_name}: {e}")

    print(f"\n🎯 Summary:")
    print(f"   Total CSV files created: {total_files_created}")
    print(f"   Output directory: {output_dir}")

    # List some of the created files
    if total_files_created > 0:
        files = os.listdir(output_dir)
        print(f"   Sample files created:")
        for file in files[:10]:  # Show first 10 files
            file_size = os.path.getsize(os.path.join(output_dir, file))
            print(f"     • {file} ({file_size} bytes)")

        if len(files) > 10:
            print(f"     ... and {len(files) - 10} more files")

    return output_dir

def create_summary_report(output_dir):
    """
    Write ``summary_report.csv`` describing every instrument CSV in a directory.

    Each ``*.csv`` file in ``output_dir`` (except a previously generated
    summary report) contributes one row with its row count, the min/max of
    its ``date`` column and the min/max/mean of its ``buy`` column ('N/A'
    when a column is absent). The ten largest files by row count are printed.

    File names are expected to look like ``<instrument>_<table>.csv``. The
    name is split at the LAST underscore because instrument names may contain
    underscores themselves (e.g. ``AU200_AUD_gather.csv``); a table name that
    contains an underscore is therefore only partially recovered.

    Args:
        output_dir: Directory holding the per-instrument CSV files.

    Returns:
        None. The report is written to ``<output_dir>/summary_report.csv``.
    """
    summary_filename = 'summary_report.csv'

    if not os.path.exists(output_dir):
        print("❌ Output directory not found")
        return

    # Skip the report itself so a re-run does not count it as an instrument.
    files = sorted(
        f for f in os.listdir(output_dir)
        if f.endswith('.csv') and f != summary_filename
    )

    if not files:
        print("❌ No CSV files found")
        return

    print(f"\n📋 Creating summary report...")

    summary_data = []

    for file in files:
        filepath = os.path.join(output_dir, file)
        try:
            df = pd.read_csv(filepath)

            # Extract instrument and table from filename (split at last '_')
            stem = file[:-len('.csv')]
            instrument, separator, table = stem.rpartition('_')
            if not separator:
                instrument, table = stem, 'Unknown'

            has_date = 'date' in df.columns
            has_buy = 'buy' in df.columns

            summary_data.append({
                'instrument': instrument,
                'table': table,
                'filename': file,
                'row_count': len(df),
                'date_range_start': df['date'].min() if has_date else 'N/A',
                'date_range_end': df['date'].max() if has_date else 'N/A',
                'buy_min': df['buy'].min() if has_buy else 'N/A',
                'buy_max': df['buy'].max() if has_buy else 'N/A',
                'buy_mean': df['buy'].mean() if has_buy else 'N/A'
            })

        except Exception as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"   ⚠️  Error reading {file}: {e}")

    if summary_data:
        summary_df = pd.DataFrame(summary_data)
        summary_file = os.path.join(output_dir, summary_filename)
        summary_df.to_csv(summary_file, index=False)

        print(f"✅ Summary report saved: {summary_file}")
        print(f"\nTop 10 instruments by data volume:")
        top_instruments = summary_df.nlargest(10, 'row_count')[['instrument', 'row_count', 'date_range_start', 'date_range_end']]
        print(top_instruments.to_string(index=False))

# Main execution: connect, detect suitable tables, export per-instrument CSVs
# and write a summary report for the export directory.
db_path = "aia_big.db"

# Connect to database
conn = connect_to_database(db_path)

if conn:
    try:
        print("\n" + "="*60)
        print("🔍 ANALYZING DATABASE STRUCTURE")
        print("="*60)

        # Analyze database structure
        suitable_tables = analyze_database_structure(conn)

        if suitable_tables:
            print("\n" + "="*60)
            print("📥 CREATING CSV FILES FOR EACH INSTRUMENT")
            print("="*60)

            # Create CSV files for each instrument
            output_dir = get_instruments_and_create_csvs(conn, suitable_tables)

            # The export names its directory with the timestamp taken when it
            # started, so rebuilding the name from datetime.now() here would
            # miss it whenever the export ran for a second or more. If the
            # export did not hand back the directory name, fall back to the
            # newest instrument_csvs_* directory (the timestamp suffix sorts
            # chronologically).
            if output_dir is None:
                candidates = [
                    entry for entry in os.listdir(".")
                    if entry.startswith("instrument_csvs_") and os.path.isdir(entry)
                ]
                output_dir = max(candidates, default=None)

            # Create summary report
            if output_dir and os.path.exists(output_dir):
                create_summary_report(output_dir)
    finally:
        # Release the connection even if analysis or export raised.
        conn.close()
        print(f"\n✅ Database connection closed")
else:
    print("❌ Could not connect to database")

✅ Successfully connected to database: aia_big.db

🔍 ANALYZING DATABASE STRUCTURE
📊 Found 1 table(s) in the database

🔸 Analyzing table: gather
   Date columns found: ['date']
   Buy columns found: ['buy']
   Instrument columns found: ['instrument']
   ✅ Suitable table with 54,754,326 rows

📥 CREATING CSV FILES FOR EACH INSTRUMENT
📁 Created output directory: instrument_csvs_20250623_122515

📊 Processing table: gather
   Using columns: instrument (instrument), date (date), buy (buy)
   ✅ Suitable table with 54,754,326 rows

📥 CREATING CSV FILES FOR EACH INSTRUMENT
📁 Created output directory: instrument_csvs_20250623_122515

📊 Processing table: gather
   Using columns: instrument (instrument), date (date), buy (buy)
   Found 123 unique instruments
   Found 123 unique instruments
   ✅ AU200_AUD: 156890 rows → AU200_AUD_gather.csv
      Sample: 2025-02-21 20:59:05.094028 to 2025-03-07 20:59:01.496584
   ✅ AU200_AUD: 156890 rows → AU200_AUD_gather.csv
      Sample: 2025-02-21 20:59:05.094028

In [None]:
import glob
import pandas as pd
import os
from datetime import datetime

# pyecharts is a third-party charting package: import it if it is already
# available, otherwise install it with pip and retry the import.
try:
    from pyecharts.charts import Line, Kline
    from pyecharts import options as opts
    from pyecharts.globals import ThemeType
    print("✅ pyecharts is available")
except ImportError:
    print("📦 Installing pyecharts...")
    import subprocess
    import sys
    # sys.executable is the interpreter running this code, so pip installs
    # into the same environment; check_call raises if the install fails.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pyecharts"])
    # Retry the imports now that the install command has succeeded.
    from pyecharts.charts import Line, Kline
    from pyecharts import options as opts
    from pyecharts.globals import ThemeType
    print("✅ pyecharts installed and imported")

def find_xau_usd_file():
    """
    Find XAU/Gold CSV files in all ``instrument_csvs_*`` directories.

    Searches the current working directory for directories matching
    ``instrument_csvs_*`` (sorted by ``os.path.getctime``, newest first) and
    collects every ``.csv`` file whose name contains 'xau' or 'gold'
    (case-insensitive). Each hit is loaded once to record its row count and
    the min/max of its ``date`` column. A listing of the hits is printed.

    Returns:
        A list of dicts with the keys 'filepath', 'filename', 'directory',
        'size', 'rows', 'date_start' and 'date_end', or ``None`` when no
        directory or no matching file exists. For a file that cannot be
        read, 'rows' is 0 and both date fields are "Error".
    """
    print("🔍 Searching for XAU_USD files...")

    # Look for directories with pattern instrument_csvs_*; the glob can also
    # match plain files (archives, notes), which os.listdir() cannot handle.
    directories = [d for d in glob.glob("instrument_csvs_*") if os.path.isdir(d)]

    if not directories:
        print("❌ No CSV directories found")
        return None

    # Sort by creation time (newest first)
    directories.sort(key=os.path.getctime, reverse=True)

    xau_files = []

    for directory in directories:
        # Sorted listing keeps the order of the results deterministic.
        for file in sorted(os.listdir(directory)):
            lowered = file.lower()
            if not file.endswith('.csv') or not ('xau' in lowered or 'gold' in lowered):
                continue

            filepath = os.path.join(directory, file)
            file_size = os.path.getsize(filepath)

            # Get row count and date range
            try:
                df = pd.read_csv(filepath)
                row_count = len(df)
                if 'date' in df.columns and not df.empty:
                    date_start = df['date'].min()
                    date_end = df['date'].max()
                else:
                    date_start = date_end = "N/A"
            except Exception as e:
                # Best effort: keep the file in the listing but flag it,
                # without swallowing KeyboardInterrupt/SystemExit.
                print(f"   ⚠️  Could not read {filepath}: {e}")
                row_count = 0
                date_start = date_end = "Error"

            xau_files.append({
                'filepath': filepath,
                'filename': file,
                'directory': directory,
                'size': file_size,
                'rows': row_count,
                'date_start': date_start,
                'date_end': date_end
            })

    if xau_files:
        print(f"📊 Found {len(xau_files)} XAU/Gold related files:")
        for position, file_info in enumerate(xau_files, start=1):
            size_str = f"{file_info['size']:,} bytes" if file_info['size'] < 1024 else f"{file_info['size']/1024:.1f} KB"
            print(f"   {position}. {file_info['filename']}")
            print(f"      Path: {file_info['filepath']}")
            print(f"      Size: {size_str} | Rows: {file_info['rows']:,}")
            print(f"      Date range: {file_info['date_start']} to {file_info['date_end']}")
            print()

        return xau_files

    print("❌ No XAU_USD or Gold files found")
    print("Available files in directories:")
    for directory in directories:
        files = [f for f in os.listdir(directory) if f.endswith('.csv')][:5]  # Show first 5
        print(f"   {directory}: {files}")
    return None

def create_gold_price_chart(df, filename):
    """
    Create a professional ECharts visualization for Gold/XAU_USD data.

    Builds a dark, gold-styled pyecharts ``Line`` chart of the ``buy`` column
    over the ``date`` column (with min/max markers, an average line, zoom
    controls, a toolbox and a statistics box) and renders it to
    ``xau_usd_gold_analysis.html`` in the current working directory.

    Args:
        df: DataFrame providing the columns 'date' and 'buy'.
        filename: Name of the source CSV file; accepted for the caller's
            convenience but not used when building the chart.

    Returns:
        A ``(chart, html_filename)`` tuple on success. Always a 2-tuple:
        ``(None, None)`` is returned when a required column is missing, the
        frame is empty, or chart creation raises.
    """
    try:
        # Prepare data
        if 'date' not in df.columns or 'buy' not in df.columns:
            print("❌ Required columns 'date' and 'buy' not found")
            # Same shape as every other return so callers can always unpack.
            return None, None

        if df.empty:
            print("❌ No data rows to plot")
            return None, None

        # Convert date to string format for ECharts
        dates = df['date'].astype(str).tolist()
        buy_values = df['buy'].tolist()

        # Calculate some statistics
        min_price = min(buy_values)
        max_price = max(buy_values)
        avg_price = sum(buy_values) / len(buy_values)
        volatility = pd.Series(buy_values).std()

        # Create line chart with gold-specific styling
        line_chart = (
            Line(init_opts=opts.InitOpts(
                width="1400px",
                height="700px",
                theme=ThemeType.VINTAGE,  # Golden theme for gold data
                bg_color="#1a1a1a"  # Dark background
            ))
            .add_xaxis(dates)
            .add_yaxis(
                series_name="Gold Price (XAU/USD)",
                y_axis=buy_values,
                is_smooth=True,
                line_opts=opts.LineStyleOpts(
                    width=3,
                    color="#FFD700"  # Gold color
                ),
                area_style_opts=opts.AreaStyleOpts(
                    opacity=0.3,
                    color="#FFD700"
                ),
                label_opts=opts.LabelOpts(is_show=False),
                markpoint_opts=opts.MarkPointOpts(
                    data=[
                        opts.MarkPointItem(
                            type_="max",
                            name="Highest",
                            itemstyle_opts=opts.ItemStyleOpts(color="#FF6B6B")
                        ),
                        opts.MarkPointItem(
                            type_="min",
                            name="Lowest",
                            itemstyle_opts=opts.ItemStyleOpts(color="#4ECDC4")
                        ),
                    ]
                ),
                markline_opts=opts.MarkLineOpts(
                    data=[
                        opts.MarkLineItem(
                            type_="average",
                            name="Average",
                            linestyle_opts=opts.LineStyleOpts(color="#FFA500", width=2)
                        )
                    ]
                ),
            )
            .set_global_opts(
                title_opts=opts.TitleOpts(
                    title="💰 Gold Price Analysis (XAU/USD)",
                    subtitle=f"📊 {len(df):,} data points | 📅 {dates[0]} to {dates[-1]}",
                    title_textstyle_opts=opts.TextStyleOpts(
                        color="#FFD700",
                        font_size=24,
                        font_weight="bold"
                    ),
                    subtitle_textstyle_opts=opts.TextStyleOpts(
                        color="#CCCCCC",
                        font_size=14
                    )
                ),
                xaxis_opts=opts.AxisOpts(
                    type_="category",
                    name="📅 Date",
                    name_textstyle_opts=opts.TextStyleOpts(color="#CCCCCC"),
                    axislabel_opts=opts.LabelOpts(
                        rotate=45,
                        # Label roughly 30 ticks regardless of series length.
                        interval=max(1, len(dates)//30),
                        color="#CCCCCC"
                    ),
                    axisline_opts=opts.AxisLineOpts(
                        linestyle_opts=opts.LineStyleOpts(color="#444444")
                    )
                ),
                yaxis_opts=opts.AxisOpts(
                    type_="value",
                    name="💵 Price (USD)",
                    name_textstyle_opts=opts.TextStyleOpts(color="#CCCCCC"),
                    # Pad the visible range by 0.5% around the data extremes.
                    min_=min_price * 0.995,
                    max_=max_price * 1.005,
                    axislabel_opts=opts.LabelOpts(
                        formatter="${value}",
                        color="#CCCCCC"
                    ),
                    axisline_opts=opts.AxisLineOpts(
                        linestyle_opts=opts.LineStyleOpts(color="#444444")
                    ),
                    splitline_opts=opts.SplitLineOpts(
                        linestyle_opts=opts.LineStyleOpts(color="#333333", opacity=0.5)
                    )
                ),
                tooltip_opts=opts.TooltipOpts(
                    trigger="axis",
                    background_color="rgba(0,0,0,0.8)",
                    border_color="#FFD700",
                    textstyle_opts=opts.TextStyleOpts(color="#FFFFFF")
                ),
                legend_opts=opts.LegendOpts(
                    pos_top="5%",
                    textstyle_opts=opts.TextStyleOpts(color="#CCCCCC")
                ),
                datazoom_opts=[
                    opts.DataZoomOpts(
                        range_start=0,
                        range_end=100,
                        background_color="#333333",
                        selected_data_background_color="#FFD700"
                    ),
                    opts.DataZoomOpts(
                        type_="inside",
                        range_start=0,
                        range_end=100
                    ),
                ],
                toolbox_opts=opts.ToolboxOpts(
                    is_show=True,
                    pos_right="2%",
                    feature=opts.ToolBoxFeatureOpts(
                        save_as_image=opts.ToolBoxFeatureSaveAsImageOpts(
                            title="Save Chart"
                        ),
                        data_zoom=opts.ToolBoxFeatureDataZoomOpts(
                            title={"zoom": "Zoom", "back": "Reset Zoom"}
                        ),
                        restore=opts.ToolBoxFeatureRestoreOpts(
                            title="Restore"
                        ),
                    ),
                    itemstyle_opts=opts.ItemStyleOpts(color="#CCCCCC")
                ),
                graphic_opts=[
                    opts.GraphicGroup(
                        graphic_item=opts.GraphicItem(
                            rotation=0,
                            bounding="raw",
                            right=100,
                            bottom=100,
                            z=100,
                        ),
                        children=[
                            opts.GraphicRect(
                                graphic_item=opts.GraphicItem(
                                    left="center", top="center", z=100
                                ),
                                graphic_shape_opts=opts.GraphicShapeOpts(
                                    width=300, height=120
                                ),
                                graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                    fill="rgba(0,0,0,0.8)",
                                    stroke="#FFD700",
                                    line_width=2
                                ),
                            ),
                            opts.GraphicText(
                                graphic_item=opts.GraphicItem(
                                    left="center", top="center", z=100
                                ),
                                graphic_textstyle_opts=opts.GraphicTextStyleOpts(
                                    text=f"📈 Statistics\n💰 Min: ${min_price:.2f}\n🎯 Max: ${max_price:.2f}\n📊 Avg: ${avg_price:.2f}\n📈 Volatility: ${volatility:.2f}",
                                    font="14px Arial",
                                    graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                        fill="#FFFFFF"
                                    ),
                                ),
                            ),
                        ],
                    )
                ]
            )
        )

        # Generate HTML file
        html_filename = "xau_usd_gold_analysis.html"
        line_chart.render(html_filename)

        return line_chart, html_filename

    except Exception as e:
        print(f"❌ Error creating visualization: {e}")
        import traceback
        traceback.print_exc()
        return None, None

# Main execution - Find and display XAU_USD data, then render the chart
print("🏆 GOLD PRICE VISUALIZATION (XAU/USD)")
print("=" * 60)

# Find XAU_USD files
xau_files = find_xau_usd_file()

if xau_files:
    # Use the first XAU file found
    selected_file = xau_files[0]
    filepath = selected_file['filepath']
    filename = selected_file['filename']

    print(f"🎯 Loading Gold Price Data...")
    print(f"   📁 File: {filename}")
    print(f"   📍 Path: {filepath}")
    print(f"   📊 Rows: {selected_file['rows']:,}")

    try:
        # Load the CSV data
        df = pd.read_csv(filepath)

        print(f"\n📋 Data Preview:")
        print("=" * 40)
        print(df.head(10).to_string(index=False))

        if len(df) > 10:
            print(f"\n... (showing first 10 of {len(df):,} rows)")

        # Show basic statistics
        if 'buy' in df.columns:
            buy_prices = df['buy']
            print(f"\n💰 Gold Price Statistics:")
            print(f"   💵 Minimum Price: ${buy_prices.min():.2f}")
            print(f"   💵 Maximum Price: ${buy_prices.max():.2f}")
            print(f"   💵 Average Price: ${buy_prices.mean():.2f}")
            print(f"   📊 Price Range: ${buy_prices.max() - buy_prices.min():.2f}")
            print(f"   📈 Standard Deviation: ${buy_prices.std():.2f}")

        print(f"\n🎨 Creating Professional Gold Price Chart...")
        chart_result = create_gold_price_chart(df, filename)
        # Guard against a bare None result so a failed chart build is reported
        # by the branch below instead of raising TypeError on unpacking.
        chart, html_file = chart_result if chart_result is not None else (None, None)

        if chart and html_file:
            print(f"\n🎉 SUCCESS! Gold Price Chart Created!")
            print("=" * 50)
            print(f"📁 Chart file: {html_file}")
            print(f"🌐 Open in browser to view the interactive chart")
            print(f"✨ Features included:")
            print(f"   • 💰 Professional gold-themed styling")
            print(f"   • 📊 Interactive zoom and pan")
            print(f"   • 🎯 Hover tooltips with exact values")
            print(f"   • 📈 Min/Max/Average markers")
            print(f"   • 💾 Export and save options")
            print(f"   • 📱 Responsive design")
            print(f"   • 📊 Built-in statistics display")
        else:
            print(f"❌ Failed to create chart")

    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback rather than aborting the notebook cell.
        print(f"❌ Error loading data: {e}")
        import traceback
        traceback.print_exc()

else:
    print("❌ No XAU_USD files found")
    print("💡 Make sure you've run the database extraction code first")