# In [None]:
import os
import sys
import dask.dataframe as dd
import logging
from tqdm.notebook import tqdm
from dask.diagnostics import ProgressBar

# Define project root
# Base directory that the log file and the data/summary paths below are joined onto.
project_root = "/content/drive/MyDrive/Projects/finance-deep-analysis"
# Re-running this cell in the same kernel must not keep stacking duplicate
# entries onto sys.path, so only append when the entry is missing.
if project_root not in sys.path:
    sys.path.append(project_root)

# Set up logging
log_path = os.path.join(project_root, 'eda.log')
# force=True: logging.basicConfig() is a silent no-op when the root logger
# already has handlers (e.g. a notebook host that pre-configured logging, or
# a previous run of this cell). Forcing removes and closes the old root
# handlers so the file + stdout handlers below are always the ones in effect.
logging.basicConfig(
    level=logging.INFO,
    handlers=[
        logging.FileHandler(log_path),
        logging.StreamHandler(sys.stdout),
    ],
    force=True,
)
# Named logger used by every function in this file; it has no handlers of its
# own and relies on propagation to the root logger configured above.
logger = logging.getLogger('eda')

def load_data():
    """Read 'featured_data.parquet' from the project root.

    Returns:
        The object produced by ``dd.read_parquet`` for that path.

    Raises:
        FileNotFoundError: If nothing exists at the expected path. The same
            message is written to the 'eda' logger at ERROR level first.
    """
    data_path = os.path.join(project_root, 'featured_data.parquet')

    # Guard clause: report and bail out early when the input is missing.
    if not os.path.exists(data_path):
        missing_msg = f"Data file not found at {data_path}"
        logger.error(missing_msg)
        raise FileNotFoundError(missing_msg)

    logger.info(f"Loading data from {data_path}")
    frame = dd.read_parquet(data_path)
    logger.info("Data loaded successfully")
    return frame

def descriptive_analysis(data):
    """Compute, print, and persist the ``describe()`` summary of *data*.

    The summary is computed under a Dask ``ProgressBar``, printed to stdout,
    and written as plain text to 'eda_summary.txt' in the project root
    (the file is opened in 'w' mode, so existing content is replaced).

    Args:
        data: Object exposing ``describe()`` whose result has ``compute()``;
            the script passes the value returned by ``load_data()``.

    Returns:
        None.
    """
    logger.info("Performing descriptive analysis")
    print("Performing descriptive analysis...")

    # The progress bar is only active while the graph is being executed.
    with ProgressBar():
        described = data.describe()
        summary = described.compute()

    print(summary)

    # Persist the same table that was just printed.
    summary_path = os.path.join(project_root, 'eda_summary.txt')
    with open(summary_path, 'w') as out:
        out.write(summary.to_string())

    saved_msg = f"Summary saved to '{summary_path}'"
    logger.info(saved_msg)
    print(saved_msg)

def summarize_statistics(data):
    """Write the ``describe()`` summary of *data* to 'eda_summary.txt'.

    The summary is computed under a Dask ``ProgressBar``, rendered with
    ``to_string()``, and written to the project root. Nothing but status
    messages is printed.

    NOTE(review): the target path is the same 'eda_summary.txt' that
    ``descriptive_analysis`` writes, so this call overwrites that file.

    Args:
        data: Object exposing ``describe()`` whose result has ``compute()``;
            the script passes the value returned by ``load_data()``.

    Returns:
        None.
    """
    logger.info("Summarizing key statistics")
    print("Summarizing key statistics...")

    # Compute and render to text in one go while the progress bar is active.
    with ProgressBar():
        rendered = data.describe().compute().to_string()

    summary_path = os.path.join(project_root, 'eda_summary.txt')
    with open(summary_path, 'w') as handle:
        handle.write(rendered)

    done_msg = f"Summary saved to '{summary_path}'"
    logger.info(done_msg)
    print(done_msg)

# Script entry point: load the data, run both analysis steps, report outcome.
if __name__ == '__main__':
    try:
        data = load_data()
        descriptive_analysis(data)
        summarize_statistics(data)
        logger.info("EDA completed successfully")
        print("EDA completed successfully")
    except Exception as e:
        # Top-level boundary: deliberately do not re-raise, so a notebook run
        # ends with a readable message instead of a raw stack dump. Use
        # logger.exception (ERROR level + traceback) so the log still records
        # where the failure happened, not just its message.
        logger.exception(f"Error during EDA: {e}")
        print(f"Error during EDA: {e}")
