In [6]:
# Put the current working directory on sys.path (once) so that the
# project-local packages imported below can be resolved.
import os
import sys

project_root = os.path.abspath(".")
if project_root not in sys.path:
    sys.path.append(project_root)

# Project imports have to come after the sys.path tweak above.
# Importing `main` also executes that module's top-level statements.
from scripts.utilities.data_transformation_utils import configure_logging
from main import validate_and_archive_folders, run_data_ingestion, run_data_preprocessing

# Set up logging; the returned value is kept in `logger`.
logger = configure_logging()

# Run the pipeline stages one at a time, announcing each before it starts.
# An exception in one stage stops the cell, so later stages do not run.
pipeline_steps = (
    ("Step 1: Validating and Archiving Folders", validate_and_archive_folders),
    ("Step 2: Running Data Ingestion", run_data_ingestion),
    ("Step 3: Running Data Preprocessing", run_data_preprocessing),
)
for step_banner, run_step in pipeline_steps:
    print(step_banner)
    run_step()

# To exercise the whole workflow in a single call instead, uncomment:
# from main import main
# main()


AttributeError: 'NoneType' object has no attribute 'handlers'

In [None]:
import os
import sys
import logging
from scripts.data_ingestion.data_retrieval import main as data_retrieval_main
from scripts.data_preprocessing.balance_sheet_transformation import BalanceSheetTransformer
from scripts.data_preprocessing.income_statement_transformation import IncomeStatementTransformer
from scripts.data_preprocessing.cash_flow_transformation import CashFlowTransformer
from scripts.generate_scripts import main as generate_scripts_main
from scripts.utilities.data_transformation_utils import (
    configure_logging,
    get_data_paths,
    archive_files,
    prune_archives,
)

# Configure logging
# Module-level logger: validate_and_archive_folders() below writes its
# messages through this object. The call runs as soon as this code is
# executed, before any of the functions defined below are invoked.
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'NoneType' object has no attribute 'handlers'" —
# presumably raised during this call; confirm against the implementation
# of configure_logging.
logger = configure_logging()

def validate_and_archive_folders():
    """Make sure the data folders and their ``archive`` subfolders exist.

    The raw and processed data locations are obtained from
    ``get_data_paths()``. Those two directories, plus an ``archive``
    directory inside each of them, are created when missing (existing
    directories are left untouched), and one info-level log line is emitted
    per directory.

    Only directories are created here; no files are moved or copied by
    this function.

    Returns:
        None
    """
    raw_root, processed_root = get_data_paths()

    # Base folders first, then their archive subfolders (raw before processed).
    required_dirs = [raw_root, processed_root]
    for data_root in (raw_root, processed_root):
        required_dirs.append(os.path.join(data_root, 'archive'))

    for folder in required_dirs:
        os.makedirs(folder, exist_ok=True)
        logger.info(f"Validated or created directory: {folder}")

def main():
    """Run the end-to-end data processing pipeline.

    Order of operations:
      1. ``validate_and_archive_folders()``.
      2. ``data_retrieval_main()``.
      3. For the balance sheet, income statement and cash flow transformers
         (in that order): construct the transformer with no arguments, then
         call ``load_data``, ``process_data``, ``tag_data`` and
         ``archive_files`` on it.
      4. ``generate_scripts_main()``.

    No exceptions are caught here, so a failure in any step prevents the
    remaining steps from running.

    Returns:
        None
    """
    def run_transformer(transformer):
        """Drive one statement transformer through its four stages, in order."""
        transformer.load_data()
        transformer.process_data()
        transformer.tag_data()
        transformer.archive_files()

    validate_and_archive_folders()

    # Fetch the source data before any transformation starts.
    data_retrieval_main()

    # Each transformer is only constructed once the previous statement
    # has been fully processed.
    statement_transformers = (
        BalanceSheetTransformer,
        IncomeStatementTransformer,
        CashFlowTransformer,
    )
    for transformer_cls in statement_transformers:
        run_transformer(transformer_cls())

    # Script generation runs last.
    generate_scripts_main()

# Run the full pipeline only when this code is executed as the entry point,
# not when it is imported by another module.
if __name__ == "__main__":
    main()

AttributeError: 'NoneType' object has no attribute 'handlers'

In [7]:
#