In [None]:
import logging
import threading
import time
from pathlib import Path
from queue import Queue
from typing import List, Dict

import pandas as pd
# Send INFO-and-above records to a log file instead of the console.
# filemode='w' opens the file for writing, so each run starts a fresh log;
# %(threadName)s in the format identifies the thread that emitted the record.
logging.basicConfig(
    filename='data_processing.log',
    filemode='w',
    format='%(asctime)s - %(threadName)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
class DataProcessor:
    """Compute summary statistics for CSV files, using one thread per file.

    Worker threads push their outcome onto ``results_queue``;
    ``process_all_datasets`` sorts those outcomes into ``processed_data``
    (successes) and ``failed_datasets`` (failures), and
    ``display_results`` prints both.
    """

    def __init__(self):
        # One dict per processed file: either a full result or an error record.
        self.results_queue = Queue()
        # Successful results keyed by dataset name (file name minus its
        # final extension).
        self.processed_data = {}
        # Error messages keyed by the file path that could not be processed.
        self.failed_datasets = {}

    def process_dataset(self, file_path: str):
        """Process a single CSV file and compute basic statistics.

        Reads ``file_path`` with pandas and, for every numeric column,
        computes mean, max, min, median and standard deviation. The outcome
        is put on ``self.results_queue`` rather than returned:

        * on success, a dict with keys ``dataset``, ``file_path``, ``stats``,
          ``processing_time`` (seconds) and ``rows_processed``;
        * on any exception, a dict with keys ``dataset`` (the file path) and
          ``error`` (the exception message).

        Args:
            file_path: Path of the CSV file to read.
        """
        # Bound outside the try so the except handler can always use it.
        thread_name = threading.current_thread().name
        try:
            logging.info(f"{thread_name}: Starting processing {file_path}")
            # perf_counter is monotonic, so the duration cannot be skewed by
            # system clock adjustments.
            start_time = time.perf_counter()
            df = pd.read_csv(file_path)
            # Path.stem strips only the final extension, so a name such as
            # "sales.2024.csv" stays "sales.2024", and it uses the platform's
            # path separator rather than assuming '/'.
            dataset_name = Path(file_path).stem
            numeric_stats = {
                column: {
                    'mean': df[column].mean(),
                    'max': df[column].max(),
                    'min': df[column].min(),
                    'median': df[column].median(),
                    'std': df[column].std(),
                }
                for column in df.select_dtypes(include=['number']).columns
            }
            result = {
                'dataset': dataset_name,
                'file_path': file_path,
                'stats': numeric_stats,
                'processing_time': time.perf_counter() - start_time,
                'rows_processed': len(df),
            }
            self.results_queue.put(result)
            logging.info(f"{thread_name}: Finished processing {file_path}")
        except Exception as e:
            # Thread boundary: an exception escaping here would be lost to the
            # caller, so it is reported through the queue instead.
            logging.error(f"{thread_name}: Error processing {file_path} - {str(e)}")
            self.results_queue.put({
                'dataset': file_path,
                'error': str(e)
            })

    def process_all_datasets(self, file_paths: List[str]):
        """Process multiple datasets in parallel using threads.

        Starts one thread per path, waits for all of them, then drains
        ``results_queue`` into ``processed_data`` (successes, keyed by
        dataset name) and ``failed_datasets`` (failures, keyed by file path).
        Failures are also logged as warnings.

        Args:
            file_paths: Paths of the CSV files to process.
        """
        threads = []
        for file_path in file_paths:
            thread = threading.Thread(
                target=self.process_dataset,
                args=(file_path,),
                name=f"Processor-{Path(file_path).name}"
            )
            threads.append(thread)
            # Start immediately; no artificial delay between workers.
            thread.start()
        for thread in threads:
            thread.join()
        # All workers have finished, so empty() is reliable here.
        while not self.results_queue.empty():
            result = self.results_queue.get()
            if 'error' not in result:
                self.processed_data[result['dataset']] = result
                # A file that failed on an earlier call but succeeds now
                # should no longer be reported as failed.
                self.failed_datasets.pop(result['file_path'], None)
            else:
                self.failed_datasets[result['dataset']] = result['error']
                logging.warning(f"Failed to process {result['dataset']}: {result['error']}")

    def display_results(self):
        """Display the processing results.

        Prints one section per successfully processed dataset and, when any
        file failed, a final section listing each failed path with its error
        so failures are visible without opening the log file.
        """
        print("\n=== Data Processing Results ===")
        for dataset, result in self.processed_data.items():
            print(f"\nDataset: {dataset}")
            print(f"File: {result['file_path']}")
            print(f"Rows processed: {result['rows_processed']}")
            print(f"Processing time: {result['processing_time']:.2f} seconds")
            if result['stats']:
                print("\nStatistics:")
                for column, stats in result['stats'].items():
                    print(f"\nColumn: {column}")
                    print(f"  Mean: {stats['mean']:.2f}")
                    print(f"  Max: {stats['max']:.2f}")
                    print(f"  Min: {stats['min']:.2f}")
                    print(f"  Median: {stats['median']:.2f}")
                    print(f"  Std Dev: {stats['std']:.2f}")
            else:
                print("No numeric columns found for statistics")
        if self.failed_datasets:
            print("\n=== Failed Datasets ===")
            for failed_path, error in self.failed_datasets.items():
                print(f"{failed_path}: {error}")

def main():
    """Process the two hard-coded CSV paths and print the resulting report."""
    csv_paths = ['data/house_prices.csv', 'data/sales_data.csv']

    pipeline = DataProcessor()
    pipeline.process_all_datasets(csv_paths)
    pipeline.display_results()

    # Point the user at the log file for per-thread details.
    print("\nProcessing complete. Check 'data_processing.log' for detailed logs.")


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()


=== Data Processing Results ===

Processing complete. Check 'data_processing.log' for detailed logs.
