# 📊 Comparative Analysis of Sorting Algorithms for Large-Scale Log File Processing
### 🔬 Benchmarking and Automated Visualization Notebook
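This notebook benchmarks the implemented sorting algorithms (quick, merge, heap, and radix sort) on generated log files in several input patterns (random, sorted, reverse, and partial), saves the measurements to a CSV file, and generates visual reports from it.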


In [None]:
!pip install pandas matplotlib seaborn psutil

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from src.log_generator.log_generator import LogGenerator
from src.algorithms.quick_sort import quick_sort
from src.algorithms.merge_sort import merge_sort
from src.algorithms.heap_sort import heap_sort
from src.algorithms.radix_sort import radix_sort
from src.benchmarking.metrics_collector import MetricsCollector, OperationTracker
from src.benchmarking.performance_dashboard import plot_all_metrics


In [None]:
# --- Benchmark configuration (read by the cells below) ---

# Passed to LogGenerator.generate_log_file as the entry count for each pattern.
entry_count = 10000
# How many times each algorithm is run on each pattern.
repeats = 3

# Pattern names; each one is handed to the log generator and used in file names.
patterns = ['random', 'sorted', 'reverse', 'partial']

# Display name -> sorting callable, kept in the order they are benchmarked.
algorithms = dict([
    ('Quick Sort', quick_sort),
    ('Merge Sort', merge_sort),
    ('Heap Sort', heap_sort),
    ('Radix Sort', radix_sort),
])

# One dict per measured run is appended here by the benchmark loop.
results = []

# Make sure the output directories exist before anything is written to them.
for _out_dir in ("logs", "reports"):
    os.makedirs(_out_dir, exist_ok=True)


In [None]:
# Benchmark driver: for every input pattern, generate one log file, then run
# each algorithm `repeats` times on an identical copy of that file's lines.
for pattern in patterns:
    print(f"\nPattern: {pattern.upper()}")

    # Generate the input for this pattern and read it back as a list of lines.
    log_file = f"logs/{pattern}_demo.txt"
    LogGenerator().generate_log_file(log_file, entry_count, pattern)
    with open(log_file, 'r') as log_handle:
        original_logs = log_handle.readlines()

    for algo_name, sort_fn in algorithms.items():
        print(f"  Testing: {algo_name}")
        for run_number in range(1, repeats + 1):
            # Fresh input copy, tracker and collector for every run, so no run
            # starts from state left behind by an earlier one.
            fresh_logs = list(original_logs)
            tracker = OperationTracker()
            metrics = MetricsCollector()
            sorted_logs = metrics.measure(sort_fn, fresh_logs, tracker)

            # Tag the collector's most recent entry with this run's identity.
            # Keys from the collector win on a name clash, as with ** unpacking.
            row = {'Algorithm': algo_name, 'Pattern': pattern, 'Run': run_number}
            row.update(metrics.results[-1])
            results.append(row)


In [None]:
# Gather every recorded run into one table and persist it (without the index
# column) so the plotting cell can load it from disk.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf="reports/demo_benchmark_results.csv", index=False)
# Trailing expression: the notebook displays the first five rows as a preview.
df.head(n=5)

In [None]:
# Hand the CSV path written by the export cell to the project's dashboard helper.
# NOTE(review): presumably this loads the CSV and renders one plot per metric —
# confirm against src/benchmarking/performance_dashboard.py.
plot_all_metrics("reports/demo_benchmark_results.csv")