In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
import math
import io
import base64

# --- Configuration ---
# Scenario folders are looked up under DATA_BASE_DIR; the HTML report goes to REPORT_DIR.
DATA_BASE_DIR = '../data/scenarios'
REPORT_DIR = '../reports'

# Create the report folder up front; exist_ok makes re-running the cell harmless.
os.makedirs(REPORT_DIR, exist_ok=True)

# Scenario identifiers FD001 .. FD004, in processing order.
SCENARIOS = [f"FD{index:03d}" for index in range(1, 5)]

# Heading shown for each scenario, keyed by scenario identifier.
SCENARIO_TITLES = dict(
    FD001="FD001: Sea Level Conditions (1 Regime), Single Fault Mode (HPC)",
    FD002="FD002: Multiple Operating Conditions (6 Regimes), Single Fault Mode (HPC)",
    FD003="FD003: Sea Level Conditions (1 Regime), Multiple Fault Modes (HPC, Fan)",
    FD004="FD004: Multiple Operating Conditions (6 Regimes), Multiple Fault Modes (HPC, Fan)",
)

def load_scenario_data(scenario_name, base_dir=None):
    """Load the training table and metadata for one scenario.

    Reads ``<base_dir>/<scenario_name>/train_<scenario_name>.csv`` with pandas
    and ``<base_dir>/<scenario_name>/info.json`` with ``json``.

    Args:
        scenario_name: Name of the scenario folder, e.g. ``"FD001"``.
        base_dir: Directory that contains the scenario folders. When ``None``
            (the default) the module-level ``DATA_BASE_DIR`` is used.

    Returns:
        A ``(df, metadata)`` tuple. ``(None, None)`` when the CSV file does
        not exist. When the CSV exists but ``info.json`` does not, the
        DataFrame is returned together with an empty dict so the scenario
        can still be processed without metadata.

    Raises:
        json.JSONDecodeError: If ``info.json`` exists but is not valid JSON.
    """
    if base_dir is None:
        base_dir = DATA_BASE_DIR

    folder_path = os.path.join(base_dir, scenario_name)
    csv_path = os.path.join(folder_path, f"train_{scenario_name}.csv")
    json_path = os.path.join(folder_path, "info.json")

    if not os.path.exists(csv_path):
        return None, None

    df = pd.read_csv(csv_path)

    # Metadata is optional: a missing info.json must not discard a CSV that
    # loaded fine. JSON is read as UTF-8 rather than the platform default.
    try:
        with open(json_path, encoding="utf-8") as f:
            metadata = json.load(f)
    except FileNotFoundError:
        metadata = {}

    return df, metadata

def _column_title(col_name, col_info):
    """Return the subplot title for one column from its metadata entry."""
    title_text = col_info.get('short_description', col_name)

    if col_name.startswith("op"):
        label = col_info.get('label', col_name)
        # When the label contains a colon, keep only the text after the first one.
        title_text = label.split(":", 1)[1].strip() if ":" in label else label

    if col_name == "RUL":
        title_text = "RUL (Target)"

    return title_text


def plot_to_base64(df, metadata, scenario_name):
    """Render one histogram per column and return the figure as base64 PNG.

    The histograms are laid out on a grid that is 5 subplots wide; grid cells
    without a column are hidden. Subplot titles come from the
    ``'column_metadata'`` entry of ``metadata`` and fall back to the column
    name when no metadata is available.

    Args:
        df: DataFrame whose columns are plotted. A DataFrame with no columns
            yields a single empty row of hidden axes instead of raising.
        metadata: Dict that may contain a ``'column_metadata'`` mapping of
            column name to a dict with ``'short_description'`` / ``'label'``.
            ``None`` is treated as empty.
        scenario_name: Key into ``SCENARIO_TITLES``; used verbatim as the
            figure title when it is not a known key.

    Returns:
        ``(img_str, full_title)``: the PNG encoded as a base64 string and the
        figure title that was used.
    """
    columns = df.columns
    num_cols = len(columns)

    # Grid calculation: always keep at least one row so an empty DataFrame
    # does not ask matplotlib for a zero-row grid.
    n_cols_plot = 5
    n_rows_plot = max(1, math.ceil(num_cols / n_cols_plot))

    # squeeze=False guarantees a 2-D array of axes for every grid shape.
    fig, axes = plt.subplots(
        n_rows_plot,
        n_cols_plot,
        figsize=(20, 4 * n_rows_plot),
        squeeze=False,
    )

    try:
        full_title = SCENARIO_TITLES.get(scenario_name, scenario_name)
        fig.suptitle(full_title, fontsize=20, y=1.02)

        axes = axes.ravel()
        col_meta_map = (metadata or {}).get('column_metadata') or {}

        for ax, col_name in zip(axes, columns):
            ax.hist(df[col_name], bins=50, color='skyblue', edgecolor='black', alpha=0.7)

            col_info = col_meta_map.get(col_name) or {}
            ax.set_title(_column_title(col_name, col_info), fontsize=10, fontweight='bold')
            ax.tick_params(axis='x', rotation=45)
            ax.grid(axis='y', linestyle='--', alpha=0.5)

        # Hide the grid cells that have no column; slicing by num_cols avoids
        # relying on a loop variable that is undefined for an empty frame.
        for unused_ax in axes[num_cols:]:
            unused_ax.axis('off')

        fig.tight_layout()

        # Encode to Base64
        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')

    print(f"Encoded {scenario_name}...")
    return img_str, full_title

def generate_standalone_html(plot_data, report_dir=None):
    """Write a single self-contained HTML report with embedded PNG images.

    Args:
        plot_data: Iterable of ``(img_str, title)`` pairs, where ``img_str``
            is a base64-encoded PNG and ``title`` is the section heading.
            Titles are HTML-escaped before being inserted into the page.
        report_dir: Directory the report is written to; it is created if it
            does not exist. When ``None`` (the default) the module-level
            ``REPORT_DIR`` is used.

    Returns:
        None. The report is written to ``data_analysis_report.html`` inside
        the report directory and its path is printed.
    """
    import html  # local import: only needed for escaping the titles

    if report_dir is None:
        report_dir = REPORT_DIR

    page_head = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Hackathon Data Analysis</title>
        <style>
            body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; margin: 40px; background-color: #f9f9f9; }
            .container { max-width: 1200px; margin: auto; background: white; padding: 40px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
            h1 { color: #333; text-align: center; }
            .scenario { margin-bottom: 60px; border-bottom: 2px solid #eee; padding-bottom: 40px; }
            .scenario h2 { color: #0056b3; }
            img { max-width: 100%; height: auto; border: 1px solid #ddd; }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>NASA Turbofan Dataset - Exploratory Analysis</h1>
            <p style="text-align:center; color:#666;">Generated Overview of Training Data Distributions</p>
            <hr>
    """

    sections = []
    for img_str, title in plot_data:
        # The title lands in both element text and an attribute value, so
        # escape markup characters and quotes.
        safe_title = html.escape(str(title), quote=True)
        sections.append(f"""
        <div class="scenario">
            <h2>{safe_title}</h2>
            <img src="data:image/png;base64,{img_str}" alt="{safe_title}">
        </div>
        """)

    page_tail = """
        </div>
    </body>
    </html>
    """

    html_content = "".join([page_head, *sections, page_tail])

    # Do not depend on the directory having been created elsewhere.
    os.makedirs(report_dir, exist_ok=True)
    report_path = os.path.join(report_dir, "data_analysis_report.html")
    # Write as UTF-8 to match the charset declared in the page head.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(html_content)
    print(f"\nSUCCESS: Standalone Report generated at: {report_path}")

# --- Execution ---
if __name__ == "__main__":
    # Collect one (base64 image, title) pair per scenario that could be loaded.
    generated_plots = []

    for scenario in SCENARIOS:
        df, meta = load_scenario_data(scenario)
        if df is None:
            # No data returned for this scenario: leave it out of the report.
            continue

        img_data, title = plot_to_base64(df, meta, scenario)
        generated_plots.append((img_data, title))

    # Write the report from whatever was collected.
    generate_standalone_html(generated_plots)

Encoded FD001...
Encoded FD002...
Encoded FD003...
Encoded FD004...

SUCCESS: Standalone Report generated at: ../reports/data_analysis_report.html
