# VISUALISASI DATA BARU - Hasil Analisis Konsumsi BBM

Notebook ini berisi visualisasi lengkap untuk data konsumsi BBM periode 6–9 November 2025, termasuk:
- Uji normalitas dengan visualisasi
- Distribusi data
- Deteksi outlier
- Tren konsumsi
- Perbandingan sebelum dan sesudah interpolasi

In [None]:
# IMPORT VISUALIZATION LIBRARIES

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import numpy as np
import pandas as pd
from matplotlib.patches import Rectangle
from matplotlib.gridspec import GridSpec
import warnings
warnings.filterwarnings('ignore')

# Set style.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'.
# Pick whichever name this installation provides so older versions do not
# raise OSError; fall back to the default style if neither is available.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid', 'default'):
    if _style_name == 'default' or _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")
sns.set_context("notebook", font_scale=1.1)

# Set figure quality: modest DPI on screen, high DPI for saved files.
plt.rcParams.update({
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'figure.figsize': (12, 6),
})

print("✓ Visualization libraries imported successfully")

## 1. HISTOGRAM & KURVA NORMAL
Visualisasi distribusi konsumsi BBM harian dengan kurva normal overlay

In [None]:
# HISTOGRAM WITH NORMAL CURVE & KDE

def plot_histogram_with_normal(daily_df, title_suffix=""):
    """
    Plot a density histogram of daily consumption with KDE and normal overlays.

    Values are read from the 'avg_consumption_kmL' column. The histogram is
    overlaid with a Gaussian KDE and with the normal PDF parameterised by the
    sample mean and the sample standard deviation (ddof=1); mean and median
    are marked with vertical lines. The figure is displayed with
    ``plt.show()`` and a short summary is printed.

    Parameters
    ----------
    daily_df : DataFrame-like
        Must support ``len()`` and provide an 'avg_consumption_kmL' column.
    title_suffix : str, optional
        Text appended to the first line of the plot title.

    Returns
    -------
    None
    """
    if len(daily_df) == 0:
        print("Tidak ada data untuk divisualisasikan")
        return

    consumption = daily_df['avg_consumption_kmL'].values
    sample_size = len(consumption)

    mean = np.mean(consumption)
    median = np.median(consumption)
    # The sample standard deviation is undefined for a single observation.
    std = np.std(consumption, ddof=1) if sample_size > 1 else float('nan')

    fig, ax = plt.subplots(figsize=(12, 6))

    # Histogram
    ax.hist(consumption, bins='auto', density=True,
            alpha=0.7, color='skyblue', edgecolor='black',
            label='Data Aktual')

    # Both density overlays need a positive, finite spread: gaussian_kde
    # cannot be built from a single or constant sample, and the normal PDF is
    # undefined for std == 0 / NaN. Skip them instead of crashing.
    if sample_size > 1 and np.isfinite(std) and std > 0:
        x_grid = np.linspace(consumption.min(), consumption.max(), 100)

        # KDE (Kernel Density Estimation)
        ax.plot(x_grid, stats.gaussian_kde(consumption)(x_grid), 'b-',
                linewidth=2, label='KDE (Distribusi Aktual)')

        # Normal distribution overlay
        ax.plot(x_grid, stats.norm.pdf(x_grid, mean, std), 'r--', linewidth=2,
                label=f'Distribusi Normal\n(μ={mean:.2f}, σ={std:.2f})')

    # Vertical markers for mean and median, labelled near the top of the axes
    y_top = None
    for value, color, caption, height in (
        (mean, 'red', 'Mean', 0.95),
        (median, 'green', 'Median', 0.88),
    ):
        ax.axvline(value, color=color, linestyle='--', linewidth=1.5, alpha=0.7)
        if y_top is None:
            # Read the limit after the first line is drawn, then reuse it so
            # both labels are positioned against the same axis height.
            y_top = ax.get_ylim()[1]
        ax.text(value, y_top * height, f'{caption}: {value:.2f}',
                horizontalalignment='center', color=color, fontweight='bold')

    # Labels and title
    ax.set_xlabel('Konsumsi BBM (km/L)', fontsize=12, fontweight='bold')
    ax.set_ylabel('Densitas Probabilitas', fontsize=12, fontweight='bold')
    ax.set_title(f'Distribusi Konsumsi BBM Harian{title_suffix}\n'
                 f'N={sample_size}, Mean={mean:.2f} km/L, Std={std:.2f} km/L',
                 fontsize=14, fontweight='bold', pad=20)

    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("\n📊 Histogram & Normal Curve Plot Created")
    print(f"   N = {sample_size}, Mean = {mean:.2f}, Std = {std:.2f}")

# Banner followed by the histogram of the interpolated daily data.
print("=" * 80, "HISTOGRAM - DATA SETELAH INTERPOLASI", "=" * 80, sep="\n")
plot_histogram_with_normal(daily_baru_interpolated,
                           title_suffix=" (After Interpolation)")

## 2. Q-Q PLOT (Quantile-Quantile Plot)
Membandingkan kuantil data dengan kuantil distribusi normal teoretis

In [None]:
# Q-Q PLOT FOR NORMALITY ASSESSMENT

def plot_qq(daily_df, title_suffix=""):
    """
    Draw a normal Q-Q plot of daily consumption and report the fit's R².

    Values are read from the 'avg_consumption_kmL' column and passed to
    ``scipy.stats.probplot``, which draws the ordered data against theoretical
    normal quantiles together with a least-squares line. R² is the square of
    the correlation coefficient that ``probplot`` returns for that same line,
    so the annotation describes exactly what is plotted. R² > 0.95 is reported
    as "normal", otherwise "not normal".

    Parameters
    ----------
    daily_df : DataFrame-like
        Must support ``len()`` and provide an 'avg_consumption_kmL' column.
    title_suffix : str, optional
        Text appended to the first line of the plot title.

    Returns
    -------
    None
    """
    if len(daily_df) == 0:
        print("Tidak ada data untuk divisualisasikan")
        return

    consumption = daily_df['avg_consumption_kmL'].values

    fig, ax = plt.subplots(figsize=(10, 10))

    # probplot returns ((theoretical, ordered), (slope, intercept, r)); keep r
    # so R² refers to the plotted points and line rather than to a second,
    # slightly different set of plotting positions.
    _, (_, _, r_value) = stats.probplot(consumption, dist="norm", plot=ax)
    r_squared = r_value ** 2

    # probplot adds the sample points first and the fitted line second.
    sample_points, fitted_line = ax.get_lines()[:2]
    sample_points.set_markerfacecolor('blue')
    sample_points.set_markeredgecolor('darkblue')
    sample_points.set_markersize(8)
    sample_points.set_alpha(0.7)

    fitted_line.set_color('red')
    fitted_line.set_linewidth(2)
    fitted_line.set_linestyle('--')

    # Labels
    ax.set_xlabel('Theoretical Quantiles (Normal Distribution)',
                  fontsize=12, fontweight='bold')
    ax.set_ylabel('Sample Quantiles (Actual Data)',
                  fontsize=12, fontweight='bold')
    ax.set_title(f'Q-Q Plot - Uji Normalitas{title_suffix}\n'
                 f'N={len(consumption)}',
                 fontsize=14, fontweight='bold', pad=20)

    ax.grid(True, alpha=0.3, linestyle='--')

    # Verbal interpretation of the fit quality
    if r_squared > 0.95:
        interpretation = "Data mendekati garis merah → Distribusi NORMAL"
    else:
        interpretation = "Data menyimpang dari garis merah → Distribusi TIDAK NORMAL"

    ax.text(0.05, 0.95, f'R² = {r_squared:.4f}\n{interpretation}',
            transform=ax.transAxes, fontsize=11,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    plt.tight_layout()
    plt.show()

    print("\n📊 Q-Q Plot Created")
    print(f"   R² = {r_squared:.4f}")
    print(f"   Interpretation: {interpretation}")

# Banner followed by the Q-Q plot of the interpolated daily data.
print("=" * 80, "Q-Q PLOT - DATA SETELAH INTERPOLASI", "=" * 80, sep="\n")
plot_qq(daily_baru_interpolated, title_suffix=" (After Interpolation)")

## 3. BOX PLOT - Deteksi Outlier
Visualisasi distribusi dengan deteksi outlier menggunakan metode IQR

In [None]:
# BOX PLOT WITH OUTLIER DETECTION

def plot_boxplot(daily_df, title_suffix=""):
    """
    Draw a box plot of daily consumption annotated with IQR-based statistics.

    Values are read from the 'avg_consumption_kmL' column. Quartiles, IQR and
    the 1.5 * IQR fences are computed with ``np.percentile``; observations
    outside the fences are counted as outliers. The figure is displayed with
    ``plt.show()`` and the outlier count (and values, if any) is printed.

    Parameters
    ----------
    daily_df : DataFrame-like
        Must support ``len()`` and provide an 'avg_consumption_kmL' column.
    title_suffix : str, optional
        Text appended to the first line of the plot title.

    Returns
    -------
    None
    """
    if len(daily_df) == 0:
        print("Tidak ada data untuk divisualisasikan")
        return

    consumption = daily_df['avg_consumption_kmL'].values

    # Summary statistics; fences follow the 1.5 * IQR rule.
    mean_val = np.mean(consumption)
    median_val = np.median(consumption)
    q1, q3 = np.percentile(consumption, [25, 75])
    iqr = q3 - q1
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr

    outliers = consumption[(consumption < lower_fence) | (consumption > upper_fence)]
    n_outliers = len(outliers)

    fig, ax = plt.subplots(figsize=(12, 8))

    # Vertical is boxplot's default orientation, so it is not passed explicitly.
    ax.boxplot(consumption, patch_artist=True, widths=0.5,
               boxprops=dict(facecolor='lightblue', edgecolor='darkblue', linewidth=2),
               medianprops=dict(color='red', linewidth=3),
               whiskerprops=dict(color='darkblue', linewidth=1.5),
               capprops=dict(color='darkblue', linewidth=1.5),
               flierprops=dict(marker='o', markerfacecolor='red', markersize=10,
                               markeredgecolor='darkred', alpha=0.7))

    # Mean marker (the single box sits at x = 1)
    ax.plot(1, mean_val, marker='D', markersize=12, color='green',
            markeredgecolor='darkgreen', markeredgewidth=2,
            label=f'Mean: {mean_val:.2f}', zorder=3)

    # Reference lines for mean and median
    ax.axhline(y=mean_val, color='green', linestyle='--', alpha=0.5, linewidth=1)
    ax.axhline(y=median_val, color='red', linestyle='--', alpha=0.5, linewidth=1)

    # Text labels: (y position, caption, side of the box the label starts on)
    annotations = [
        (q3, f'Q3: {q3:.2f}', 'left'),
        (median_val, f'Median: {median_val:.2f}', 'right'),
        (q1, f'Q1: {q1:.2f}', 'left'),
        (upper_fence, f'Upper Fence: {upper_fence:.2f}', 'left'),
        (lower_fence, f'Lower Fence: {lower_fence:.2f}', 'left'),
    ]
    for y_pos, caption, align in annotations:
        x_pos = 1.15 if align == 'left' else 0.85
        ax.text(x_pos, y_pos, caption, fontsize=10, fontweight='bold',
                verticalalignment='center', horizontalalignment=align)

    # Title and labels
    ax.set_ylabel('Konsumsi BBM (km/L)', fontsize=12, fontweight='bold')
    ax.set_title(f'Box Plot - Distribusi Konsumsi BBM{title_suffix}\n'
                 f'N={len(consumption)}, IQR={iqr:.2f}, Outliers={n_outliers}',
                 fontsize=14, fontweight='bold', pad=20)

    ax.set_xticks([1])
    ax.set_xticklabels(['Konsumsi BBM Harian'])
    ax.grid(True, axis='y', alpha=0.3)
    # The statistics box below occupies the upper-left corner, so the legend
    # goes to the lower-left corner to keep the two from overlapping.
    ax.legend(loc='lower left', fontsize=10)

    # Statistics box
    stats_text = '\n'.join([
        f'N: {len(consumption)}',
        f'Mean: {mean_val:.2f}',
        f'Median: {median_val:.2f}',
        f'IQR: {iqr:.2f}',
        f'Outliers: {n_outliers}',
    ])
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes,
            fontsize=11, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8))

    plt.tight_layout()
    plt.show()

    print("\n📊 Box Plot Created")
    print(f"   Outliers detected: {n_outliers}")
    if n_outliers > 0:
        print(f"   Outlier values: {outliers}")

# Banner followed by the box plot of the interpolated daily data.
print("=" * 80, "BOX PLOT - DATA SETELAH INTERPOLASI", "=" * 80, sep="\n")
plot_boxplot(daily_baru_interpolated, title_suffix=" (After Interpolation)")

## 4. TIME SERIES PLOT - Tren Konsumsi Harian
Visualisasi konsumsi BBM dari waktu ke waktu

In [None]:
# TIME SERIES PLOT - DAILY CONSUMPTION TREND

def plot_time_series(daily_df, title_suffix=""):
    """
    Plot daily consumption over time plus distance vs. fuel used per day.

    The upper panel shows 'avg_consumption_kmL' against 'date' with mean and
    median reference lines and a value label on every point. The lower panel
    shows 'distance_km' as bars and 'fuel_used_L' as a line on a twin y-axis.
    The input is copied, so the caller's frame is not modified. The figure is
    displayed with ``plt.show()`` and the covered period is printed.

    Parameters
    ----------
    daily_df : pandas.DataFrame
        Needs the columns 'date', 'avg_consumption_kmL', 'distance_km' and
        'fuel_used_L'.
    title_suffix : str, optional
        Text appended to the title of the upper panel.

    Returns
    -------
    None
    """
    if len(daily_df) == 0:
        print("Tidak ada data untuk divisualisasikan")
        return

    import matplotlib.dates as mdates

    # At most this many days get exactly one tick per day; longer spans keep
    # Matplotlib's automatic tick placement to avoid overcrowding.
    max_daily_tick_span = 14

    # Prepare data
    df = daily_df.copy()
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')
    consumption = df['avg_consumption_kmL']

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

    # ===== PLOT 1: fuel consumption =====
    ax1.plot(df['date'], consumption,
             marker='o', markersize=10, linewidth=2,
             color='blue', markerfacecolor='lightblue',
             markeredgecolor='darkblue', markeredgewidth=2,
             label='Konsumsi Harian')

    # Mean and median reference lines
    mean_val = consumption.mean()
    median_val = consumption.median()
    ax1.axhline(y=mean_val, color='red', linestyle='--', linewidth=2,
                alpha=0.7, label=f'Mean: {mean_val:.2f} km/L')
    ax1.axhline(y=median_val, color='green', linestyle='--', linewidth=2,
                alpha=0.7, label=f'Median: {median_val:.2f} km/L')

    # Value label above every point
    for day, value in zip(df['date'], consumption):
        ax1.annotate(f'{value:.2f}', (day, value),
                     textcoords="offset points", xytext=(0, 10),
                     ha='center', fontsize=9, fontweight='bold')

    ax1.set_xlabel('Tanggal', fontsize=12, fontweight='bold')
    ax1.set_ylabel('Konsumsi BBM (km/L)', fontsize=12, fontweight='bold')
    ax1.set_title(f'Tren Konsumsi BBM Harian{title_suffix}',
                  fontsize=14, fontweight='bold', pad=15)
    ax1.legend(loc='best', fontsize=10)
    ax1.grid(True, alpha=0.3)

    # ===== PLOT 2: distance and fuel =====
    ax2_twin = ax2.twinx()

    ax2.bar(df['date'], df['distance_km'],
            alpha=0.6, color='skyblue',
            edgecolor='darkblue', linewidth=1.5,
            label='Jarak (km)', width=0.3)

    ax2_twin.plot(df['date'], df['fuel_used_L'],
                  marker='s', markersize=8, linewidth=2,
                  color='orange', markerfacecolor='yellow',
                  markeredgecolor='darkorange', markeredgewidth=2,
                  label='BBM Terpakai (L)')

    ax2.set_xlabel('Tanggal', fontsize=12, fontweight='bold')
    ax2.set_ylabel('Jarak (km)', fontsize=12, fontweight='bold', color='blue')
    ax2_twin.set_ylabel('BBM Terpakai (L)', fontsize=12, fontweight='bold', color='orange')

    ax2.tick_params(axis='y', labelcolor='blue')
    ax2_twin.tick_params(axis='y', labelcolor='orange')

    ax2.set_title('Jarak Tempuh vs BBM Terpakai per Hari',
                  fontsize=14, fontweight='bold', pad=15)

    # One legend covering both y-axes
    bar_handles, bar_labels = ax2.get_legend_handles_labels()
    line_handles, line_labels = ax2_twin.get_legend_handles_labels()
    ax2.legend(bar_handles + line_handles, bar_labels + line_labels,
               loc='upper left', fontsize=10)

    ax2.grid(True, alpha=0.3, axis='y')

    # ===== Date axes =====
    # On a span of a few days the automatic locator places sub-daily ticks,
    # which a date-only format renders as repeated labels. Use one tick per
    # day for short spans.
    span_days = (df['date'].max() - df['date'].min()).days
    for axis in (ax1, ax2):
        if span_days <= max_daily_tick_span:
            axis.xaxis.set_major_locator(mdates.DayLocator())
        axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        plt.setp(axis.xaxis.get_majorticklabels(), rotation=45, ha='right')

    plt.tight_layout()
    plt.show()

    print("\n📊 Time Series Plot Created")
    print(f"   Period: {df['date'].min()} to {df['date'].max()}")

# Banner followed by the time-series plot of the interpolated daily data.
print("=" * 80, "TIME SERIES - DATA SETELAH INTERPOLASI", "=" * 80, sep="\n")
plot_time_series(daily_baru_interpolated, title_suffix=" (After Interpolation)")

## 5. NORMALITY TEST RESULTS - Tabel Ringkasan
Tabel hasil uji normalitas dengan color coding

In [None]:
# NORMALITY TEST RESULTS SUMMARY TABLE

def plot_normality_results_table(normality_results):
    """
    Render the normality-test results as a colour-coded table figure.

    One row is drawn per test (Shapiro-Wilk, Kolmogorov-Smirnov,
    Anderson-Darling); passing rows are shaded green, failing rows red. The
    pass/fail decision of each test is determined once and reused for the
    table text, the row colour and the "Tests Passed" count, so the three can
    never disagree.

    Parameters
    ----------
    normality_results : dict or None
        Keys read here: 'shapiro_stat', 'shapiro_pvalue', 'ks_stat',
        'ks_pvalue', 'anderson_stat', 'anderson_critical' (index 2 is shown
        as the 5% critical value), 'anderson_normal' and 'conclusion'.
        'shapiro_normal' / 'ks_normal' are used when present; otherwise the
        decision falls back to ``p-value > 0.05``.

    Returns
    -------
    None
    """
    if normality_results is None:
        print("Tidak ada hasil uji normalitas")
        return

    alpha = 0.05

    def _decision(flag_key, pvalue_key):
        # Prefer the decision stored with the results; fall back to p > alpha.
        fallback = normality_results[pvalue_key] > alpha
        return bool(normality_results.get(flag_key, fallback))

    # (test name, statistic, p-value text, critical-value text, passed?)
    tests = [
        ('Shapiro-Wilk',
         normality_results['shapiro_stat'],
         f"{normality_results['shapiro_pvalue']:.6f}",
         f"α = {alpha}",
         _decision('shapiro_normal', 'shapiro_pvalue')),
        ('Kolmogorov-Smirnov',
         normality_results['ks_stat'],
         f"{normality_results['ks_pvalue']:.6f}",
         f"α = {alpha}",
         _decision('ks_normal', 'ks_pvalue')),
        ('Anderson-Darling',
         normality_results['anderson_stat'],
         'N/A',
         f"{normality_results['anderson_critical'][2]:.3f} (5%)",
         bool(normality_results['anderson_normal'])),
    ]

    header = ['Test', 'Statistic', 'p-value', 'Critical Value', 'Result', 'Conclusion']
    table_data = [header]
    for test_name, statistic, pvalue_text, critical_text, passed in tests:
        table_data.append([
            test_name,
            f"{statistic:.6f}",
            pvalue_text,
            critical_text,
            '✓ PASS' if passed else '✗ FAIL',
            'Normal' if passed else 'Not Normal',
        ])

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.axis('tight')
    ax.axis('off')

    # Create table
    table = ax.table(cellText=table_data, loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1, 2.5)

    n_columns = len(header)

    # Style header
    for col in range(n_columns):
        header_cell = table[(0, col)]
        header_cell.set_facecolor('#4472C4')
        header_cell.set_text_props(weight='bold', color='white')

    # Shade each data row by its decision: light green = pass, light red = fail
    for row, (*_, passed) in enumerate(tests, start=1):
        row_color = '#C6EFCE' if passed else '#FFC7CE'
        for col in range(n_columns):
            table[(row, col)].set_facecolor(row_color)

        # Emphasise the result and conclusion columns
        table[(row, 4)].set_text_props(weight='bold')
        table[(row, 5)].set_text_props(weight='bold')

    # Title
    ax.set_title('Hasil Uji Normalitas - Data Konsumsi BBM Harian\n'
                 f'Final Conclusion: {normality_results["conclusion"]}',
                 fontsize=16, fontweight='bold', pad=20)

    # Interpretation box: majority vote over the three tests
    normal_count = sum(passed for *_, passed in tests)

    interpretation = f"Tests Passed: {normal_count}/3\n\n"
    if normal_count >= 2:
        interpretation += "✓ Data berdistribusi NORMAL\n"
        interpretation += "✓ Dapat menggunakan uji parametrik (t-test)\n"
        interpretation += "✓ Asumsi normalitas terpenuhi"
    else:
        interpretation += "✗ Data TIDAK berdistribusi normal\n"
        interpretation += "→ Gunakan uji non-parametrik (Mann-Whitney U)\n"
        interpretation += "→ Asumsi normalitas tidak terpenuhi"

    ax.text(0.5, -0.15, interpretation, transform=ax.transAxes,
            fontsize=12, verticalalignment='top', horizontalalignment='center',
            bbox=dict(boxstyle='round', facecolor='lightyellow',
                      edgecolor='orange', linewidth=2, alpha=0.8))

    plt.tight_layout()
    plt.show()

    print("\n📊 Normality Test Results Table Created")
    print(f"   Conclusion: {normality_results['conclusion']}")

# Banner followed by the normality-test summary table.
print("=" * 80, "NORMALITY TEST RESULTS - DATA SETELAH INTERPOLASI", "=" * 80, sep="\n")
plot_normality_results_table(normality_results=normality_baru_interpolated)

## 6. STATISTICAL SUMMARY - Panel Visualisasi
Ringkasan statistik deskriptif dalam bentuk visual

In [None]:
# STATISTICAL SUMMARY VISUALIZATION PANEL

def plot_statistical_summary(daily_df, stats_dict, title_suffix=" (After Interpolation)"):
    """
    Draw a six-panel figure summarising the descriptive statistics.

    Panels: central tendency, dispersion, five-number summary, distribution
    shape (skewness / kurtosis), a frequency histogram of
    'avg_consumption_kmL', and a table listing every statistic. The figure is
    displayed with ``plt.show()``.

    Parameters
    ----------
    daily_df : DataFrame-like
        Must support ``len()`` and provide an 'avg_consumption_kmL' column.
    stats_dict : dict or None
        Keys read here: 'N', 'Mean', 'Median', 'Std_Dev', 'Variance',
        'Range', 'IQR', 'Min', 'Q1', 'Q3', 'Max', 'Skewness', 'Kurtosis'.
    title_suffix : str, optional
        Text appended to the figure title. The default reproduces the title
        this function always used, so existing calls are unaffected.

    Returns
    -------
    None
    """
    if len(daily_df) == 0 or stats_dict is None:
        print("Tidak ada data untuk divisualisasikan")
        return

    fig = plt.figure(figsize=(16, 10))
    gs = GridSpec(3, 3, figure=fig, hspace=0.3, wspace=0.3)

    consumption = daily_df['avg_consumption_kmL'].values

    # ===== 1. CENTRAL TENDENCY =====
    ax1 = fig.add_subplot(gs[0, 0])
    central_values = [stats_dict['Mean'], stats_dict['Median']]
    ax1.barh(['Mean', 'Median'], central_values,
             color=['#FF6B6B', '#4ECDC4'], edgecolor='black', linewidth=2)
    ax1.set_xlabel('Konsumsi (km/L)', fontweight='bold')
    ax1.set_title('Central Tendency', fontweight='bold', fontsize=12)
    for row, value in enumerate(central_values):
        ax1.text(value, row, f'  {value:.2f}', va='center', fontweight='bold')
    ax1.grid(axis='x', alpha=0.3)

    # ===== 2. DISPERSION =====
    ax2 = fig.add_subplot(gs[0, 1])
    dispersion_values = [stats_dict['Std_Dev'], stats_dict['Variance'],
                         stats_dict['Range'], stats_dict['IQR']]
    ax2.barh(['Std Dev', 'Variance', 'Range', 'IQR'], dispersion_values,
             color=['#95E1D3', '#F38181', '#AA96DA', '#FCBAD3'],
             edgecolor='black', linewidth=2)
    ax2.set_xlabel('Value', fontweight='bold')
    ax2.set_title('Dispersion Measures', fontweight='bold', fontsize=12)
    for row, value in enumerate(dispersion_values):
        ax2.text(value, row, f'  {value:.2f}', va='center',
                 fontweight='bold', fontsize=9)
    ax2.grid(axis='x', alpha=0.3)

    # ===== 3. FIVE-NUMBER SUMMARY =====
    ax3 = fig.add_subplot(gs[0, 2])
    five_number_values = [stats_dict['Min'], stats_dict['Q1'], stats_dict['Median'],
                          stats_dict['Q3'], stats_dict['Max']]
    five_number_bars = ax3.bar(
        ['Min', 'Q1', 'Median', 'Q3', 'Max'], five_number_values,
        color=['#FFB6B9', '#FEC8D8', '#FFDBC5', '#FEE5AD', '#C1F7DC'],
        edgecolor='black', linewidth=2)
    ax3.set_ylabel('Konsumsi (km/L)', fontweight='bold')
    ax3.set_title('Five-Number Summary', fontweight='bold', fontsize=12)
    for bar, value in zip(five_number_bars, five_number_values):
        ax3.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                 f'{value:.2f}', ha='center', va='bottom', fontweight='bold')
    ax3.grid(axis='y', alpha=0.3)

    # ===== 4. SHAPE INDICATORS =====
    ax4 = fig.add_subplot(gs[1, 0])
    skew = stats_dict['Skewness']
    kurt = stats_dict['Kurtosis']

    ax4.barh(['Skewness', 'Kurtosis'], [skew, kurt],
             color=['#FFD93D', '#6BCB77'], edgecolor='black', linewidth=2)
    ax4.axvline(x=0, color='black', linestyle='-', linewidth=1)
    ax4.set_xlabel('Value', fontweight='bold')
    ax4.set_title('Distribution Shape', fontweight='bold', fontsize=12)
    ax4.grid(axis='x', alpha=0.3)

    # Verbal interpretation.
    # NOTE(review): the kurtosis wording treats 0 as the normal reference,
    # i.e. it assumes stats_dict['Kurtosis'] is excess kurtosis — confirm
    # against the function that builds stats_dict.
    if abs(skew) < 0.5:
        skew_text = 'Symmetric'
    else:
        skew_text = 'Right-skewed' if skew > 0 else 'Left-skewed'
    if abs(kurt) < 0.5:
        kurt_text = 'Normal'
    else:
        kurt_text = 'Heavy-tailed' if kurt > 0 else 'Light-tailed'

    # Put each label beyond the end of its bar: to the right of positive
    # bars and to the left of negative ones, so it never covers the bar.
    for row, (value, verdict) in enumerate(((skew, skew_text), (kurt, kurt_text))):
        ax4.text(value, row, f' {value:.3f} \n {verdict} ', va='center',
                 ha='left' if value >= 0 else 'right', fontsize=9)

    # ===== 5. HISTOGRAM =====
    ax5 = fig.add_subplot(gs[1, 1:])
    ax5.hist(consumption, bins='auto', density=False,
             alpha=0.7, color='skyblue', edgecolor='black')
    ax5.axvline(stats_dict['Mean'], color='red', linestyle='--',
                linewidth=2, label=f"Mean: {stats_dict['Mean']:.2f}")
    ax5.axvline(stats_dict['Median'], color='green', linestyle='--',
                linewidth=2, label=f"Median: {stats_dict['Median']:.2f}")
    ax5.set_xlabel('Konsumsi BBM (km/L)', fontweight='bold')
    ax5.set_ylabel('Frequency', fontweight='bold')
    ax5.set_title('Distribution Histogram', fontweight='bold', fontsize=12)
    ax5.legend()
    ax5.grid(axis='y', alpha=0.3)

    # ===== 6. STATISTICS TABLE =====
    ax6 = fig.add_subplot(gs[2, :])
    ax6.axis('tight')
    ax6.axis('off')

    table_data = [
        ['Statistic', 'Value', 'Statistic', 'Value'],
        ['N', f"{stats_dict['N']}", 'Range', f"{stats_dict['Range']:.4f}"],
        ['Mean', f"{stats_dict['Mean']:.4f}", 'Q1', f"{stats_dict['Q1']:.4f}"],
        ['Median', f"{stats_dict['Median']:.4f}", 'Q3', f"{stats_dict['Q3']:.4f}"],
        ['Std Dev', f"{stats_dict['Std_Dev']:.4f}", 'IQR', f"{stats_dict['IQR']:.4f}"],
        ['Variance', f"{stats_dict['Variance']:.4f}", 'Skewness', f"{stats_dict['Skewness']:.4f}"],
        ['Min', f"{stats_dict['Min']:.4f}", 'Kurtosis', f"{stats_dict['Kurtosis']:.4f}"],
        ['Max', f"{stats_dict['Max']:.4f}", '', ''],
    ]

    table = ax6.table(cellText=table_data, loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2)

    n_columns = len(table_data[0])

    # Style header
    for col in range(n_columns):
        table[(0, col)].set_facecolor('#4472C4')
        table[(0, col)].set_text_props(weight='bold', color='white')

    # Zebra-stripe the data rows
    for row in range(1, len(table_data)):
        stripe = '#E7E6E6' if row % 2 == 0 else 'white'
        for col in range(n_columns):
            table[(row, col)].set_facecolor(stripe)

    # Main title
    fig.suptitle(f'Statistical Summary - Konsumsi BBM Harian{title_suffix}',
                 fontsize=16, fontweight='bold', y=0.98)

    plt.tight_layout()
    plt.show()

    print("\n📊 Statistical Summary Panel Created")

# Banner for the statistical summary section.
print("=" * 80, "STATISTICAL SUMMARY - DATA SETELAH INTERPOLASI", "=" * 80, sep="\n")

# Reuse stats_baru from an earlier cell when it exists and is not None;
# otherwise compute the descriptive statistics now.
if locals().get('stats_baru') is None:
    stats_baru = descriptive_statistics(daily_baru_interpolated)

plot_statistical_summary(daily_baru_interpolated, stats_dict=stats_baru)

## 7. COMPARISON - Before vs After Interpolation
Perbandingan distribusi sebelum dan sesudah interpolasi fuel sensor

In [None]:
# COMPARISON PLOT - BEFORE vs AFTER INTERPOLATION

def _consumption_or_none(daily_df):
    """Return the 'avg_consumption_kmL' values, or None for a None/empty frame."""
    if daily_df is None or len(daily_df) == 0:
        return None
    return daily_df['avg_consumption_kmL'].values


def _show_placeholder(ax, title, message='No Data'):
    """Write a centred placeholder message on ``ax`` and set its title."""
    ax.text(0.5, 0.5, message, ha='center', va='center',
            fontsize=14, transform=ax.transAxes)
    ax.set_title(title, fontweight='bold')


def _draw_histogram_panel(ax, values, phase, facecolor, edgecolor, mean_color):
    """Draw one frequency histogram with a mean line, or a placeholder if no data."""
    if values is None:
        _show_placeholder(ax, f'{phase} Interpolation\n(No Valid Data)')
    else:
        mean_val = np.mean(values)
        ax.hist(values, bins='auto', alpha=0.7,
                color=facecolor, edgecolor=edgecolor)
        ax.axvline(mean_val, color=mean_color, linestyle='--', linewidth=2)
        ax.set_title(f'{phase} Interpolation\nN={len(values)}, Mean={mean_val:.2f}',
                     fontweight='bold')

    ax.set_xlabel('Konsumsi (km/L)', fontweight='bold')
    ax.set_ylabel('Frequency', fontweight='bold')
    ax.grid(alpha=0.3)


def _draw_box_panel(ax, values, phase, facecolor, median_color):
    """Draw one box plot, or a placeholder if no data."""
    if values is None:
        _show_placeholder(ax, f'Box Plot - {phase}\n(No Data)')
    else:
        # Vertical is boxplot's default orientation.
        ax.boxplot(values, patch_artist=True,
                   boxprops=dict(facecolor=facecolor),
                   medianprops=dict(color=median_color, linewidth=2))
        ax.set_ylabel('Konsumsi (km/L)', fontweight='bold')
        ax.set_title(f'Box Plot - {phase}', fontweight='bold')
        ax.set_xticklabels([phase.capitalize()])
    ax.grid(alpha=0.3)


def plot_before_after_comparison(daily_before, daily_after):
    """
    Compare daily consumption before and after fuel-sensor interpolation.

    Draws a 2 x 3 figure: row 1 holds the "before" histogram, the "after"
    histogram and an overlay of both; row 2 holds the two box plots and a
    statistics table (N, mean, median, sample standard deviation and, when
    both series exist, the signed change). Panels whose series is missing
    show a placeholder instead. The figure is displayed with ``plt.show()``.

    Parameters
    ----------
    daily_before, daily_after : DataFrame-like or None
        Each must provide an 'avg_consumption_kmL' column; ``None`` and empty
        inputs are both treated as "no data".

    Returns
    -------
    None
    """
    before = _consumption_or_none(daily_before)
    after = _consumption_or_none(daily_after)
    has_before = before is not None
    has_after = after is not None

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    # ===== ROW 1: HISTOGRAMS =====
    _draw_histogram_panel(axes[0, 0], before, 'BEFORE',
                          facecolor='lightcoral', edgecolor='darkred', mean_color='red')
    _draw_histogram_panel(axes[0, 1], after, 'AFTER',
                          facecolor='lightgreen', edgecolor='darkgreen', mean_color='green')

    # Overlay panel
    ax = axes[0, 2]
    if has_before and has_after:
        # Use one set of bin edges for both series; independently chosen
        # 'auto' bins would give bars of different widths that cannot be
        # compared visually.
        shared_bins = np.histogram_bin_edges(np.concatenate([before, after]),
                                             bins='auto')
        for values, color, edge, name in ((before, 'red', 'darkred', 'Before'),
                                          (after, 'green', 'darkgreen', 'After')):
            ax.hist(values, bins=shared_bins, alpha=0.5, color=color,
                    edgecolor=edge, label=f'{name} (N={len(values)})')
            ax.axvline(np.mean(values), color=color, linestyle='--',
                       linewidth=2, alpha=0.7)
        ax.set_title('OVERLAY Comparison', fontweight='bold')
        ax.legend(loc='best')
    elif has_after:
        ax.hist(after, bins='auto', alpha=0.7,
                color='green', label='After', edgecolor='darkgreen')
        ax.set_title('Only AFTER data available', fontweight='bold')
    elif has_before:
        ax.hist(before, bins='auto', alpha=0.7,
                color='red', label='Before', edgecolor='darkred')
        ax.set_title('Only BEFORE data available', fontweight='bold')
    else:
        _show_placeholder(ax, 'Comparison Not Available', 'Insufficient Data')

    ax.set_xlabel('Konsumsi (km/L)', fontweight='bold')
    ax.set_ylabel('Frequency', fontweight='bold')
    ax.grid(alpha=0.3)

    # ===== ROW 2: BOX PLOTS =====
    _draw_box_panel(axes[1, 0], before, 'BEFORE',
                    facecolor='lightcoral', median_color='darkred')
    _draw_box_panel(axes[1, 1], after, 'AFTER',
                    facecolor='lightgreen', median_color='darkgreen')

    # ===== STATISTICS COMPARISON TABLE =====
    ax = axes[1, 2]
    ax.axis('tight')
    ax.axis('off')

    # Columns for whichever series exist; 'Change' only when both do.
    available = [(name, values)
                 for name, values in (('Before', before), ('After', after))
                 if values is not None]
    both_available = has_before and has_after

    if available:
        # (row label, statistic, format spec)
        metrics = [
            ('N', len, 'd'),
            ('Mean', np.mean, '.2f'),
            ('Median', np.median, '.2f'),
            ('Std Dev', lambda values: np.std(values, ddof=1), '.2f'),
        ]
        table_data = [['Metric'] + [name for name, _ in available]
                      + (['Change'] if both_available else [])]
        for label, statistic, spec in metrics:
            results = [statistic(values) for _, values in available]
            row = [label] + [format(result, spec) for result in results]
            if both_available:
                # A signed format shows decreases as "-2", never as "+-2".
                row.append(format(results[1] - results[0], '+' + spec))
            table_data.append(row)
    else:
        table_data = [['No Data Available']]

    table = ax.table(cellText=table_data, loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2.5)

    # Style header
    for col in range(len(table_data[0])):
        table[(0, col)].set_facecolor('#4472C4')
        table[(0, col)].set_text_props(weight='bold', color='white')

    ax.set_title('Statistics Comparison', fontweight='bold', pad=20)

    # Main title
    fig.suptitle('COMPARISON: Before vs After Fuel Sensor Interpolation',
                 fontsize=16, fontweight='bold', y=0.98)

    plt.tight_layout()
    plt.show()

    print("\n📊 Before/After Comparison Plot Created")
    if both_available:
        day_change = len(after) - len(before)
        print(f"   Sample size: {len(before)} → {len(after)} "
              f"({day_change:+d} days)")

# Banner followed by the before/after interpolation comparison figure.
print("=" * 80, "COMPARISON - BEFORE vs AFTER INTERPOLATION", "=" * 80, sep="\n")
plot_before_after_comparison(daily_before=daily_baru,
                             daily_after=daily_baru_interpolated)

## 8. EXPORT RESULTS
Simpan hasil visualisasi dan data ke file

In [None]:
# EXPORT RESULTS TO FILES
# Writes the daily data, the trip details, the descriptive statistics and the
# normality-test results into timestamped files under output_dir.

import os
from datetime import datetime

# Create output directory.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
output_dir = r'c:\Users\Aspire 3\OneDrive\문서\MAGANG 2025\MAGANG\BBM\output'
os.makedirs(output_dir, exist_ok=True)

# One timestamp shared by every file written in this run.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

print("="*80)
print("EXPORTING RESULTS")
print("="*80)

# 1. Export daily consumption data
if len(daily_baru_interpolated) > 0:
    output_file = os.path.join(output_dir, f'daily_consumption_{timestamp}.csv')
    daily_baru_interpolated.to_csv(output_file, index=False)
    print("\n✓ Daily consumption data exported to:")
    print(f"  {output_file}")

# 2. Export trip data
if len(trips_baru_interpolated) > 0:
    output_file = os.path.join(output_dir, f'trips_detail_{timestamp}.csv')
    trips_baru_interpolated.to_csv(output_file, index=False)
    print("\n✓ Trip details exported to:")
    print(f"  {output_file}")

# 3. Export statistical summary
if stats_baru is not None:
    output_file = os.path.join(output_dir, f'statistical_summary_{timestamp}.txt')
    # Explicit UTF-8: the platform default codec on Windows cannot encode
    # every character and would raise UnicodeEncodeError on non-ASCII text.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write("STATISTICAL SUMMARY - KONSUMSI BBM HARIAN\n")
        f.write("="*80 + "\n\n")

        for key, value in stats_baru.items():
            f.write(f"{key:20s}: {value}\n")

    print("\n✓ Statistical summary exported to:")
    print(f"  {output_file}")

# 4. Export normality test results
if normality_baru_interpolated is not None:
    output_file = os.path.join(output_dir, f'normality_test_{timestamp}.txt')
    # Explicit UTF-8 for the same reason as above.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write("NORMALITY TEST RESULTS\n")
        f.write("="*80 + "\n\n")

        f.write("1. SHAPIRO-WILK TEST\n")
        f.write(f"   Statistic: {normality_baru_interpolated['shapiro_stat']:.6f}\n")
        f.write(f"   p-value: {normality_baru_interpolated['shapiro_pvalue']:.6f}\n")
        f.write(f"   Result: {'NORMAL' if normality_baru_interpolated['shapiro_normal'] else 'NOT NORMAL'}\n\n")

        f.write("2. KOLMOGOROV-SMIRNOV TEST\n")
        f.write(f"   Statistic: {normality_baru_interpolated['ks_stat']:.6f}\n")
        f.write(f"   p-value: {normality_baru_interpolated['ks_pvalue']:.6f}\n")
        f.write(f"   Result: {'NORMAL' if normality_baru_interpolated['ks_normal'] else 'NOT NORMAL'}\n\n")

        f.write("3. ANDERSON-DARLING TEST\n")
        f.write(f"   Statistic: {normality_baru_interpolated['anderson_stat']:.6f}\n")
        f.write(f"   Result: {'NORMAL' if normality_baru_interpolated['anderson_normal'] else 'NOT NORMAL'}\n\n")

        f.write(f"FINAL CONCLUSION: {normality_baru_interpolated['conclusion']}\n")

    print("\n✓ Normality test results exported to:")
    print(f"  {output_file}")

print("\n" + "="*80)
print("✓ ALL RESULTS EXPORTED SUCCESSFULLY")
print("="*80)

## SUMMARY - Kesimpulan Analisis

### Data Overview
- **Periode**: 6–9 November 2025
- **Jumlah hari valid**: Lihat output di atas
- **Metode**: Interpolasi linear untuk nilai fuel sensor yang hilang

### Hasil Uji Normalitas
Berdasarkan tiga uji normalitas (Shapiro-Wilk, Kolmogorov-Smirnov, Anderson-Darling):
- Lihat tabel hasil di atas untuk kesimpulan lengkap

### Visualisasi yang Dibuat
1. ✓ Histogram dengan kurva normal overlay
2. ✓ Q-Q Plot untuk assessment normalitas visual
3. ✓ Box Plot untuk deteksi outlier
4. ✓ Time Series Plot untuk tren konsumsi
5. ✓ Tabel ringkasan uji normalitas
6. ✓ Panel statistik deskriptif komprehensif
7. ✓ Perbandingan before/after interpolasi

### File Output
Semua hasil telah disimpan di folder: `c:\Users\Aspire 3\OneDrive\문서\MAGANG 2025\MAGANG\BBM\output`