Load Data

In [36]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Set the matplotlib style sheet and the seaborn colour palette for visualisations
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

In [37]:
# ============================================================================
# CELL 2: LOAD DATA
# ============================================================================

# The workbook is read with pandas' defaults (original note: it has only one sheet).
df_lama = pd.read_excel('data lama 4 hari.xlsx')

# Banner with the row count, then the column names and a five-row preview.
print("\n".join(["=" * 60, f"DATA LOADED: {len(df_lama)} rows", "=" * 60]))
print("\nKolom yang tersedia:", df_lama.columns.tolist(), sep="\n")
print("\n5 baris pertama:", df_lama.head(), sep="\n")

DATA LOADED: 1505 rows

Kolom yang tersedia:
['NO', 'GPSTIME', 'RECEIVE TIME', 'ACC', 'SPEED', 'ODOMETER', 'ALTITUDE', 'HEADING', 'STATUS KENDARAAN', 'LOCATION', 'ADDRESS', 'KOTA', 'PROVINSI', 'LON', 'LAT', 'GPS SATELITE', 'GSM SIGNAL', 'MAIN POWER VOLTAGE', 'AIN 1', 'AIN 3', 'DIN 1', 'DIN 2', 'DIN 3', 'DIN 4', 'DOUT 1', 'DOUT 2', 'VALUE FUEL SENSOR', 'UNIT FUEL SENSOR', 'VOLTAGE FUEL SENSOR']

5 baris pertama:
     NO             GPSTIME            RECEIVE TIME  ACC  SPEED     ODOMETER  \
0  1163 2025-08-28 00:05:14 2025-08-28 00:05:50.461  OFF      0  3056.600098   
1  1164 2025-08-28 00:35:15 2025-08-28 00:35:53.006  OFF      0  3056.600098   
2  1165 2025-08-28 01:05:15 2025-08-28 01:05:52.277  OFF      0  3056.600098   
3  1166 2025-08-28 01:35:16 2025-08-28 01:35:54.312  OFF      0  3056.600098   
4  1167 2025-08-28 02:05:14 2025-08-28 02:05:56.541  OFF      0  3056.600098   

   ALTITUDE    HEADING        STATUS KENDARAAN  \
0         0  NE   (44)  Parking 1 d, 6 h, 16 m   
1   

In [38]:
# ============================================================================
# CELL 3: FUNGSI PARSING & LOADING DATA
# ============================================================================

def parse_datetime_safe(col):
    """
    Fungsi untuk parsing datetime dengan format DD/MM/YYYY HH:MM:SS
    """
    col = col.astype(str).str.strip()
    parsed = pd.to_datetime(col, format='%d/%m/%Y %H:%M:%S', errors='coerce', dayfirst=True)
    
    if parsed.isna().sum() > len(col) * 0.5:
        parsed = pd.to_datetime(col, errors='coerce', dayfirst=True, infer_datetime_format=True)
    
    return parsed


def process_data_lama(df):
    """
    Clean the raw tracker export and split out the ignition-on records.

    Works on a copy of `df`: parses GPSTIME (and RECEIVE TIME when that
    column exists), coerces the fuel/odometer/speed columns to numbers,
    drops rows whose GPSTIME could not be parsed, sorts by GPSTIME and
    prints a short progress log and summary.

    Returns:
    - (full, active): the cleaned DataFrame, and a copy of the rows whose
      ACC column equals 'ON'.
    """
    banner = "=" * 60
    print(banner)
    print("PROCESSING DATA LAMA")
    print(banner)

    data = df.copy()
    print(f"\n[1/5] Data loaded: {len(data)} rows")

    # Timestamps: GPSTIME is mandatory, RECEIVE TIME is parsed only if present.
    print("[2/5] Parsing GPSTIME...")
    data['GPSTIME'] = parse_datetime_safe(data['GPSTIME'])
    if 'RECEIVE TIME' in data.columns:
        data['RECEIVE TIME'] = parse_datetime_safe(data['RECEIVE TIME'])

    # Numeric columns: anything non-numeric becomes NaN.
    print("[3/5] Converting numeric columns...")
    for name in ('VALUE FUEL SENSOR', 'VOLTAGE FUEL SENSOR', 'ODOMETER', 'SPEED'):
        data[name] = pd.to_numeric(data[name], errors='coerce')

    # Rows without a usable GPSTIME cannot be ordered, so they are dropped.
    print("[4/5] Removing invalid rows...")
    rows_before = len(data)
    data = data.dropna(subset=['GPSTIME'])
    dropped = rows_before - len(data)
    if dropped > 0:
        print(f"    Removed {dropped} rows with invalid GPSTIME")

    print("[5/5] Sorting and filtering...")
    data = data.sort_values('GPSTIME').reset_index(drop=True)
    active = data[data['ACC'] == 'ON'].copy()

    # Summary
    print("\n" + "-"*60)
    print(f"Total data: {len(data):,}")
    print(f"Data ACC ON: {len(active):,}")

    if len(active) == 0:
        print("‚ö†Ô∏è  Tidak ada data dengan ACC ON!")
    else:
        active_times = active['GPSTIME']
        print(f"Rentang waktu: {active_times.min()} s/d {active_times.max()}")
        months = active_times.dt.month.unique()
        days = sorted(active_times.dt.day.unique())
        print(f"Bulan: {months}")
        print(f"Hari: {days}")

    print(banner)

    return data, active


# Process the loaded frame: keep both the full cleaned frame and the ACC-ON subset
df_lama_full, df_lama_active = process_data_lama(df_lama)

PROCESSING DATA LAMA

[1/5] Data loaded: 1505 rows
[2/5] Parsing GPSTIME...
[3/5] Converting numeric columns...
[4/5] Removing invalid rows...
[5/5] Sorting and filtering...

------------------------------------------------------------
Total data: 1,505
Data ACC ON: 1,278
Rentang waktu: 2025-08-28 07:44:03 s/d 2025-08-31 14:10:37
Bulan: [8]
Hari: [np.int32(28), np.int32(29), np.int32(31)]


In [39]:
# ============================================================================
# CELL 4: FUNGSI INTERPOLASI FUEL SENSOR
# ============================================================================

def interpolate_fuel_sensor(df):
    """
    Fill missing fuel-sensor readings by time-weighted linear interpolation.

    Parameters:
    - df: DataFrame with a datetime GPSTIME column and a numeric
      VALUE FUEL SENSOR column. The input is not modified.

    Returns:
    - A copy sorted by GPSTIME with VALUE FUEL SENSOR interpolated.
      GPSTIME becomes the first column because it is temporarily used as
      the index and then restored with reset_index().

    Missing values at the very start or end are filled too
    (limit_direction='both'). Any value still missing afterwards is forward-
    then backward-filled; if the column has no valid reading at all it
    stays NaN.
    """
    print("\n" + "="*80)
    print("INTERPOLASI FUEL SENSOR")
    print("="*80)

    df = df.copy()

    # Count missing readings before interpolation (reported below)
    nan_before = df['VALUE FUEL SENSOR'].isna().sum()

    # method='time' weights by the spacing of the DatetimeIndex, so the
    # rows must be ordered by time and indexed by GPSTIME first.
    df = df.sort_values('GPSTIME').reset_index(drop=True)
    df_temp = df.set_index('GPSTIME')

    df_temp['VALUE FUEL SENSOR'] = df_temp['VALUE FUEL SENSOR'].interpolate(
        method='time',
        limit_direction='both'
    )

    # Restore GPSTIME as a regular column
    df = df_temp.reset_index()

    nan_after = df['VALUE FUEL SENSOR'].isna().sum()

    print(f"\nNaN sebelum interpolasi : {nan_before}")
    print(f"NaN setelah interpolasi : {nan_after}")
    print(f"Data yang diisi         : {nan_before - nan_after}")

    # Last-resort fill. Series.ffill()/bfill() replace the deprecated
    # fillna(method=...) spelling and give the same result.
    if nan_after > 0:
        df['VALUE FUEL SENSOR'] = df['VALUE FUEL SENSOR'].ffill().bfill()

        nan_final = df['VALUE FUEL SENSOR'].isna().sum()
        print(f"NaN setelah fill        : {nan_final}")

    print("="*80)

    return df


In [40]:
# ============================================================================
# CELL 5: FUNGSI PERHITUNGAN KONSUMSI BBM PER TRIP
# ============================================================================

def calculate_fuel_consumption(df, max_gap_minutes=60, refuel_threshold_l=10,
                               min_distance_km=0.5, min_fuel_used_l=0.2,
                               consumption_range=(1, 20)):
    """
    Compute fuel consumption (km/L) for each detected trip.

    A new trip starts at a record when any of these holds relative to the
    previous record:
    1. the time gap is greater than `max_gap_minutes`
    2. the odometer decreased (reset)
    3. the fuel reading rose by more than `refuel_threshold_l` (refuel)

    A trip is kept only when:
    - it has at least 2 records
    - distance is strictly greater than `min_distance_km`
    - fuel used is strictly greater than `min_fuel_used_l`
    - distance / fuel lies inside `consumption_range` (inclusive)

    Parameters:
    - df: DataFrame with columns GPSTIME, ODOMETER, VALUE FUEL SENSOR, SPEED.
      Records are compared in the order given; the input is not modified.
      NOTE(review): rows are assumed to be sorted by GPSTIME already - confirm
      at the call site.
    - max_gap_minutes, refuel_threshold_l, min_distance_km, min_fuel_used_l,
      consumption_range: thresholds described above. The defaults reproduce
      the previously hard-coded values.

    Returns:
    - DataFrame with one row per valid trip. The columns are always present,
      even when no trip qualifies, so callers can rely on the schema.
    """
    print("\n" + "="*80)
    print("ANALISIS KONSUMSI BBM PER TRIP")
    print("="*80)

    result_columns = [
        'trip_id', 'start_time', 'end_time', 'duration_hours',
        'distance_km', 'fuel_used_L', 'consumption_kmL',
        'avg_speed', 'max_speed', 'data_points',
    ]
    min_kml, max_kml = consumption_range

    df = df.copy()

    # Record-to-record deltas: minutes elapsed, odometer change, fuel change
    df['time_diff'] = df['GPSTIME'].diff().dt.total_seconds() / 60
    df['odo_diff'] = df['ODOMETER'].diff()
    df['fuel_diff'] = df['VALUE FUEL SENSOR'].diff()

    # The first row has NaN deltas, so every comparison is False and it
    # belongs to trip 0; each True below opens the next trip at that row.
    df['new_trip'] = ((df['time_diff'] > max_gap_minutes) |
                      (df['odo_diff'] < 0) |
                      (df['fuel_diff'] > refuel_threshold_l))

    df['trip_id'] = df['new_trip'].cumsum()

    trip_list = []

    for trip_id, trip_data in df.groupby('trip_id'):
        if len(trip_data) < 2:
            continue

        distance = trip_data['ODOMETER'].iloc[-1] - trip_data['ODOMETER'].iloc[0]
        # Fuel level falls while driving, so usage is start minus end
        fuel_used = (trip_data['VALUE FUEL SENSOR'].iloc[0] -
                     trip_data['VALUE FUEL SENSOR'].iloc[-1])

        if not (distance > min_distance_km and fuel_used > min_fuel_used_l):
            continue

        consumption = distance / fuel_used
        if not (min_kml <= consumption <= max_kml):
            continue

        start_time = trip_data['GPSTIME'].iloc[0]
        end_time = trip_data['GPSTIME'].iloc[-1]

        trip_list.append({
            'trip_id': trip_id,
            'start_time': start_time,
            'end_time': end_time,
            'duration_hours': (end_time - start_time).total_seconds() / 3600,
            'distance_km': distance,
            'fuel_used_L': fuel_used,
            'consumption_kmL': consumption,
            'avg_speed': trip_data['SPEED'].mean(),
            'max_speed': trip_data['SPEED'].max(),
            'data_points': len(trip_data)
        })

    # Passing the column list keeps the schema when trip_list is empty
    result = pd.DataFrame(trip_list, columns=result_columns)

    if len(result) > 0:
        print(f"\nTotal trip terdeteksi: {len(result)}")
        print(f"Rata-rata konsumsi: {result['consumption_kmL'].mean():.2f} km/L")
        print(f"Std deviasi konsumsi: {result['consumption_kmL'].std():.2f} km/L")
        print(f"Min konsumsi: {result['consumption_kmL'].min():.2f} km/L")
        print(f"Max konsumsi: {result['consumption_kmL'].max():.2f} km/L")
    else:
        print("\n‚ö†Ô∏è  Tidak ada trip valid terdeteksi")

    print("="*80)

    return result

In [41]:
# ============================================================================
# CELL 6: FUNGSI AGREGASI HARIAN
# ============================================================================

def aggregate_daily(trip_df):
    """
    Aggregate per-trip results into one row per calendar day.

    Parameters:
    - trip_df: DataFrame with columns trip_id, start_time, distance_km,
      fuel_used_L, avg_speed and duration_hours. The input is not modified;
      the helper 'date' column is added to an internal copy only.

    Returns:
    - DataFrame with columns date, distance_km (sum), fuel_used_L (sum),
      avg_speed (mean), duration_hours (sum), num_trips (count) and
      avg_consumption_kmL (total distance / total fuel for the day).
      Only days whose avg_consumption_kmL lies in [1, 20] are kept.
      An empty DataFrame is returned when trip_df has no rows.
    """
    print("\n" + "="*80)
    print("AGREGASI KONSUMSI PER HARI")
    print("="*80)

    if len(trip_df) == 0:
        print("\n‚ö†Ô∏è  Tidak ada data trip untuk diagregasi")
        return pd.DataFrame()

    # Work on a copy so the caller's frame does not silently gain a column
    trips = trip_df.copy()
    # A trip is attributed to the day on which it started
    trips['date'] = pd.to_datetime(trips['start_time']).dt.date

    daily_agg = trips.groupby('date').agg({
        'distance_km': 'sum',
        'fuel_used_L': 'sum',
        'avg_speed': 'mean',
        'duration_hours': 'sum',
        'trip_id': 'count'
    }).reset_index()

    daily_agg = daily_agg.rename(columns={'trip_id': 'num_trips'})
    # Daily figure is distance-weighted: total km over total litres
    daily_agg['avg_consumption_kmL'] = daily_agg['distance_km'] / daily_agg['fuel_used_L']

    daily_agg = daily_agg[
        (daily_agg['num_trips'] >= 1) &
        (daily_agg['avg_consumption_kmL'] >= 1) &
        (daily_agg['avg_consumption_kmL'] <= 20)
    ]

    print(f"\nTotal hari: {len(daily_agg)}")
    if len(daily_agg) > 0:
        print(f"Rata-rata konsumsi harian: {daily_agg['avg_consumption_kmL'].mean():.2f} km/L")
        print(f"Std deviasi: {daily_agg['avg_consumption_kmL'].std():.2f} km/L")

        print("\nDetail per hari:")
        for idx, row in daily_agg.iterrows():
            print(f"  {row['date']}: {row['avg_consumption_kmL']:.2f} km/L "
                  f"({row['num_trips']} trips, {row['distance_km']:.1f} km, {row['fuel_used_L']:.1f} L)")

    print("="*80)

    return daily_agg

In [42]:
# ============================================================================
# CELL 7: FUNGSI STATISTIK DESKRIPTIF
# ============================================================================

def descriptive_statistics(daily_df):
    """
    Compute and print descriptive statistics of daily fuel consumption.

    Parameters:
    - daily_df: DataFrame with an 'avg_consumption_kmL' column.

    Returns:
    - dict with keys N, Mean, Median, Std_Dev, Variance, Min, Max, Range,
      Q1, Q3, IQR, Skewness, Kurtosis; or None when daily_df has no rows.

    Std_Dev and Variance use the sample formula (ddof=1). Skewness and
    Kurtosis come from scipy.stats with its default settings. When skewness
    or kurtosis is NaN (e.g. a single sample or constant data), the
    interpretation reports it as undefined instead of falling through to
    the "left-skewed" / "platykurtic" branches.
    """
    print("\n" + "="*80)
    print("STATISTIK DESKRIPTIF - KONSUMSI BBM HARIAN")
    print("="*80)

    if len(daily_df) == 0:
        print("\n‚ö†Ô∏è  Tidak ada data untuk analisis statistik")
        return None

    consumption = daily_df['avg_consumption_kmL'].values

    stats_dict = {
        'N': len(consumption),
        'Mean': np.mean(consumption),
        'Median': np.median(consumption),
        'Std_Dev': np.std(consumption, ddof=1),
        'Variance': np.var(consumption, ddof=1),
        'Min': np.min(consumption),
        'Max': np.max(consumption),
        'Range': np.max(consumption) - np.min(consumption),
        'Q1': np.percentile(consumption, 25),
        'Q3': np.percentile(consumption, 75),
        'IQR': np.percentile(consumption, 75) - np.percentile(consumption, 25),
        'Skewness': stats.skew(consumption),
        'Kurtosis': stats.kurtosis(consumption)
    }

    print(f"\nJumlah Sampel (N)         : {stats_dict['N']}")
    print(f"Mean                      : {stats_dict['Mean']:.4f} km/L")
    print(f"Median                    : {stats_dict['Median']:.4f} km/L")
    print(f"Standard Deviation        : {stats_dict['Std_Dev']:.4f} km/L")
    print(f"Variance                  : {stats_dict['Variance']:.4f}")
    print(f"Min                       : {stats_dict['Min']:.4f} km/L")
    print(f"Max                       : {stats_dict['Max']:.4f} km/L")
    print(f"Range                     : {stats_dict['Range']:.4f} km/L")
    print(f"Q1 (Percentile 25)        : {stats_dict['Q1']:.4f} km/L")
    print(f"Q3 (Percentile 75)        : {stats_dict['Q3']:.4f} km/L")
    print(f"IQR (Interquartile Range) : {stats_dict['IQR']:.4f} km/L")
    print(f"Skewness                  : {stats_dict['Skewness']:.4f}")
    print(f"Kurtosis                  : {stats_dict['Kurtosis']:.4f}")

    print("\nInterpretasi:")

    # Every comparison with NaN is False, so NaN must be handled first or it
    # would be reported as a negative skew / flat distribution.
    skewness = stats_dict['Skewness']
    if np.isnan(skewness):
        print("  - Skewness: TIDAK TERDEFINISI (sampel terlalu sedikit atau data konstan)")
    elif abs(skewness) < 0.5:
        print("  - Skewness: Distribusi SIMETRIS")
    elif skewness > 0:
        print("  - Skewness: Distribusi MIRING KANAN (positive skew)")
    else:
        print("  - Skewness: Distribusi MIRING KIRI (negative skew)")

    kurtosis = stats_dict['Kurtosis']
    if np.isnan(kurtosis):
        print("  - Kurtosis: TIDAK TERDEFINISI (sampel terlalu sedikit atau data konstan)")
    elif abs(kurtosis) < 0.5:
        print("  - Kurtosis: Distribusi NORMAL (mesokurtic)")
    elif kurtosis > 0:
        print("  - Kurtosis: Distribusi LANCIP (leptokurtic)")
    else:
        print("  - Kurtosis: Distribusi DATAR (platykurtic)")

    print("="*80)

    return stats_dict

In [43]:
# ============================================================================
# CELL 8: FUNGSI UJI NORMALITAS
# ============================================================================

def test_normality(daily_df):
    """
    Uji normalitas distribusi menggunakan:
    1. Shapiro-Wilk Test
    2. Kolmogorov-Smirnov Test
    3. Anderson-Darling Test
    """
    print("\n" + "="*80)
    print("UJI NORMALITAS DISTRIBUSI")
    print("="*80)
    
    if len(daily_df) < 3:
        print("\n‚ö†Ô∏è  Sampel terlalu sedikit untuk uji normalitas (minimal 3)")
        return None
    
    consumption = daily_df['avg_consumption_kmL'].values
    results = {}
    
    # 1. SHAPIRO-WILK TEST
    print("\n1. SHAPIRO-WILK TEST")
    print("-" * 80)
    stat_shapiro, p_shapiro = stats.shapiro(consumption)
    results['shapiro_stat'] = stat_shapiro
    results['shapiro_pvalue'] = p_shapiro
    
    print(f"Statistic : {stat_shapiro:.6f}")
    print(f"p-value   : {p_shapiro:.6f}")
    
    if p_shapiro > 0.05:
        print("Keputusan : TERIMA H0 (data berdistribusi NORMAL)")
        results['shapiro_normal'] = True
    else:
        print("Keputusan : TOLAK H0 (data TIDAK berdistribusi normal)")
        results['shapiro_normal'] = False
    
    # 2. KOLMOGOROV-SMIRNOV TEST
    print("\n2. KOLMOGOROV-SMIRNOV TEST")
    print("-" * 80)
    
    mean = np.mean(consumption)
    std = np.std(consumption, ddof=1)
    
    stat_ks, p_ks = stats.kstest(consumption, 'norm', args=(mean, std))
    results['ks_stat'] = stat_ks
    results['ks_pvalue'] = p_ks
    
    print(f"Statistic : {stat_ks:.6f}")
    print(f"p-value   : {p_ks:.6f}")
    
    if p_ks > 0.05:
        print("Keputusan : TERIMA H0 (data berdistribusi NORMAL)")
        results['ks_normal'] = True
    else:
        print("Keputusan : TOLAK H0 (data TIDAK berdistribusi normal)")
        results['ks_normal'] = False
    
    # 3. ANDERSON-DARLING TEST
    print("\n3. ANDERSON-DARLING TEST")
    print("-" * 80)
    
    result_anderson = stats.anderson(consumption, dist='norm')
    results['anderson_stat'] = result_anderson.statistic
    results['anderson_critical'] = result_anderson.critical_values
    results['anderson_significance'] = result_anderson.significance_level
    
    print(f"Statistic : {result_anderson.statistic:.6f}")
    print(f"\nCritical Values:")
    
    anderson_normal = True
    for i, (cv, sl) in enumerate(zip(result_anderson.critical_values, 
                                     result_anderson.significance_level)):
        print(f"  {sl}% : {cv:.3f}", end="")
        if result_anderson.statistic < cv:
            print(" - NORMAL")
        else:
            print(" - TIDAK NORMAL")
            if sl == 5.0:
                anderson_normal = False
    
    results['anderson_normal'] = anderson_normal
    
    # KESIMPULAN
    print("\n" + "="*80)
    print("KESIMPULAN UJI NORMALITAS")
    print("="*80)
    
    normal_count = sum([
        results.get('shapiro_normal', False),
        results.get('ks_normal', False),
        results.get('anderson_normal', False)
    ])
    
    print(f"\nJumlah uji yang menyatakan NORMAL: {normal_count}/3")
    
    if normal_count >= 2:
        print("\n‚úÖ KESIMPULAN AKHIR: Data berdistribusi NORMAL")
        print("   Dapat menggunakan uji parametrik (t-test)")
        results['conclusion'] = 'NORMAL'
    else:
        print("\n‚ùå KESIMPULAN AKHIR: Data TIDAK berdistribusi normal")
        print("   Sebaiknya gunakan uji non-parametrik (Mann-Whitney U)")
        results['conclusion'] = 'NOT NORMAL'
    
    print("="*80)
    
    return results

In [44]:
# ============================================================================
# CELL 9: PIPELINE LENGKAP - INTERPOLASI -> TRIPS -> DAILY -> STATS
# ============================================================================

# Opening banner for the end-to-end run.
print("\n" + "="*80, "üöÄ PIPELINE ANALISIS BBM (DENGAN INTERPOLASI)", "="*80, sep="\n")

# STEP 1: fill gaps in the fuel-sensor readings of the ACC-ON records
print("\n[STEP 1/5] Interpolasi Fuel Sensor...")
df_lama_active_interpolated = interpolate_fuel_sensor(df_lama_active)

# STEP 2: detect trips and compute consumption per trip
print("\n[STEP 2/5] Menghitung konsumsi per trip...")
trips_lama = calculate_fuel_consumption(df_lama_active_interpolated)

# STEP 3: roll the trips up to one row per day
print("\n[STEP 3/5] Agregasi konsumsi harian...")
daily_lama = aggregate_daily(trips_lama)

# STEP 4: descriptive statistics (skipped when there are no daily rows)
print("\n[STEP 4/5] Menghitung statistik deskriptif...")
if len(daily_lama) == 0:
    print("‚ùå Tidak ada data untuk statistik")
    stats_lama = None
else:
    stats_lama = descriptive_statistics(daily_lama)

# STEP 5: normality tests (need at least 3 daily rows)
print("\n[STEP 5/5] Uji normalitas...")
if len(daily_lama) < 3:
    print("‚ùå Sampel terlalu sedikit untuk uji normalitas (minimal 3 hari)")
    normality_lama = None
else:
    normality_lama = test_normality(daily_lama)

print("\n" + "="*80, "‚úÖ PIPELINE SELESAI!", "="*80, sep="\n")

# Short summary, printed only when at least one valid trip exists
if len(trips_lama) > 0:
    print(
        "\nüìä RINGKASAN HASIL:",
        f"  ‚Ä¢ Total trip valid    : {len(trips_lama)}",
        f"  ‚Ä¢ Total hari          : {len(daily_lama)}",
        sep="\n",
    )
    if stats_lama:
        print(
            f"  ‚Ä¢ Konsumsi rata-rata  : {stats_lama['Mean']:.2f} km/L",
            f"  ‚Ä¢ Std deviasi         : {stats_lama['Std_Dev']:.2f} km/L",
            sep="\n",
        )
    if normality_lama:
        print(f"  ‚Ä¢ Status normalitas   : {normality_lama['conclusion']}")


üöÄ PIPELINE ANALISIS BBM (DENGAN INTERPOLASI)

[STEP 1/5] Interpolasi Fuel Sensor...

INTERPOLASI FUEL SENSOR

NaN sebelum interpolasi : 43
NaN setelah interpolasi : 0
Data yang diisi         : 43

[STEP 2/5] Menghitung konsumsi per trip...

ANALISIS KONSUMSI BBM PER TRIP

Total trip terdeteksi: 4
Rata-rata konsumsi: 2.59 km/L
Std deviasi konsumsi: 0.50 km/L
Min konsumsi: 2.06 km/L
Max konsumsi: 3.11 km/L

[STEP 3/5] Agregasi konsumsi harian...

AGREGASI KONSUMSI PER HARI

Total hari: 2
Rata-rata konsumsi harian: 2.60 km/L
Std deviasi: 0.44 km/L

Detail per hari:
  2025-08-29: 2.92 km/L (3 trips, 78.1 km, 26.8 L)
  2025-08-31: 2.29 km/L (1 trips, 16.2 km, 7.1 L)

[STEP 4/5] Menghitung statistik deskriptif...

STATISTIK DESKRIPTIF - KONSUMSI BBM HARIAN

Jumlah Sampel (N)         : 2
Mean                      : 2.6044 km/L
Median                    : 2.6044 km/L
Standard Deviation        : 0.4427 km/L
Variance                  : 0.1960
Min                       : 2.2914 km/L
Max      

In [45]:
# ============================================================================
# CELL 10: PRINT SEMUA HASIL ANALISIS
# ============================================================================

print("\n" + "üîç"*40)
print(" " * 30 + "HASIL LENGKAP ANALISIS BBM")
print("üîç"*40)

# ============================================================================
# 1. INFORMASI DATA AWAL
# ============================================================================
print("\n" + "="*80)
print("üìÅ INFORMASI DATA AWAL")
print("="*80)
print(f"Total data mentah         : {len(df_lama):,} rows")
print(f"Data ACC ON               : {len(df_lama_active):,} rows")
print(f"Data setelah interpolasi  : {len(df_lama_active_interpolated):,} rows")
print(f"Rentang waktu             : {df_lama_active['GPSTIME'].min()} s/d {df_lama_active['GPSTIME'].max()}")

# Check fuel sensor
fuel_before = df_lama_active['VALUE FUEL SENSOR'].isna().sum()
fuel_after = df_lama_active_interpolated['VALUE FUEL SENSOR'].isna().sum()
print(f"\nNaN di VALUE FUEL SENSOR:")
print(f"  Sebelum interpolasi     : {fuel_before} ({fuel_before/len(df_lama_active)*100:.2f}%)")
print(f"  Setelah interpolasi     : {fuel_after} ({fuel_after/len(df_lama_active_interpolated)*100:.2f}%)")

# ============================================================================
# 2. TRIP DETECTION RESULTS
# ============================================================================
print("\n" + "="*80, "üöó HASIL DETEKSI TRIP", "="*80, sep="\n")

if len(trips_lama) == 0:
    print("‚ùå TIDAK ADA TRIP VALID TERDETEKSI!")
else:
    print(f"Total trip terdeteksi     : {len(trips_lama)}")

    # Totals over all valid trips
    print(
        "\nRingkasan Trip:",
        f"  Total jarak             : {trips_lama['distance_km'].sum():.2f} km",
        f"  Total BBM terpakai      : {trips_lama['fuel_used_L'].sum():.2f} liter",
        f"  Durasi total            : {trips_lama['duration_hours'].sum():.2f} jam",
        f"  Kecepatan rata-rata     : {trips_lama['avg_speed'].mean():.2f} km/jam",
        f"  Kecepatan maksimal      : {trips_lama['max_speed'].max():.2f} km/jam",
        sep="\n",
    )

    # Spread of the per-trip consumption figures
    print(
        "\nüìä Statistik Konsumsi BBM per Trip:",
        f"  Rata-rata               : {trips_lama['consumption_kmL'].mean():.2f} km/L",
        f"  Median                  : {trips_lama['consumption_kmL'].median():.2f} km/L",
        f"  Std Deviasi             : {trips_lama['consumption_kmL'].std():.2f} km/L",
        f"  Min                     : {trips_lama['consumption_kmL'].min():.2f} km/L",
        f"  Max                     : {trips_lama['consumption_kmL'].max():.2f} km/L",
        sep="\n",
    )

    # First and last ten trips, restricted to the key columns
    trip_preview = trips_lama[['trip_id', 'start_time', 'distance_km', 'fuel_used_L', 'consumption_kmL']]

    print("\nüìã Detail 10 Trip Pertama:")
    print(trip_preview.head(10).to_string(index=False))

    print("\nüìã Detail 10 Trip Terakhir:")
    print(trip_preview.tail(10).to_string(index=False))

# ============================================================================
# 3. DAILY AGGREGATION RESULTS
# ============================================================================
print("\n" + "="*80, "üìÖ HASIL AGREGASI HARIAN", "="*80, sep="\n")

if len(daily_lama) == 0:
    print("‚ùå TIDAK ADA DATA HARIAN!")
else:
    print(f"Total hari dengan data    : {len(daily_lama)}")

    # Totals across all days
    print(
        "\nRingkasan Harian:",
        f"  Total jarak             : {daily_lama['distance_km'].sum():.2f} km",
        f"  Total BBM               : {daily_lama['fuel_used_L'].sum():.2f} liter",
        f"  Total trip              : {daily_lama['num_trips'].sum()}",
        f"  Rata-rata trip/hari     : {daily_lama['num_trips'].mean():.2f}",
        sep="\n",
    )

    # Spread of the daily consumption figures
    print(
        "\nüìä Konsumsi BBM per Hari:",
        f"  Rata-rata               : {daily_lama['avg_consumption_kmL'].mean():.2f} km/L",
        f"  Median                  : {daily_lama['avg_consumption_kmL'].median():.2f} km/L",
        f"  Std Deviasi             : {daily_lama['avg_consumption_kmL'].std():.2f} km/L",
        f"  Min                     : {daily_lama['avg_consumption_kmL'].min():.2f} km/L",
        f"  Max                     : {daily_lama['avg_consumption_kmL'].max():.2f} km/L",
        sep="\n",
    )

    # One line per day
    print("\nüìã Detail Per Hari:")
    print("-" * 80)
    for day in daily_lama.itertuples(index=False):
        print(f"{day.date} | {day.num_trips:2d} trips | "
              f"{day.distance_km:6.1f} km | {day.fuel_used_L:5.1f} L | "
              f"{day.avg_consumption_kmL:5.2f} km/L | "
              f"Avg Speed: {day.avg_speed:.1f} km/h")

# ============================================================================
# 4. DESCRIPTIVE STATISTICS
# ============================================================================
print("\n" + "="*80, "üìà STATISTIK DESKRIPTIF KONSUMSI BBM HARIAN", "="*80, sep="\n")

if not stats_lama:
    print("‚ùå TIDAK ADA STATISTIK DESKRIPTIF!")
else:
    # Table header, then the sample size (printed without decimals)
    print(f"\n{'Statistik':<25} {'Nilai':<20} {'Satuan':<10}")
    print("-" * 80)
    print(f"{'Jumlah Sampel (N)':<25} {stats_lama['N']:<20} {'hari':<10}")

    # Remaining rows: (label, key in stats_lama, unit), all with 4 decimals
    for label, key, unit in (
        ('Mean', 'Mean', 'km/L'),
        ('Median', 'Median', 'km/L'),
        ('Standard Deviation', 'Std_Dev', 'km/L'),
        ('Variance', 'Variance', ''),
        ('Minimum', 'Min', 'km/L'),
        ('Maximum', 'Max', 'km/L'),
        ('Range', 'Range', 'km/L'),
        ('Q1 (Percentile 25)', 'Q1', 'km/L'),
        ('Q3 (Percentile 75)', 'Q3', 'km/L'),
        ('IQR', 'IQR', 'km/L'),
        ('Skewness', 'Skewness', ''),
        ('Kurtosis', 'Kurtosis', ''),
    ):
        print(f"{label:<25} {stats_lama[key]:<20.4f} {unit:<10}")

    print("\nüìä Interpretasi Distribusi:")

    # Skewness: |value| < 0.5 is treated as symmetric, otherwise the sign decides
    if abs(stats_lama['Skewness']) < 0.5:
        skew_note = "  ‚úì Skewness: Distribusi SIMETRIS (mendekati normal)"
    elif stats_lama['Skewness'] > 0:
        skew_note = "  ‚ö† Skewness: Distribusi MIRING KANAN (positive skew)"
    else:
        skew_note = "  ‚ö† Skewness: Distribusi MIRING KIRI (negative skew)"
    print(skew_note)

    # Kurtosis: same 0.5 band, then the sign decides
    if abs(stats_lama['Kurtosis']) < 0.5:
        kurt_note = "  ‚úì Kurtosis: Distribusi NORMAL (mesokurtic)"
    elif stats_lama['Kurtosis'] > 0:
        kurt_note = "  ‚ö† Kurtosis: Distribusi LANCIP (leptokurtic - ekor tebal)"
    else:
        kurt_note = "  ‚ö† Kurtosis: Distribusi DATAR (platykurtic - ekor tipis)"
    print(kurt_note)

# ============================================================================
# 5. NORMALITY TEST RESULTS
# ============================================================================
print("\n" + "="*80, "üî¨ HASIL UJI NORMALITAS", "="*80, sep="\n")

if not normality_lama:
    print("‚ùå TIDAK ADA HASIL UJI NORMALITAS!")
else:
    print(f"\n{'Test':<30} {'Statistic':<15} {'p-value':<15} {'Hasil':<15}")
    print("-" * 80)

    # Shapiro-Wilk and Kolmogorov-Smirnov share the same row layout
    for test_label, prefix in (('Shapiro-Wilk Test', 'shapiro'),
                               ('Kolmogorov-Smirnov Test', 'ks')):
        test_stat = normality_lama[prefix + '_stat']
        test_p = normality_lama[prefix + '_pvalue']
        test_verdict = "‚úì NORMAL" if normality_lama[prefix + '_normal'] else "‚úó TIDAK NORMAL"
        print(f"{test_label:<30} {test_stat:<15.6f} "
              f"{test_p:<15.6f} {test_verdict:<15}")

    # Anderson-Darling has no p-value, so that column shows N/A
    result_ad = "‚úì NORMAL" if normality_lama['anderson_normal'] else "‚úó TIDAK NORMAL"
    print(f"{'Anderson-Darling Test':<30} {normality_lama['anderson_stat']:<15.6f} "
          f"{'N/A':<15} {result_ad:<15}")

    print("\n" + "-" * 80)
    print("üìã Anderson-Darling Critical Values:")
    for cv, sl in zip(normality_lama['anderson_critical'],
                      normality_lama['anderson_significance']):
        if normality_lama['anderson_stat'] < cv:
            status = "NORMAL"
        else:
            status = "TIDAK NORMAL"
        print(f"  Significance Level {sl:4.1f}%: {cv:.3f} ‚Üí {status}")

    print("\n" + "="*80, "üìä KESIMPULAN UJI NORMALITAS", "="*80, sep="\n")

    # Number of tests (out of three) that did not reject normality
    normal_count = sum(
        normality_lama[flag]
        for flag in ('shapiro_normal', 'ks_normal', 'anderson_normal')
    )

    print(f"\nJumlah uji yang menyatakan NORMAL: {normal_count}/3")
    print(f"\n{'='*80}")

    if normality_lama['conclusion'] != 'NORMAL':
        print("‚ùå KESIMPULAN AKHIR: Data TIDAK berdistribusi normal")
        print("\nüìå REKOMENDASI:")
        print("   ‚Ä¢ Sebaiknya gunakan UJI NON-PARAMETRIK (Mann-Whitney U Test)")
        print("   ‚Ä¢ Asumsi normalitas TIDAK terpenuhi")
        print("   ‚Ä¢ Uji non-parametrik lebih robust untuk data tidak normal")
    else:
        print("‚úÖ KESIMPULAN AKHIR: Data berdistribusi NORMAL")
        print("\nüìå REKOMENDASI:")
        print("   ‚Ä¢ Dapat menggunakan UJI PARAMETRIK (Independent t-test)")
        print("   ‚Ä¢ Asumsi normalitas terpenuhi")
        print("   ‚Ä¢ Hasil uji hipotesis akan lebih powerful")

    print(f"{'='*80}")

# ============================================================================
# 6. EXECUTIVE SUMMARY
# ============================================================================
print("\n" + "üéØ"*40, " " * 30 + "RINGKASAN EKSEKUTIF", "üéØ"*40, sep="\n")

print("\nüìä DATA:")
print(f"  ‚Ä¢ Total hari analisis       : {len(daily_lama)} hari")
print(f"  ‚Ä¢ Total trip valid          : {len(trips_lama)} trips")
# Totals come from the daily table; fall back to fixed zero lines when it is empty
if len(daily_lama) > 0:
    print(f"  ‚Ä¢ Total jarak tempuh        : {daily_lama['distance_km'].sum():.2f} km")
    print(f"  ‚Ä¢ Total konsumsi BBM        : {daily_lama['fuel_used_L'].sum():.2f} liter")
else:
    print("  ‚Ä¢ Total jarak tempuh        : 0 km")
    print("  ‚Ä¢ Total konsumsi BBM        : 0 liter")

if stats_lama:
    print(
        "\nüìà KONSUMSI BBM:",
        f"  ‚Ä¢ Rata-rata harian          : {stats_lama['Mean']:.2f} km/L",
        f"  ‚Ä¢ Std Deviasi               : {stats_lama['Std_Dev']:.2f} km/L",
        f"  ‚Ä¢ Range                     : {stats_lama['Min']:.2f} - {stats_lama['Max']:.2f} km/L",
        f"  ‚Ä¢ Coefficient of Variation  : {(stats_lama['Std_Dev']/stats_lama['Mean']*100):.2f}%",
        sep="\n",
    )

if normality_lama:
    print("\nüî¨ UJI STATISTIK:")
    print(f"  ‚Ä¢ Status normalitas         : {normality_lama['conclusion']}")
    # The recommended test follows directly from the normality conclusion
    if normality_lama['conclusion'] != 'NORMAL':
        print("  ‚Ä¢ Uji yang direkomendasikan : Mann-Whitney U Test (NON-PARAMETRIK)")
    else:
        print("  ‚Ä¢ Uji yang direkomendasikan : Independent t-test (PARAMETRIK)")

print("\n" + "üéØ"*40, " " * 25 + "ANALISIS SELESAI - TERIMA KASIH!", "üéØ"*40 + "\n", sep="\n")


üîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîç
                              HASIL LENGKAP ANALISIS BBM
üîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîçüîç

üìÅ INFORMASI DATA AWAL
Total data mentah         : 1,505 rows
Data ACC ON               : 1,278 rows
Data setelah interpolasi  : 1,278 rows
Rentang waktu             : 2025-08-28 07:44:03 s/d 2025-08-31 14:10:37

NaN di VALUE FUEL SENSOR:
  Sebelum interpolasi     : 43 (3.36%)
  Setelah interpolasi     : 0 (0.00%)

üöó HASIL DETEKSI TRIP
Total trip terdeteksi     : 4

Ringkasan Trip:
  Total jarak             : 94.30 km
  Total BBM terpakai      : 33.84 liter
  Durasi total            : 5.17 jam
  Kecepatan rata-rata     : 21.38 km/jam
  Kecepatan maksimal      : 50.00 km/jam

üìä Statistik Konsumsi BBM per 

In [46]:
# ============================================================================
# CELL 11: VISUALISASI - DISTRIBUSI KONSUMSI
# ============================================================================

def plot_consumption_distribution(daily_df, stats_dict, normality_dict, title_suffix=""):
    """
    Draw a six-panel figure describing the daily fuel-consumption distribution.

    Panels: histogram with KDE and fitted normal curve, box plot, Q-Q plot,
    violin plot, descriptive-statistics table, and normality-test table.

    Parameters
    ----------
    daily_df : DataFrame-like
        Must expose an ``'avg_consumption_kmL'`` column; its values are plotted.
    stats_dict : dict
        Precomputed descriptive statistics. Keys read here: ``N``, ``Mean``,
        ``Std_Dev``, ``Variance``, ``Min``, ``Max``, ``Range``, ``Median``,
        ``Q1``, ``Q3``, ``IQR``, ``Skewness``, ``Kurtosis``.
    normality_dict : dict or None
        Precomputed normality-test results. Keys read here: ``shapiro_stat``,
        ``shapiro_pvalue``, ``shapiro_normal``, ``ks_stat``, ``ks_pvalue``,
        ``ks_normal``, ``anderson_stat``, ``anderson_normal``, ``conclusion``.
        When falsy, the bottom panel is left blank.
    title_suffix : str, optional
        Text appended to the figure title.

    Returns
    -------
    The created figure, or ``None`` (after printing a warning) when
    ``daily_df`` is empty.
    """
    if len(daily_df) == 0:
        print("‚ö†Ô∏è  Tidak ada data untuk divisualisasikan")
        return

    consumption = daily_df['avg_consumption_kmL'].values

    fig = plt.figure(figsize=(16, 10))
    gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

    fig.suptitle(f'Analisis Distribusi Konsumsi BBM Harian {title_suffix}',
                 fontsize=16, fontweight='bold', y=0.98)

    # 1. HISTOGRAM WITH KDE
    ax1 = fig.add_subplot(gs[0, :2])
    ax1.hist(consumption, bins=max(5, len(consumption)//2),
             alpha=0.7, color='steelblue', edgecolor='black',
             density=True, label='Data')

    # gaussian_kde raises for fewer than two points or for constant data
    # (singular covariance), so the KDE overlay is skipped in those cases.
    if len(consumption) > 1 and consumption.min() != consumption.max():
        kde = stats.gaussian_kde(consumption)
        x_range = np.linspace(consumption.min(), consumption.max(), 100)
        ax1.plot(x_range, kde(x_range), 'r-', linewidth=2, label='KDE')

    mu, sigma = stats_dict['Mean'], stats_dict['Std_Dev']
    # A normal curve is only defined for a finite, positive standard deviation;
    # otherwise the pdf is all-NaN and would only add an empty legend entry.
    if np.isfinite(sigma) and sigma > 0:
        x_normal = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
        ax1.plot(x_normal, stats.norm.pdf(x_normal, mu, sigma),
                 'g--', linewidth=2, label='Normal Distribution')

    ax1.axvline(mu, color='red', linestyle='--', linewidth=2, label=f'Mean = {mu:.2f}')
    ax1.axvline(stats_dict['Median'], color='orange', linestyle='--',
                linewidth=2, label=f'Median = {stats_dict["Median"]:.2f}')

    ax1.set_xlabel('Konsumsi BBM (km/L)', fontsize=11, fontweight='bold')
    ax1.set_ylabel('Density', fontsize=11, fontweight='bold')
    ax1.set_title('Histogram & Density Plot', fontsize=12, fontweight='bold')
    ax1.legend(loc='best')
    ax1.grid(alpha=0.3)

    # 2. BOX PLOT
    ax2 = fig.add_subplot(gs[0, 2])
    ax2.boxplot(consumption, vert=True, patch_artist=True,
                boxprops=dict(facecolor='lightblue', alpha=0.7),
                medianprops=dict(color='red', linewidth=2),
                whiskerprops=dict(linewidth=1.5),
                capprops=dict(linewidth=1.5))

    ax2.set_ylabel('Konsumsi BBM (km/L)', fontsize=11, fontweight='bold')
    ax2.set_title('Box Plot', fontsize=12, fontweight='bold')
    ax2.grid(axis='y', alpha=0.3)

    # Quartile summary placed to the right of the box, at the median's height.
    stats_text = f'Q1 = {stats_dict["Q1"]:.2f}\nMedian = {stats_dict["Median"]:.2f}\nQ3 = {stats_dict["Q3"]:.2f}\nIQR = {stats_dict["IQR"]:.2f}'
    ax2.text(1.15, stats_dict['Median'], stats_text, fontsize=9,
             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    # 3. Q-Q PLOT
    ax3 = fig.add_subplot(gs[1, 0])
    stats.probplot(consumption, dist="norm", plot=ax3)
    ax3.set_title('Q-Q Plot (Normality Check)', fontsize=12, fontweight='bold')
    ax3.grid(alpha=0.3)

    # 4. VIOLIN PLOT
    ax4 = fig.add_subplot(gs[1, 1])
    parts = ax4.violinplot([consumption], positions=[1], showmeans=True,
                           showmedians=True, widths=0.7)

    for pc in parts['bodies']:
        pc.set_facecolor('lightgreen')
        pc.set_alpha(0.7)

    ax4.set_ylabel('Konsumsi BBM (km/L)', fontsize=11, fontweight='bold')
    ax4.set_title('Violin Plot', fontsize=12, fontweight='bold')
    ax4.set_xticks([1])
    ax4.set_xticklabels(['Konsumsi'])
    ax4.grid(axis='y', alpha=0.3)

    # 5. STATISTICAL SUMMARY TABLE
    ax5 = fig.add_subplot(gs[1, 2])
    ax5.axis('off')

    stats_data = [
        ['N', f"{stats_dict['N']}"],
        ['Mean', f"{stats_dict['Mean']:.4f} km/L"],
        ['Std Dev', f"{stats_dict['Std_Dev']:.4f} km/L"],
        ['Variance', f"{stats_dict['Variance']:.4f}"],
        ['Min', f"{stats_dict['Min']:.4f} km/L"],
        ['Max', f"{stats_dict['Max']:.4f} km/L"],
        ['Range', f"{stats_dict['Range']:.4f} km/L"],
        ['Median', f"{stats_dict['Median']:.4f} km/L"],
        ['Q1', f"{stats_dict['Q1']:.4f} km/L"],
        ['Q3', f"{stats_dict['Q3']:.4f} km/L"],
        ['IQR', f"{stats_dict['IQR']:.4f} km/L"],
        ['Skewness', f"{stats_dict['Skewness']:.4f}"],
        ['Kurtosis', f"{stats_dict['Kurtosis']:.4f}"]
    ]

    table = ax5.table(cellText=stats_data, colLabels=['Statistic', 'Value'],
                      loc='center', cellLoc='left', colWidths=[0.4, 0.6])
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(1, 1.5)

    # Row 0 is the header row: style both header cells.
    for i in range(2):
        table[(0, i)].set_facecolor('#4CAF50')
        table[(0, i)].set_text_props(weight='bold', color='white')

    ax5.set_title('Statistik Deskriptif', fontsize=12, fontweight='bold', pad=20)

    # 6. NORMALITY TEST RESULTS
    ax6 = fig.add_subplot(gs[2, :])
    ax6.axis('off')

    if normality_dict:
        # One verdict per table row, in row order. The result cells are
        # coloured from these flags rather than from the label text, because
        # the failing label also contains the word "Normal".
        test_passed = [
            bool(normality_dict['shapiro_normal']),
            bool(normality_dict['ks_normal']),
            bool(normality_dict['anderson_normal']),
        ]

        normality_data = [
            ['Shapiro-Wilk Test',
             f"{normality_dict['shapiro_stat']:.6f}",
             f"{normality_dict['shapiro_pvalue']:.6f}",
             '‚úì Normal' if test_passed[0] else '‚úó Not Normal'],
            ['Kolmogorov-Smirnov Test',
             f"{normality_dict['ks_stat']:.6f}",
             f"{normality_dict['ks_pvalue']:.6f}",
             '‚úì Normal' if test_passed[1] else '‚úó Not Normal'],
            ['Anderson-Darling Test',
             f"{normality_dict['anderson_stat']:.6f}",
             'N/A',
             '‚úì Normal' if test_passed[2] else '‚úó Not Normal']
        ]

        table2 = ax6.table(cellText=normality_data,
                           colLabels=['Test', 'Statistic', 'p-value', 'Result'],
                           loc='center', cellLoc='center',
                           colWidths=[0.3, 0.2, 0.2, 0.3])
        table2.auto_set_font_size(False)
        table2.set_fontsize(10)
        table2.scale(1, 2)

        # Header row styling.
        for i in range(4):
            table2[(0, i)].set_facecolor('#2196F3')
            table2[(0, i)].set_text_props(weight='bold', color='white')

        # Data rows start at table row 1: green for a passed test, pink otherwise.
        for row, passed in enumerate(test_passed, start=1):
            table2[(row, 3)].set_facecolor('#90EE90' if passed else '#FFB6C1')

        conclusion_text = f"\nKESIMPULAN: {normality_dict['conclusion']}"
        ax6.text(0.5, 0.05, conclusion_text, ha='center', va='center',
                 fontsize=12, fontweight='bold',
                 bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.7))

        ax6.set_title('Hasil Uji Normalitas', fontsize=12, fontweight='bold', pad=20)

    plt.tight_layout()
    return fig


# VISUALIZATION
# Plot only when daily data, descriptive statistics and normality results all exist.
if len(daily_lama) == 0 or not stats_lama or not normality_lama:
    print("‚ö†Ô∏è  Data tidak cukup untuk visualisasi")
else:
    fig1 = plot_consumption_distribution(
        daily_lama, stats_lama, normality_lama, title_suffix="- Data Lama (4 Hari)"
    )
    plt.show()

‚ö†Ô∏è  Data tidak cukup untuk visualisasi
