In [None]:
# ==========================================================
# 📊 Analisis Data Customer (CCS)
# ==========================================================

import pandas as pd

# 1. Load every input table from the "sample data" directory.
customers = pd.read_csv("sample data/customers.csv")
gasstations = pd.read_csv("sample data/gasstations.csv")
products = pd.read_csv("sample data/products.csv")
sample = pd.read_csv("sample data/sample.csv")

# 2. Enrich the transaction rows step by step. Every join is a left join,
#    so each row of `sample` is kept even when a lookup table has no match.
merged_df = pd.merge(sample, customers, on="CustomerID", how="left")
merged_df = pd.merge(
    merged_df,
    gasstations,
    left_on="GasStationID",  # key name on the transaction side
    right_on="SiteID",       # key name in the gas-station table
    how="left",
)
merged_df = pd.merge(merged_df, products, on="ProductID", how="left")

# 3. Add the per-row transaction value.
#    NOTE(review): assumes `Price` is a per-unit price and `Amount` a
#    quantity, so their product is the row's monetary value -- confirm.
merged_df['Total_Transaction'] = merged_df['Amount'].mul(merged_df['Price'])

print("✅ Data berhasil digabung. Jumlah baris:", merged_df.shape[0])
print("Kolom tersedia:", list(merged_df.columns))

# ==========================================================
# 1️⃣–3️⃣ Top 5 rankings by total transaction value
# ==========================================================
def _top_n_by_total(df, key, n=5):
    """Return the ``n`` largest per-group sums of ``Total_Transaction``.

    Args:
        df: DataFrame holding a ``Total_Transaction`` column and the
            ``key`` column.
        key: Name of the column to group by.
        n: Number of leading groups to keep (default 5).

    Returns:
        A Series indexed by ``key`` with the summed ``Total_Transaction``
        per group, sorted in descending order and cut to the first ``n``.
    """
    # NOTE(review): with groupby's default settings, rows whose key is NaN
    # (e.g. transactions left unmatched by a left join) are presumably
    # left out of the ranking -- confirm this is intended.
    return (
        df.groupby(key)['Total_Transaction']
        .sum()
        .sort_values(ascending=False)
        .head(n)
    )


# 1️⃣ Top 5 customers by total transaction value
top5_customers = _top_n_by_total(merged_df, 'CustomerID')

print("\n🏆 Top 5 Customers dengan transaksi terbanyak:")
print(top5_customers)

# 2️⃣ Top 5 gas stations by total transaction value
top5_gasstations = _top_n_by_total(merged_df, 'GasStationID')

print("\n⛽ Top 5 Gas Stations dengan transaksi terbanyak:")
print(top5_gasstations)

# 3️⃣ Top 5 products (grouped by their description) by total transaction value
top5_products = _top_n_by_total(merged_df, 'Description')

print("\n🛢️ Top 5 Produk dengan transaksi terbanyak:")
print(top5_products)

# ==========================================================
# 4️⃣ Descriptive statistics per day (23–26)
# ==========================================================
# Parse `Date` once so the `.dt` accessor is available.
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

# Computed once up front; the loop below only compares against it.
# NOTE(review): this matches on day-of-month only, so if the data covers
# more than one month the rows of every month are pooled -- confirm.
day_of_month = merged_df['Date'].dt.day
stat_columns = ['Amount', 'Price', 'Total_Transaction']

for day in (23, 24, 25, 26):
    daily_data = merged_df.loc[day_of_month == day]
    if daily_data.empty:
        # Nothing recorded on this day: report it and move on.
        print(f"\n📅 Tidak ada data untuk tanggal {day}")
        continue
    print(f"\n📅 Statistik Hari {day}:")
    print(daily_data[stat_columns].describe())

# ==========================================================
# 5️⃣ Busiest time slot (date and hour) by number of transactions
# ==========================================================
# Build a full timestamp from the textual forms of `Date` and `Time`,
# then keep only its hour component for bucketing.
date_part = merged_df['Date'].astype(str)
time_part = merged_df['Time'].astype(str)
merged_df['Datetime'] = pd.to_datetime(date_part + ' ' + time_part)
merged_df['Hour'] = merged_df['Datetime'].dt.hour

# Count rows per (Date, Hour) bucket.
transactions_per_hour = (
    merged_df.groupby(['Date', 'Hour'])
    .size()
    .reset_index(name='Total_Transactions')
)

# Keep the single bucket that sorts first by descending count; when
# several buckets tie, only one of them is reported.
best_time = transactions_per_hour.sort_values(
    by='Total_Transactions', ascending=False
).iloc[:1]

print("\n⏰ Waktu dengan transaksi terbanyak:")
print(best_time)

# ==========================================================
# 6️⃣ Business Understanding
# ==========================================================
# Closing summary, printed in Indonesian, of what the analysis is for:
# understanding customer behaviour and transaction patterns at the gas
# stations, finding the most profitable customers, locations and products,
# and providing a basis for promotion strategy and fuel stock management.
business_goals = """
💼 Tujuan utama analisis ini adalah:
- Memahami perilaku pelanggan dan pola transaksi di SPBU.
- Mengetahui pelanggan, lokasi, dan produk paling menguntungkan.
- Menjadi dasar strategi promosi dan pengelolaan stok bahan bakar.
"""
print(business_goals)
