In [None]:
import pandas as pd
import datetime as dt
import numpy as np

# 1. LOAD DATA
print("กำลังอ่านไฟล์ train.csv...")
df = pd.read_csv('train.csv')

# 2. CLEANING
# Dates are parsed strictly as day/month/year; any value that does not match
# the format becomes NaT (errors='coerce') and the row is dropped just below.
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d/%m/%Y', errors='coerce')
df['Ship Date'] = pd.to_datetime(df['Ship Date'], format='%d/%m/%Y', errors='coerce')
df = df.dropna(subset=['Order Date', 'Ship Date'])

# Keep only rows where shipping does not precede the order. The explicit
# .copy() makes the filtered frame independent, so adding a column to it
# afterwards does not raise SettingWithCopyWarning.
df = df[df['Ship Date'] >= df['Order Date']].copy()
df['Delivery Days'] = (df['Ship Date'] - df['Order Date']).dt.days

# 3. RFM CALCULATION
# Snapshot date is one day after the latest order, so the most recent
# customer gets a Recency of 1 day rather than 0.
latest_date = df['Order Date'].max() + dt.timedelta(days=1)
rfm = df.groupby('Customer ID').agg({
    # Recency: days between the snapshot date and the customer's last order.
    'Order Date': lambda x: (latest_date - x.max()).days,
    # Frequency: number of distinct orders. The same Order ID can appear on
    # several rows (one per line item), so a plain row count would overstate it.
    'Order ID': 'nunique',
    # Monetary: total sales for the customer.
    'Sales': 'sum'
}).reset_index()
rfm.columns = ['Customer ID', 'Recency', 'Frequency', 'Monetary']

# 4. SCORING & SEGMENTATION
labels = range(1, 6)


def _quintile_score(values, score_labels):
    """Return integer quintile scores for *values*.

    The values are ranked with method='first' before binning, which breaks
    ties by position so that tied values cannot collapse two quantile edges
    into one (pd.qcut would otherwise raise, or produce fewer bins than
    labels). score_labels are assigned from the lowest bin to the highest.
    """
    ranked = values.rank(method='first')
    binned = pd.qcut(ranked, q=5, labels=list(score_labels))
    # Convert from Categorical to plain integers so the scores can be summed.
    return binned.astype(int)


# Recency is reversed: the smallest recency (most recent buyer) scores 5.
rfm['R_Score'] = _quintile_score(rfm['Recency'], range(5, 0, -1))
rfm['F_Score'] = _quintile_score(rfm['Frequency'], labels)
rfm['M_Score'] = _quintile_score(rfm['Monetary'], labels)
rfm['RFM_Score'] = rfm[['R_Score', 'F_Score', 'M_Score']].sum(axis=1)

def segment_customer(score):
    """Translate a combined RFM score into a customer grade label.

    Thresholds are checked from highest to lowest and the first one the
    score reaches wins: 13+ is 'VIP', 9+ is 'Potential', 5+ is
    'Needs Attention'. Anything below 5 is 'Lost'.
    """
    tiers = (
        (13, 'VIP'),
        (9, 'Potential'),
        (5, 'Needs Attention'),
    )
    for floor, grade in tiers:
        if score >= floor:
            return grade
    return 'Lost'
    
# Grade every customer from their combined RFM score.
rfm['Customer_Grade'] = rfm['RFM_Score'].map(segment_customer)

# 5. MERGE & EXPORT
# Left-join so every order row is kept and picks up its customer's grade
# and score.
grade_columns = rfm[['Customer ID', 'Customer_Grade', 'RFM_Score']]
final_df = pd.merge(df, grade_columns, on='Customer ID', how='left')

# Save
final_df.to_csv('superstore_final_for_powerbi.csv', index=False)

separator = "-" * 30
print(separator)
print("✅ สำเร็จ! แก้ไขชื่อคอลัมน์เป็น 'Customer_Grade' เรียบร้อย")
print(f"จำนวนแถว: {len(final_df)}")
print(separator)

# Show a sample of the data
cols_to_show = ['Order ID', 'Segment', 'Customer_Grade', 'Sales']
preview = final_df.head()
display(preview[cols_to_show])

กำลังอ่านไฟล์ train.csv...
------------------------------
✅ สำเร็จ! แก้ไขชื่อคอลัมน์เป็น 'Customer_Grade' เรียบร้อย
จำนวนแถว: 9800
------------------------------


Unnamed: 0,Order ID,Segment,Customer_Grade,Sales
0,CA-2017-152156,Consumer,Lost,261.96
1,CA-2017-152156,Consumer,Lost,731.94
2,CA-2017-138688,Corporate,Potential,14.62
3,US-2016-108966,Consumer,Potential,957.5775
4,US-2016-108966,Consumer,Potential,22.368
