In [1]:
import sqlite3
import pandas as pd

# --- Day 6 project fixture: a temporary in-memory SQLite DB with sample orders ---

# SQL text lives in named constants so the steps further down read as plain actions.
CREATE_ORDERS_SQL = '''
CREATE TABLE orders (
    order_id VARCHAR(10),
    user_id VARCHAR(10),
    order_date DATE,
    amount INTEGER
)
'''
INSERT_ORDER_SQL = 'INSERT INTO orders VALUES (?, ?, ?, ?)'

# Realistic-looking dummy orders covering 2023 and 2024.
# Row layout: (order_id, user_id, order_date, amount).
data = [
    ('O001', 'User_A', '2024-01-01', 50000),
    ('O002', 'User_A', '2024-01-03', 30000),
    ('O003', 'User_B', '2023-05-01', 100000),
    ('O004', 'User_B', '2023-06-15', 20000),
    ('O005', 'User_C', '2024-01-02', 5000),
    ('O006', 'User_C', '2024-01-03', 10000),
    ('O007', 'User_D', '2023-12-10', 150000),
    ('O008', 'User_A', '2024-01-05', 70000),
]

# Open the temporary database in memory; a new one is created on every run of this cell.
conn = sqlite3.connect(':memory:')
cur = conn.cursor()

# Step 1: create the orders table (it carries a date column).
cur.execute(CREATE_ORDERS_SQL)

# Step 2: insert every sample row through the positional placeholders, then commit.
cur.executemany(INSERT_ORDER_SQL, data)
conn.commit()

print("✅ Day 6 프로젝트 데이터 준비 완료!")

✅ Day 6 프로젝트 데이터 준비 완료!


In [12]:
# RFM segmentation of the `orders` table.
#
# Per user, the innermost query derives:
#   Recency   - whole days between REFERENCE_DATE and the user's latest order
#   Frequency - number of orders
#   Monetary  - total order amount
#
# RFM_BASE turns each metric into a 5 / 3 / 1 score:
#   R_Score: Recency   <= 10 -> 5, <= 30 -> 3, otherwise 1
#   F_Score: Frequency >= 3  -> 5, >= 2  -> 3, otherwise 1
#   M_Score: Monetary  >= 100000 -> 5, >= 30000 -> 3, otherwise 1
#
# RFM_FINAL assigns a segment label. The CASE branches are evaluated top to
# bottom, so the first matching rule wins:
#   'VIP'       - R_Score = 5 and (F_Score >= 3 or M_Score >= 3)
#   '이탈 위험' - R_Score = 1 and (F_Score >= 3 or M_Score >= 3)   (churn risk)
#   '신규'      - R_Score >= 3 and F_Score = 1                      (new)
#   '일반'      - everything else                                   (regular)
#
# The final SELECT reports user count, total revenue and average revenue per
# segment, highest revenue first.

# Snapshot date that recency is measured against. It is bound as a query
# parameter (instead of being hard-coded inside the SQL text) so the analysis
# can be re-run for another date by changing this one value.
REFERENCE_DATE = '2024-01-06'

sql_rfm = """
WITH RFM_BASE AS (
    SELECT
        user_id, Monetary,
        CASE
            WHEN Recency <= 10 THEN 5
            WHEN Recency <= 30 THEN 3
            ELSE 1
        END AS R_Score,
        CASE
            WHEN Frequency >= 3 THEN 5
            WHEN Frequency >= 2 THEN 3
            ELSE 1
        END AS F_Score,
        CASE
            WHEN Monetary >= 100000 THEN 5
            WHEN Monetary >= 30000 THEN 3
            ELSE 1
        END AS M_Score
    FROM (
        SELECT
            user_id, 
            CAST(julianday(:reference_date) - julianday(MAX(order_date)) AS INTEGER) AS Recency,
            COUNT(user_id) AS Frequency, 
            SUM(amount) AS Monetary
        FROM orders
        GROUP BY user_id
    )
),

RFM_FINAL AS (
    SELECT
        user_id, Monetary,
        CASE
            WHEN R_Score = 5 AND (F_Score >= 3 OR M_Score >= 3) THEN 'VIP'
            WHEN R_Score = 1 AND (F_Score >= 3 OR M_Score >= 3) THEN '이탈 위험'
            WHEN R_Score >= 3 AND F_Score = 1 THEN '신규'
            ELSE '일반'
        END AS Class
    FROM RFM_BASE
)

SELECT
    Class,
    COUNT(user_id) AS User_Count,
    SUM(Monetary) AS Total_Revenue,
    ROUND(AVG(Monetary), 0) AS Avg_Revenue
FROM RFM_FINAL
GROUP BY Class
ORDER BY Total_Revenue DESC, Class;
"""

# `Class` is a secondary sort key so that segments with equal revenue come back
# in a stable order. The named placeholder is filled from REFERENCE_DATE.
rfm_summary = pd.read_sql(sql_rfm, conn, params={'reference_date': REFERENCE_DATE})
print(rfm_summary)

   Class  User_Count  Total_Revenue  Avg_Revenue
0    VIP           2         165000      82500.0
1     신규           1         150000     150000.0
2  이탈 위험           1         120000     120000.0
