In [None]:


# --- Report skewness of the three RFM inputs ---
# recency_skew / frequency_skew / monetary_skew are not defined in this cell;
# presumably they are computed earlier in the notebook -- verify before running.
print('\nSkewness values:')
print(
    f"Recency: {recency_skew:.2f}, "
    f"Frequency: {frequency_skew:.2f}, "
    f"Monetary: {monetary_skew:.2f}"
)

# --- 1. Define Scoring Functions ---

def score_recency(x):
    """Map a recency value to an R score from 5 (best) down to 1 (worst).

    Lower recency is better. Bands use inclusive upper bounds:
    <= 30 -> 5, <= 90 -> 4, <= 180 -> 3, <= 365 -> 2, larger -> 1.
    The cut-offs were tuned for an e-commerce dataset spanning 3 years.
    The unit is presumably days since the last purchase -- confirm against
    how 'Recency' is computed.

    A value that fails every comparison (e.g. NaN) falls through to 1.
    """
    # (inclusive upper bound, score), ordered from the best band to the worst.
    bands = ((30, 5), (90, 4), (180, 3), (365, 2))
    for upper_bound, points in bands:
        if x <= upper_bound:
            return points
    return 1


def score_frequency(x):
    """Map a transaction count to an F score from 1 (worst) up to 5 (best).

    Higher frequency is better. Assumes 'Frequency' is the count of
    transactions. Bands use inclusive upper bounds:
    <= 1 -> 1, <= 2 -> 2, <= 4 -> 3, <= 7 -> 4, larger -> 5.

    A value that fails every comparison (e.g. NaN) falls through to 5.
    """
    # `<= 1` rather than `== 1`: with an equality test, any value below 1
    # (e.g. 0) skipped this branch and was scored 2, i.e. ranked above a
    # customer with exactly one transaction.
    if x <= 1:
        return 1
    if x <= 2:
        return 2
    if x <= 4:
        return 3
    if x <= 7:
        return 4
    return 5


def score_monetary_log_manual(x_log):
    """Map a log-transformed monetary value to an M score from 1 to 5.

    Scoring uses manual bins on the log scale, which tames the extreme
    skew of raw spend while keeping the bands domain-driven.
    NOTE: the cut-offs MUST be tuned to the actual distribution of
    np.log1p(Monetary).

    Cut-offs (inclusive upper bounds) and the raw spend M they roughly
    correspond to, given x_log = log(M + 1):
        3.5 ~ M=32  |  5.0 ~ M=147  |  7.0 ~ M=1096  |  8.5 ~ M=4914

    A value that fails every comparison (e.g. NaN) falls through to 5.
    """
    cutoffs = (3.5, 5.0, 7.0, 8.5)
    # The first cut-off that x_log does not exceed decides the score (1..4).
    for score, cutoff in enumerate(cutoffs, start=1):
        if x_log <= cutoff:
            return score
    # Above every cut-off: top band.
    return 5

# --- 2. Apply Transformation and Scoring ---

# 2a. Apply log transformation to Monetary
# np.log1p computes log(x + 1): it compresses the heavy right tail and maps
# a spend of 0 to 0 instead of hitting log(0).
# Negative totals (e.g. net refunds) are floored at 0 first. log1p returns NaN
# for x < -1, and a NaN fails every `<=` test in score_monetary_log_manual,
# which would put such customers in the TOP monetary band (5).
# Non-negative values are unaffected by the clip.
rfm['Monetary_log'] = np.log1p(rfm['Monetary'].clip(lower=0))
print('Monetary data has been log-transformed for scoring.')

# 2b. Apply scoring functions (element-wise, one scalar per customer row)
rfm['R_score'] = rfm['Recency'].apply(score_recency)
rfm['F_score'] = rfm['Frequency'].apply(score_frequency)
rfm['M_score'] = rfm['Monetary_log'].apply(score_monetary_log_manual)

# 3. Compose RFM code and numeric RFM sum
# RFM_Class is the three scores concatenated as text in R, F, M order;
# RFM_Sum is their arithmetic total.
rfm['RFM_Class'] = rfm['R_score'].astype(str) + rfm['F_score'].astype(str) + rfm['M_score'].astype(str)
rfm['RFM_Sum'] = rfm['R_score'] + rfm['F_score'] + rfm['M_score']

print('\nRFM scoring complete using manual bins for R & F, and log-transformed manual bins for M.')