# RFM Analysis - FLO Shoes (Omni-channel)

**Dataset**: FLO Shoes Customer Data

**Source**: Kaggle - FLO RFM Analysis Dataset

**Description**: Turkish shoe retailer with both online and offline channels

**Complexity**: Medium

## Focus Areas
- **Omni-channel Analysis** (Online vs Offline behavior)
- Channel preference segmentation
- Cross-channel customer value
- Channel-specific RFM scores

## Dataset Features
- `master_id`: Unique customer ID
- `order_channel`: Online/Offline/Mobile
- `last_order_channel`: Last purchase channel
- `first_order_date`: Date of first purchase
- `last_order_date`: Date of last purchase
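- `order_num_total_ever_online` / `order_num_total_ever_offline`: Number of orders per channel (summed into total orders in this notebook)
- `customer_value_total_ever_online` / `customer_value_total_ever_offline`: Customer spend per channel (summed into total customer value in this notebook)
- `last_order_date_online` / `last_order_date_offline`: Date of last purchase per channel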

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide chart defaults: seaborn's white-grid theme and a wide
# default canvas (cells that need a different size pass figsize= themselves).
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

## 1. Load and Explore Data

In [None]:
# Read the FLO customer extract into memory.
# The CSV is published on Kaggle:
# https://www.kaggle.com/datasets/serhatckl/flo-rfm-analysis-dataset
df = pd.read_csv('flo_data_20k.csv')

# Quick orientation: dimensions, column names, then a sample of rows.
print(f"Dataset shape: {df.shape}")
print("\nColumns:", list(df.columns), sep="\n")
print("\nFirst few rows:")
df.head()

In [None]:
# Schema, null counts per column, and summary statistics of the numeric columns.
print("Data types:", df.dtypes, sep="\n")
print("\nMissing values:", df.isna().sum(), sep="\n")
print("\nBasic statistics:")
df.describe()

## 2. Data Preprocessing

In [None]:
# Parse the order-date columns so that date arithmetic works later on.
date_columns = ['first_order_date', 'last_order_date', 'last_order_date_online',
                'last_order_date_offline']

# Columns absent from this extract are skipped rather than raising.
present_date_columns = [name for name in date_columns if name in df.columns]
for name in present_date_columns:
    df[name] = pd.to_datetime(df[name])

# List every column so the channel-specific ones are easy to spot.
print("Available columns in dataset:")
for name in df.columns:
    print(f"  - {name}")

In [None]:
# The FLO extract reports orders and spend separately per channel:
#   order_num_total_ever_online / order_num_total_ever_offline
#   customer_value_total_ever_online / customer_value_total_ever_offline

# Pull the per-channel spend once; both the total and the shares need it.
online_spend = df['customer_value_total_ever_online']
offline_spend = df['customer_value_total_ever_offline']

# Customer-level totals across both channels.
df['total_orders'] = df['order_num_total_ever_online'] + df['order_num_total_ever_offline']
df['total_value'] = offline_spend + online_spend

# Share of total spend per channel, as a percentage rounded to two decimals.
df['online_percentage'] = (online_spend / df['total_value'] * 100).round(2)
df['offline_percentage'] = (offline_spend / df['total_value'] * 100).round(2)

# Channel classification
def classify_channel_preference(row):
    """Label a customer by where their spend is concentrated.

    `row` must expose 'online_percentage' and 'offline_percentage'.
    A channel holding at least 75% of spend makes the customer
    '<Channel>-Dominant'; otherwise, positive spend in both channels means
    'Omni-channel'. Anything else (for example NaN shares, for which every
    comparison is false) falls through to 'Single-Channel'.
    """
    online_share = row['online_percentage']
    offline_share = row['offline_percentage']

    if online_share >= 75:
        return 'Online-Dominant'
    if offline_share >= 75:
        return 'Offline-Dominant'
    if online_share > 0 and offline_share > 0:
        return 'Omni-channel'
    return 'Single-Channel'

# Label every customer row, then show how many customers each label received.
preference_labels = df.apply(classify_channel_preference, axis=1)
df['channel_preference'] = preference_labels

print("Channel preference distribution:")
print(df['channel_preference'].value_counts())

## 3. Overall RFM Calculation

In [None]:
# Reference point for recency: one day after the most recent order in the
# data, so the most recent customer has a recency of 1 day rather than 0.
analysis_date = df['last_order_date'].max() + timedelta(days=1)
print(f"Analysis date: {analysis_date}")

# Core RFM measures, one row per row of df (same index).
rfm = pd.DataFrame()
rfm['customer_id'] = df['master_id']
rfm['Recency'] = (analysis_date - df['last_order_date']).dt.days
rfm['Frequency'] = df['total_orders']
rfm['Monetary'] = df['total_value']

# Per-channel order counts, spend and preference, carried over from df.
rfm['orders_online'] = df['order_num_total_ever_online']
rfm['orders_offline'] = df['order_num_total_ever_offline']
rfm['value_online'] = df['customer_value_total_ever_online']
rfm['value_offline'] = df['customer_value_total_ever_offline']
rfm['channel_preference'] = df['channel_preference']
rfm['online_percentage'] = df['online_percentage']
rfm['offline_percentage'] = df['offline_percentage']

# Average order values. A zero order count is turned into NaN before dividing
# so the result is NaN ("undefined") instead of inf, which would otherwise
# distort any later mean. The overall AOV gets the same guard as the
# per-channel ones.
rfm['avg_order_value'] = rfm['Monetary'] / rfm['Frequency'].replace(0, np.nan)
rfm['avg_order_value_online'] = rfm['value_online'] / rfm['orders_online'].replace(0, np.nan)
rfm['avg_order_value_offline'] = rfm['value_offline'] / rfm['orders_offline'].replace(0, np.nan)

print("RFM Summary:")
print(rfm[['Recency', 'Frequency', 'Monetary']].describe())

## 4. Channel-Specific RFM Scores

In [None]:
def _quintile_score(values, labels):
    """Bin `values` into five quantile buckets and return the labels as ints.

    Binning is attempted on the raw values first, so tied values share a
    score. If the quantile edges are not unique (heavily tied data),
    pd.qcut raises ValueError; the values are then ranked with
    method='first' (ties broken by row order) to make the edges distinct,
    which is the same treatment the frequency scores already use.
    Passing labels together with duplicates='drop' is avoided on purpose:
    once an edge is dropped the label count no longer matches the bin count
    and qcut raises.
    """
    try:
        binned = pd.qcut(values, q=5, labels=labels)
    except ValueError:
        binned = pd.qcut(values.rank(method='first'), q=5, labels=labels)
    return binned.astype(int)


# Recency is scored in reverse: the smallest recency gets the top score.
recency_labels = [5, 4, 3, 2, 1]
ascending_labels = [1, 2, 3, 4, 5]

# Overall RFM scores
rfm['R_Score'] = _quintile_score(rfm['Recency'], recency_labels)
rfm['F_Score'] = _quintile_score(rfm['Frequency'].rank(method='first'), ascending_labels)
rfm['M_Score'] = _quintile_score(rfm['Monetary'], ascending_labels)

# Online-specific RFM scores (only customers with at least one online order).
# Recency here is the overall recency, not a per-channel one.
online_customers = rfm[rfm['orders_online'] > 0].copy()
online_customers['R_Score_Online'] = _quintile_score(online_customers['Recency'], recency_labels)
online_customers['F_Score_Online'] = _quintile_score(
    online_customers['orders_online'].rank(method='first'), ascending_labels)
online_customers['M_Score_Online'] = _quintile_score(online_customers['value_online'], ascending_labels)

# Offline-specific RFM scores (only customers with at least one offline order).
offline_customers = rfm[rfm['orders_offline'] > 0].copy()
offline_customers['R_Score_Offline'] = _quintile_score(offline_customers['Recency'], recency_labels)
offline_customers['F_Score_Offline'] = _quintile_score(
    offline_customers['orders_offline'].rank(method='first'), ascending_labels)
offline_customers['M_Score_Offline'] = _quintile_score(offline_customers['value_offline'], ascending_labels)

# Merge back; customers without purchases in a channel get NaN for that
# channel's scores because of the left join.
rfm = rfm.merge(online_customers[['customer_id', 'R_Score_Online', 'F_Score_Online', 'M_Score_Online']],
                on='customer_id', how='left')
rfm = rfm.merge(offline_customers[['customer_id', 'R_Score_Offline', 'F_Score_Offline', 'M_Score_Offline']],
                on='customer_id', how='left')

# Concatenated three-digit code (e.g. "545") and a simple additive total.
rfm['RFM_Score'] = rfm['R_Score'].astype(str) + rfm['F_Score'].astype(str) + rfm['M_Score'].astype(str)
rfm['RFM_Total'] = rfm['R_Score'] + rfm['F_Score'] + rfm['M_Score']

print("RFM scoring complete")
rfm.head(10)

## 5. Customer Segmentation

In [None]:
def _rfm_segment(r, f, m):
    """Map one customer's R, F and M scores to a segment name.

    Rules are evaluated top to bottom and the first match wins, so their
    order matters. The two lapsed-but-valuable rules ("Can't Lose Them",
    "At Risk") are checked before the broad "About to Sleep" rule, and the
    narrower r <= 1 rule before the r <= 2 rule; otherwise the broader
    rules would capture those customers first.
    """
    if r >= 4 and f >= 4 and m >= 4:
        return 'Champions'
    if r >= 3 and f >= 4:
        return 'Loyal Customers'
    if r >= 4 and 2 <= f <= 3:
        return 'Potential Loyalists'
    if r >= 4 and f <= 2:
        return 'New Customers'
    if 3 <= r <= 4 and f <= 2:
        return 'Promising'
    if r >= 3 and f >= 3 and m >= 3:
        return 'Need Attention'
    if r <= 1 and f >= 4 and m >= 4:
        return "Can't Lose Them"
    if r <= 2 and f >= 4 and m >= 4:
        return 'At Risk'
    if 2 <= r <= 3:
        return 'About to Sleep'
    if r <= 2 and f <= 2:
        return 'Hibernating'
    return 'Lost'


def segment_customers(df):
    """Return a list of segment names, one per row of `df`, in row order.

    `df` must contain the columns 'R_Score', 'F_Score' and 'M_Score'.
    An empty frame yields an empty list.
    """
    # Walking the three score columns in lockstep avoids building a Series
    # per row, which is what iterrows() does.
    return [
        _rfm_segment(r, f, m)
        for r, f, m in zip(df['R_Score'], df['F_Score'], df['M_Score'])
    ]

# Attach a segment label to every customer and show the segment sizes.
segment_labels = segment_customers(rfm)
rfm['Segment'] = segment_labels

print("Segment Distribution:")
print(rfm['Segment'].value_counts())

## 6. Omni-channel Analysis

In [None]:
# Number of customers in each channel-preference group.
channel_dist = rfm['channel_preference'].value_counts()

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
bar_ax, pie_ax = axes

# Left panel: absolute counts as bars.
channel_dist.plot(kind='bar', ax=bar_ax, color='steelblue')
bar_ax.set_title('Customer Distribution by Channel Preference', fontsize=14, fontweight='bold')
bar_ax.set_xlabel('Channel Preference', fontsize=12)
bar_ax.set_ylabel('Number of Customers', fontsize=12)
bar_ax.tick_params(axis='x', rotation=45)

# Right panel: the same counts as shares of the customer base.
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']
pie_ax.pie(
    channel_dist.values,
    labels=channel_dist.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
)
pie_ax.set_title('Channel Preference Distribution', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Customer count, revenue, order frequency and AOV per channel-preference group.
aggregations = {
    'customer_id': 'count',
    'Monetary': ['sum', 'mean'],
    'Frequency': 'mean',
    'avg_order_value': 'mean',
}
channel_value = rfm.groupby('channel_preference').agg(aggregations).round(2)

# Flatten the (column, statistic) header into single names such as
# "Monetary_sum", then order the groups by total revenue.
channel_value.columns = [f"{column}_{stat}".strip() for column, stat in channel_value.columns.values]
channel_value = (
    channel_value
    .rename(columns={'customer_id_count': 'num_customers'})
    .sort_values('Monetary_sum', ascending=False)
)

print("Value Metrics by Channel Preference:")
print(channel_value)

In [None]:
# Side-by-side horizontal bars: total revenue and average customer value
# for each channel-preference group.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
total_ax, mean_ax = axes

channel_value['Monetary_sum'].plot(kind='barh', ax=total_ax, color='darkgreen')
total_ax.set_title('Total Revenue by Channel Preference', fontsize=14, fontweight='bold')
total_ax.set_xlabel('Total Revenue', fontsize=12)

channel_value['Monetary_mean'].plot(kind='barh', ax=mean_ax, color='coral')
mean_ax.set_title('Average Customer Value by Channel Preference', fontsize=14, fontweight='bold')
mean_ax.set_xlabel('Average Customer Value', fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# Cross-tabulate segment against channel preference and draw it as a
# stacked bar chart: one bar per segment, one colour per channel group.
segment_channel = pd.crosstab(rfm['Segment'], rfm['channel_preference'])

segment_ax = segment_channel.plot(kind='bar', stacked=True, figsize=(14, 6), colormap='Set2')
segment_ax.set_title('Customer Segments by Channel Preference', fontsize=14, fontweight='bold')
segment_ax.set_xlabel('Segment', fontsize=12)
segment_ax.set_ylabel('Number of Customers', fontsize=12)
# Legend sits outside the axes so it does not cover the bars.
segment_ax.legend(title='Channel Preference', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 7. Omni-channel Customer Analysis

In [None]:
# Restrict to the customers labelled 'Omni-channel'.
omni_customers = rfm[rfm['channel_preference'] == 'Omni-channel'].copy()

omni_mean_value = omni_customers['Monetary'].mean()
print(f"Omni-channel Customers: {len(omni_customers):,}")
print(f"Total Value: {omni_customers['Monetary'].sum():,.2f}")
print(f"Average Value: {omni_mean_value:,.2f}")
print(f"\nComparison to overall average: {omni_mean_value / rfm['Monetary'].mean():.2f}x")

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
scatter_ax, hist_ax = axes

# Left panel: each point is one customer; the dashed diagonal marks equal
# online and offline spend.
scatter_ax.scatter(omni_customers['value_online'], omni_customers['value_offline'],
                   alpha=0.5, c='steelblue', s=50)
diagonal_end = omni_customers[['value_online', 'value_offline']].max().max()
scatter_ax.plot([0, diagonal_end], [0, diagonal_end],
                'r--', alpha=0.5, label='Equal spending')
scatter_ax.set_title('Omni-channel: Online vs Offline Spending', fontsize=14, fontweight='bold')
scatter_ax.set_xlabel('Online Value', fontsize=12)
scatter_ax.set_ylabel('Offline Value', fontsize=12)
scatter_ax.legend()

# Right panel: distribution of the online share of spend, with the 50% mark.
hist_ax.hist(omni_customers['online_percentage'], bins=30, color='coral', edgecolor='black')
hist_ax.axvline(50, color='red', linestyle='--', label='50-50 split')
hist_ax.set_title('Online vs Offline Balance (Omni-channel)', fontsize=14, fontweight='bold')
hist_ax.set_xlabel('Online Percentage', fontsize=12)
hist_ax.set_ylabel('Number of Customers', fontsize=12)
hist_ax.legend()

plt.tight_layout()
plt.show()

## 8. Cross-Channel Behavior

In [None]:
# Summary statistics of average order value (AOV) for each channel.
aov_columns = ['avg_order_value_online', 'avg_order_value_offline']
aov_comparison = rfm[aov_columns].describe()

print("Average Order Value Comparison:")
print(aov_comparison)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
box_ax, violin_ax = axes

# Left panel: box plots. NaN AOVs (no orders in that channel) are dropped.
data_to_plot = [rfm[column].dropna() for column in aov_columns]
box_ax.boxplot(data_to_plot, labels=['Online', 'Offline'])
box_ax.set_title('AOV Distribution by Channel', fontsize=14, fontweight='bold')
box_ax.set_ylabel('Average Order Value', fontsize=12)

# Right panel: violin plot of the same data, reshaped to long format
# (one row per customer/channel pair) as seaborn expects.
aov_data = pd.DataFrame({
    'Online': rfm['avg_order_value_online'],
    'Offline': rfm['avg_order_value_offline'],
})
aov_melted = aov_data.melt(var_name='Channel', value_name='AOV').dropna()

sns.violinplot(data=aov_melted, x='Channel', y='AOV', ax=violin_ax)
violin_ax.set_title('AOV Distribution (Violin Plot)', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Compare omni-channel customers with everyone else: share of revenue and
# average customer value. "Single-channel" here means every customer whose
# channel_preference is not 'Omni-channel'.
omni_mean = omni_customers['Monetary'].mean()
omni_revenue_share = omni_customers['Monetary'].sum() / rfm['Monetary'].sum() * 100
non_omni_mean = rfm[rfm['channel_preference'] != 'Omni-channel']['Monetary'].mean()

print("Channel Evolution Insights:")
print(f"\nTotal omni-channel customers: {len(omni_customers):,}")
print(f"These customers generate {omni_revenue_share:.1f}% of total revenue")
print(f"Average value per omni-channel customer: {omni_mean:.2f}")
print(f"vs single-channel average: {non_omni_mean:.2f}")
print(f"Uplift: {(omni_mean / non_omni_mean - 1) * 100:.1f}%")

## 9. Strategic Recommendations

In [None]:
# Text report summarising the omni-channel RFM analysis.
total_customers = len(rfm)
total_revenue = rfm['Monetary'].sum()

print("=" * 70)
print("FLO SHOES - OMNI-CHANNEL RFM ANALYSIS")
print("=" * 70)

print("\n1. OVERALL METRICS")
print(f"   Total Customers: {total_customers:,}")
print(f"   Total Revenue: {total_revenue:,.2f}")
print(f"   Average Customer Value: {rfm['Monetary'].mean():,.2f}")

# Customers, revenue and average value for each channel-preference group,
# with each group's share of the totals.
print("\n2. CHANNEL BREAKDOWN")
for channel in rfm['channel_preference'].unique():
    channel_data = rfm[rfm['channel_preference'] == channel]
    count = len(channel_data)
    revenue = channel_data['Monetary'].sum()
    avg_value = channel_data['Monetary'].mean()
    print(f"\n   {channel}:")
    print(f"   - Customers: {count:,} ({count/total_customers*100:.1f}%)")
    print(f"   - Revenue: {revenue:,.2f} ({revenue/total_revenue*100:.1f}%)")
    print(f"   - Avg Value: {avg_value:,.2f}")

# The "online-only" / "offline-only" figures below are computed from the
# 'Online-Dominant' / 'Offline-Dominant' groups.
print("\n3. OMNI-CHANNEL ADVANTAGE")
online_dominant = rfm[rfm['channel_preference'] == 'Online-Dominant']
offline_dominant = rfm[rfm['channel_preference'] == 'Offline-Dominant']
omni_value = omni_customers['Monetary'].mean()
online_only_value = online_dominant['Monetary'].mean()
offline_only_value = offline_dominant['Monetary'].mean()
print(f"   Omni-channel Avg: {omni_value:,.2f}")
print(f"   Online-only Avg: {online_only_value:,.2f}")
print(f"   Offline-only Avg: {offline_only_value:,.2f}")
print(f"   → Omni-channel uplift vs online-only: {(omni_value/online_only_value - 1)*100:.1f}%")
print(f"   → Omni-channel uplift vs offline-only: {(omni_value/offline_only_value - 1)*100:.1f}%")

# Scenario: 20% of online-dominant revenue moves to the omni-channel average
# value while the remaining 80% stays as it is. The potential is therefore
# current revenue plus the uplift on the converting 20%, which keeps the
# "Potential" and "Uplift" lines consistent with each other.
print("\n4. STRATEGIC OPPORTUNITIES")
conversion_rate = 0.2
current_online_revenue = online_dominant['Monetary'].sum()
conversion_uplift = current_online_revenue * conversion_rate * (omni_value/online_only_value - 1)
print(f"   Online-dominant customers to convert: {len(online_dominant):,}")
print("   Potential value if 20% convert to omni-channel:")
print(f"     Current: {current_online_revenue:,.2f}")
print(f"     Potential: {current_online_revenue + conversion_uplift:,.2f}")
print(f"     Uplift: {conversion_uplift:,.2f}")

# Channel mix inside the key segments. A dedicated name is used for the
# per-segment counts so the notebook-level channel_dist is not overwritten.
print("\n5. SEGMENT-CHANNEL INSIGHTS")
for segment in ['Champions', 'Loyal Customers', 'At Risk']:
    seg_data = rfm[rfm['Segment'] == segment]
    if len(seg_data) > 0:
        print(f"\n   {segment}:")
        segment_channel_counts = seg_data['channel_preference'].value_counts()
        for channel, count in segment_channel_counts.items():
            print(f"     - {channel}: {count} ({count/len(seg_data)*100:.1f}%)")

print("\n" + "=" * 70)
print("\nKEY RECOMMENDATIONS:")
print("1. Encourage online customers to visit stores with exclusive in-store offers")
print("2. Promote online shopping to offline customers via mobile app incentives")
print("3. Create seamless omni-channel experiences (buy online pickup in store)")
print("4. Target high-value single-channel customers for cross-channel campaigns")
print("5. Develop channel-specific loyalty programs that reward omni-channel behavior")
print("=" * 70)

## 10. Export Results

In [None]:
# The 100 omni-channel customers with the highest total spend.
top_omni = omni_customers.nlargest(100, 'Monetary')

# (frame, destination file, whether to write the index, confirmation message).
# channel_value keeps its index because the index holds the channel-preference
# labels; the other two frames are written without it.
exports = [
    (rfm, 'flo_shoes_omnichannel_rfm.csv', False,
     "RFM analysis exported to: flo_shoes_omnichannel_rfm.csv"),
    (channel_value, 'flo_channel_analysis.csv', True,
     "Channel analysis exported to: flo_channel_analysis.csv"),
    (top_omni, 'flo_top_omnichannel_customers.csv', False,
     "Top omni-channel customers exported"),
]
for frame, destination, keep_index, message in exports:
    frame.to_csv(destination, index=keep_index)
    print(message)