# Basic CLV Model for Practice

In [2]:
# Import Libraries
import numpy as np
import pandas as pd

In [3]:
# Build a synthetic customer table for the CLV exercises.
#
# Columns of `df`:
#   customer_id            sequential ID, 1..n_custs
#   num_orders             number of purchases, integer in 0..10
#   avg_order_value        average spend per order, uniform in [10, 100)
#   days_since_last_order  recency in days, integer in 1..180
#   tenure_days            days as a customer, integer in 30..1095
#
# Every column is drawn independently of the others, so unrealistic
# combinations can occur (e.g. a customer with zero orders still gets an
# average order value and a recency).

# Fixed seed so every run regenerates exactly the same customers.
np.random.seed(42)

n_custs = 100
customer_id = np.arange(n_custs) + 1

# The draws below share one global random stream: keep them in this order,
# otherwise the generated values change.
num_orders = np.random.randint(low=0, high=11, size=n_custs)  # high is exclusive
avg_order_value = np.random.uniform(low=10, high=100, size=n_custs)
days_since_last_order = np.random.randint(low=1, high=181, size=n_custs)
tenure_days = np.random.randint(low=30, high=1096, size=n_custs)

df = pd.DataFrame(
    dict(
        customer_id=customer_id,
        num_orders=num_orders,
        avg_order_value=avg_order_value,
        days_since_last_order=days_since_last_order,
        tenure_days=tenure_days,
    )
)

# Quick sanity check: first rows, a blank separator line, then the schema.
print(df.head(5))
print()
df.info()

   customer_id  num_orders  avg_order_value  days_since_last_order  \
0            1           6        93.367079                     39   
1            2           3        68.596932                     82   
2            3          10        92.346371                    104   
3            4           7        86.503472                    129   
4            5           4        50.450561                     11   

   tenure_days  
0          729  
1         1022  
2          220  
3          282  
4         1010  

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   customer_id            100 non-null    int32  
 1   num_orders             100 non-null    int32  
 2   avg_order_value        100 non-null    float64
 3   days_since_last_order  100 non-null    int32  
 4   tenure_days            100 non-null    int32  
dtypes: float64(1), int32(4)

In [5]:
# Simple CLV: order count multiplied by average order value, per customer.
# Customers with zero orders therefore get a simple CLV of 0.
df["simple_clv"] = df["num_orders"].mul(df["avg_order_value"])
print(
    "Simple CLV Average:",
    df["simple_clv"].mean(),
)

Simple CLV Average: 292.88205354275374


In [7]:
# Recency-weighted ("discounted") CLV.
#
#   decay_factor   = exp(-RECENCY_DECAY_RATE * days_since_last_order)
#   clv_discounted = simple_clv * decay_factor
#
# This is an exponential recency decay, NOT the financial discounting formula
# simple_clv / (1 + discount_rate) ** (days_since_last_order / 365).
# The factor is 1.0 at 0 days since the last order and halves roughly every
# ln(2) / RECENCY_DECAY_RATE ~= 69 days, so long-inactive customers contribute
# little to the total.
RECENCY_DECAY_RATE = 0.01  # decay per day since the last order

df['decay_factor'] = np.exp(-RECENCY_DECAY_RATE * df['days_since_last_order'])
df['clv_discounted'] = df['simple_clv'] * df['decay_factor']
print("Discounted CLV Average:", df['clv_discounted'].mean())

Discounted CLV Average: 133.66134286938606


In [9]:
# Show the five customers with the highest decay factor, i.e. the most
# recently active ones (fewest days since their last order).
(
    df
    .sort_values("decay_factor", ascending=False)
    .head(5)
)

Unnamed: 0,customer_id,num_orders,avg_order_value,days_since_last_order,tenure_days,simple_clv,clv_discounted,decay_factor
16,17,2,31.238643,2,814,62.477286,61.240152,0.980199
83,84,0,52.327057,4,183,0.0,0.0,0.960789
91,92,10,87.252292,6,941,872.522924,821.711145,0.941765
10,11,10,34.724961,7,715,347.249614,323.773394,0.932394
72,73,7,41.056412,8,663,287.394886,265.298917,0.923116
