In [7]:
import pandas as pd

# Load the raw transaction export; InvoiceDate is converted to datetime while reading.
# NOTE(review): the "unicode_escape" codec also interprets backslash escape
# sequences found inside the data - confirm that is what is wanted here.
df = pd.read_csv(
    "C:\\Users\\Hi\\Downloads\\online_retail_II(Year 2010-2011).csv",
    encoding="unicode_escape",
    parse_dates=['InvoiceDate'],
)


In [11]:
# Show the column labels exactly as they were read from the file.
print(list(df.columns))


['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'Price', 'Customer ID', 'Country']


In [13]:
# Map the two original labels to their replacement names; all other columns keep their labels.
column_renames = {'Customer ID': 'CustomerID', 'Price': 'UnitPrice'}
df = df.rename(columns=column_renames)


In [15]:
# Normalize the column labels: trim surrounding whitespace, delete inner spaces, lowercase.
df.columns = df.columns.str.strip().str.replace(' ', '').str.lower()
# With the rename cell above applied first ('Price' -> 'UnitPrice', 'Customer ID' -> 'CustomerID'), this gives:
# ['invoice','stockcode','description','quantity','invoicedate','unitprice','customerid','country']


In [17]:
# Normalize the column labels: trim surrounding whitespace, lowercase, turn spaces into underscores.
# NOTE(review): when the cells above have run in order, no label contains a space any more,
# so the replace step has nothing to change and no underscores are introduced.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')
# => ['invoice', 'stockcode', 'description', 'quantity', 'invoicedate', 'unitprice', 'customerid', 'country']


In [19]:
# Re-read the file from scratch, assigning the final lowercase column labels up front.
column_names = ['invoice','stockcode','description','quantity','invoicedate','unitprice','customerid','country']
df = pd.read_csv(
    "C:\\Users\\Hi\\Downloads\\online_retail_II(Year 2010-2011).csv",
    encoding="unicode_escape",
    names=column_names,
    header=0,  # row 0 is the file's own header: it is consumed and replaced by `names`
    parse_dates=['invoicedate'],
)


In [21]:
# Keep only the rows that satisfy all of the following:
#   * a customer id is present,
#   * quantity and unit price are strictly positive,
#   * the invoice number does not start with 'C'
#     (presumably the dataset's marker for cancellations - verify against the data dictionary).
# astype(str) keeps the prefix test working even when pandas parsed some or all
# invoice numbers as numbers; missing invoices become 'nan' and are therefore kept,
# matching the previous na=False behaviour.
starts_with_c = df['invoice'].astype(str).str.startswith('C')
keep_rows = (
    df['customerid'].notna()
    & (df['quantity'] > 0)
    & (df['unitprice'] > 0)
    & ~starts_with_c
)
# .copy() yields an independent frame, so adding a column below does not
# raise pandas' SettingWithCopyWarning.
df = df.loc[keep_rows].copy()
df['totalprice'] = df['quantity'] * df['unitprice']


In [23]:
# Show the column labels after cleaning (now including the derived total).
print(list(df.columns))


['invoice', 'stockcode', 'description', 'quantity', 'invoicedate', 'unitprice', 'customerid', 'country', 'totalprice']


In [25]:
from lifetimes.utils import summary_data_from_transaction_data

# Collapse the transaction log into one row per customer, using the latest
# invoice date in the data as the end of the observation period.
summary = summary_data_from_transaction_data(
    df,
    customer_id_col='customerid',
    datetime_col='invoicedate',
    monetary_value_col='totalprice',
    observation_period_end=df['invoicedate'].max()
)

# Optionally, filter to repeat customers.
# .copy() makes the filtered frame independent, so the columns assigned to
# `summary` in later cells do not raise pandas' SettingWithCopyWarning.
# NOTE(review): this also removes frequency == 0 customers from the purchase-count
# model fitted below - confirm that is intended.
summary = summary[summary['frequency'] > 0].copy()


In [27]:
from lifetimes import BetaGeoFitter

# Fit the purchase-count model on the per-customer frequency / recency / T columns.
bgf = BetaGeoFitter(penalizer_coef=0.001)
frequency, recency, age = summary['frequency'], summary['recency'], summary['T']
bgf.fit(frequency, recency, age)

# Predict next 6 months (≈180 days)
expected_purchases = bgf.conditional_expected_number_of_purchases_up_to_time(
    180, frequency, recency, age
)
summary['predicted_purchases_6mo'] = expected_purchases


In [29]:
from lifetimes import GammaGammaFitter

# The monetary model is fitted on customers with at least one repeat purchase only.
is_repeat = summary['frequency'] > 0
monetary_summary = summary[is_repeat]

ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(monetary_summary['frequency'], monetary_summary['monetary_value'])

# Expected transaction value, written back onto the matching rows of `summary`.
expected_value = ggf.conditional_expected_average_profit(
    monetary_summary['frequency'],
    monetary_summary['monetary_value']
)
summary.loc[monetary_summary.index, 'expected_avg_purchase'] = expected_value


In [31]:
# Six-month customer lifetime value, combining the fitted purchase-count model (bgf)
# with the fitted monetary model (ggf).
rfm = summary[['frequency', 'recency', 'T', 'monetary_value']]
summary['clv_6mo'] = ggf.customer_lifetime_value(
    bgf,
    rfm['frequency'],
    rfm['recency'],
    rfm['T'],
    rfm['monetary_value'],
    time=6,        # months
    freq='D',      # unit in which T is expressed (see lifetimes docs)
    discount_rate=0.01,
)


In [33]:
# Split customers into equal-sized CLV buckets, labelled from lowest to highest.
segment_labels = ['Hibernating', 'Need Attention', 'Loyal', 'Champion']
summary['segment'] = pd.qcut(
    summary['clv_6mo'],
    q=len(segment_labels),
    labels=segment_labels,
)


In [35]:
# app.py
# Streamlit front end: upload a cleaned transaction CSV, fit the purchase-count
# and monetary models, and show / download per-customer CLV.
# The uploaded file must contain the columns 'customerid', 'invoicedate' and 'totalprice'.
# NOTE(review): Streamlit scripts are meant to be launched with `streamlit run <file>`
# rather than executed inside a notebook kernel - presumably this cell should be
# saved as app.py.
import streamlit as st
import pandas as pd
from lifetimes import BetaGeoFitter, GammaGammaFitter
from lifetimes.utils import summary_data_from_transaction_data

st.title("Simple CLV Explorer")

uploaded = st.file_uploader("Upload cleaned CSV", type="csv")
if uploaded is not None:
    df = pd.read_csv(uploaded, parse_dates=['invoicedate'])

    # One row per customer: frequency, recency, T and monetary_value.
    summary = summary_data_from_transaction_data(
        df, 'customerid', 'invoicedate', 'totalprice',
        observation_period_end=df['invoicedate'].max()
    )

    # Ensure repeat purchases. 'frequency' exists only on the per-customer summary,
    # not on the uploaded transactions, so the filter has to be applied here.
    # .copy() lets the 'clv' column be added below without a SettingWithCopyWarning.
    summary = summary[summary['frequency'] > 0].copy()
    if summary.empty:
        # Nothing to fit the models on; explain and end this script run.
        st.warning("No customers with repeat purchases were found in the uploaded file.")
        st.stop()

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    ggf = GammaGammaFitter(penalizer_coef=0.0)
    ggf.fit(summary['frequency'], summary['monetary_value'])

    # Prediction horizon chosen by the user: 1-12 months, default 6.
    months = st.slider("Months to predict CLV for:", 1, 12, 6)
    summary['clv'] = ggf.customer_lifetime_value(
        bgf, summary['frequency'], summary['recency'], summary['T'],
        summary['monetary_value'], time=months, freq='D', discount_rate=0.01
    )

    # Ten customers with the highest predicted CLV.
    top = summary.nlargest(10, 'clv')[['clv']]
    st.table(top)

    st.download_button("Download CLV results", summary.to_csv(), "clv.csv")


2025-07-18 16:00:33.545 
  command:

    streamlit run C:\Users\Hi\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
