# 💰 Spending Prediction with Linear Regression
This notebook loads transactional data, computes per-customer features (Recency, Frequency, Tenure), trains a linear regression model to predict each customer's Monetary value (their summed `Total_amount`), and then predicts spending for feature values entered by the user.

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

# 📥 Load your dataset (adjust path if needed)
# The path is resolved relative to the notebook's working directory, so the
# workbook must sit next to the notebook (or data_path must be edited here).
# Reading through data_path keeps this the single place to change the location
# instead of a machine-specific absolute path buried in the read call.
data_path = "cleaned_dataset.xlsx"  # Ensure this file exists in the same directory
raw_data = pd.read_excel(data_path)

# ✅ Ensure InvoiceDate is datetime
raw_data['InvoiceDate'] = pd.to_datetime(raw_data['InvoiceDate'])

# Reference point for Recency: the most recent invoice in the whole dataset.
# Computed once here rather than inside the per-customer aggregation, where it
# would be re-scanned for every group.
snapshot_date = raw_data['InvoiceDate'].max()

# ✅ Aggregate to RFM-like data per customer
#   Frequency     - number of distinct invoices
#   Monetary      - sum of Total_amount
#   First/LastPurchase - earliest / latest invoice date
grouped = (
    raw_data.groupby('CustomerID')
    .agg(
        Frequency=('InvoiceNo', 'nunique'),
        Monetary=('Total_amount', 'sum'),
        FirstPurchase=('InvoiceDate', 'min'),
        LastPurchase=('InvoiceDate', 'max'),
    )
    .reset_index()
)

# Recency = whole days between the customer's last purchase and snapshot_date.
# Inserted at position 1 so the column order stays
# CustomerID, Recency, Frequency, Monetary, FirstPurchase, LastPurchase.
grouped.insert(1, 'Recency', (snapshot_date - grouped['LastPurchase']).dt.days)

# ✅ Compute Tenure as (LastPurchase - FirstPurchase) in whole days,
# floored at 1 so customers whose purchases all fall on one day get Tenure 1, not 0.
grouped['Tenure'] = (
    (grouped['LastPurchase'] - grouped['FirstPurchase']).dt.days.clip(lower=1)
)

# ✅ Assemble the design matrix (three behavioural features) and the target
feature_cols = ['Recency', 'Frequency', 'Tenure']
X = grouped[feature_cols]
y = grouped['Monetary']

print(f"✅ Dataset shape for training: {X.shape}")

# ✅ Hold out 30% of the customers for evaluation; the fixed seed keeps the
# split identical between runs
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# ✅ Fit a linear regression on the training portion (fit returns the estimator)
spend_model = LinearRegression().fit(x_train, y_train)

# ✅ Score the model on the held-out customers
y_pred_test = spend_model.predict(x_test)
r2, mae = (
    r2_score(y_test, y_pred_test),
    mean_absolute_error(y_test, y_pred_test),
)
print(f"📈 Model R² Score: {r2:.4f}")
print(f"📉 Mean Absolute Error: ${mae:.2f}")

✅ Dataset shape for training: (2381, 3)
📈 Model R² Score: 0.0789
📉 Mean Absolute Error: $54.69


In [14]:
# ✨ User input for prediction
# Expects one number per entry in feature_cols, in that order, separated by commas.
user_input = input("Enter values as Recency, Frequency, Tenure (comma separated): ")
check = list(map(float, user_input.split(',')))  # float() raises ValueError on non-numeric text

# Fail early with a readable message instead of a shape error from inside predict().
if len(check) != len(feature_cols):
    raise ValueError(
        f"Expected {len(feature_cols)} values ({', '.join(feature_cols)}), "
        f"got {len(check)}."
    )

# ✅ Predict
# The model was fitted on a DataFrame with named columns, so pass a one-row
# DataFrame with the same column names rather than a bare nested list.
check_frame = pd.DataFrame([check], columns=feature_cols)
predicted_spending = spend_model.predict(check_frame)[0]
predicted_spending = max(0, predicted_spending)      # ensure no negative prediction
print(f"💸 Estimated Spending (Monetary): ${predicted_spending:.2f}")


💸 Estimated Spending (Monetary): $210.42


