# Online Factorization Model with Vowpal Wabbit

This notebook implements an online model to rank products (or ads) for each client. It:
1. Loads and preprocesses the dataset (with time-based and expanding features).
2. Encodes each transaction into a Vowpal Wabbit (VW) input string.
3. Trains a VW model incrementally.
4. Demonstrates scoring and online updates.

Make sure you have Vowpal Wabbit installed (e.g., run `%pip install vowpalwabbit` if needed).

In [None]:
# Install Vowpal Wabbit if needed (uncomment the next line)
# %pip install vowpalwabbit

import pandas as pd
import numpy as np
import os
from vowpalwabbit import pyvw

print("Environment setup complete.")

## 1. Load & Preprocess Data

In [None]:
# Read the transaction table from disk and preview it before any changes.
final_df = pd.read_parquet('final_df.parquet')
print("Initial shape:", final_df.shape)
display(final_df.head())

# Parse TransactionDate as datetime, then order rows chronologically so the
# model later sees transactions in the order they happened.
parsed_dates = pd.to_datetime(final_df['TransactionDate'])
final_df = (
    final_df
    .assign(TransactionDate=parsed_dates)
    .sort_values(by='TransactionDate')
    .reset_index(drop=True)
)
print("Shape after sorting:", final_df.shape)
print(final_df['TransactionDate'].describe())

## 8. Encode Data for Online Learning with Vowpal Wabbit

In [None]:
# Characters that are structural in VW's text input format and therefore must
# not appear inside a feature name: whitespace separates features, ':' starts a
# feature value and '|' starts a namespace. Each is replaced by '_'.
_VW_RESERVED_CHARS = str.maketrans({ch: "_" for ch in " \t\r\n:|"})


def _is_missing(value):
    """Return True when a scalar `value` is None / NaN / NaT / pd.NA."""
    try:
        return bool(pd.isnull(value))
    except (TypeError, ValueError):
        # pd.isnull is element-wise on containers; treat those as "present".
        return False


def _vw_token(value, default="UNK"):
    """Render `value` as text that is safe to embed in a VW feature name."""
    if _is_missing(value):
        return default
    token = str(value).strip().translate(_VW_RESERVED_CHARS)
    return token or default


def to_vw_format(row, label=1.0):
    """
    Convert a single transaction row into VW format:
    <label> |u <user features> |i <item features> |c <context features>

    Parameters
    ----------
    row : mapping-like (pandas Series or dict)
        Must contain 'ClientID' and 'ProductID'. The optional keys
        'Universe', 'ClientGender', 'Age', 'Brand', 'Category', 'DayOfWeek',
        'Season' and 'AverageAmountPerTransactionSoFar' fall back to a
        default token when absent or null.
    label : float, default 1.0
        Label written at the start of the line. The default keeps the
        original behaviour (every transaction is a positive example); pass
        -1 to encode a negative example.

    Returns
    -------
    str
        One VW example line.

    Raises
    ------
    KeyError
        If 'ClientID' or 'ProductID' is missing from `row`.
    """
    # User features (namespace |u)
    age = row.get("Age", np.nan)
    if _is_missing(age):
        age_bucket = "AGE_UNKNOWN"
    else:
        # Bucket ages by decade: 29 -> AGE_20, 31 -> AGE_30.
        age_bucket = f"AGE_{int(age)//10*10}"
    user_feats = [
        f"u_clientID_{_vw_token(row['ClientID'])}",
        f"Universe_{_vw_token(row.get('Universe', 'UNK'))}",
        f"Gender_{_vw_token(row.get('ClientGender', 'UNK'))}",
        age_bucket,
    ]

    # Item features (namespace |i)
    item_feats = [
        f"i_productID_{_vw_token(row['ProductID'])}",
        f"Brand_{_vw_token(row.get('Brand', 'UNK'))}",
        f"Cat_{_vw_token(row.get('Category', 'UNK'))}",
    ]

    # Context features (namespace |c)
    avg_amt = row.get("AverageAmountPerTransactionSoFar", 0.0)
    avg_amt = 0.0 if _is_missing(avg_amt) else float(avg_amt)
    if not np.isfinite(avg_amt):
        # Never emit 'nan'/'inf' as a numeric feature value.
        avg_amt = 0.0
    context_feats = [
        f"DOW_{_vw_token(row.get('DayOfWeek', -1), default='-1')}",
        f"Season_{_vw_token(row.get('Season', 'UNK'))}",
        f"avgAmt:{avg_amt:.2f}",
    ]

    line = (
        f"{label} |u " + " ".join(user_feats)
        + " |i " + " ".join(item_feats)
        + " |c " + " ".join(context_feats)
    )
    return line

# Encode every transaction (one row -> one VW example string), in frame order.
encoded_rows = final_df.apply(to_vw_format, axis=1)
vw_train_lines = encoded_rows.tolist()

print(f"Generated {len(vw_train_lines)} VW training lines.")
print("Example line:", vw_train_lines[0])

## 9. Train the Online Model with Vowpal Wabbit

In [None]:
# Build the VW workspace. Keyword arguments are forwarded to VW as
# command-line options (e.g. bit_precision=28 -> --bit_precision 28).
#
# lrq="ui8" adds low-rank (rank 8) interactions between the |u and |i
# namespaces. Without an interaction term the model is purely additive over
# user, item and context features, so for a fixed context every user would get
# the same item ordering -- i.e. no per-client ranking and no factorization.
#
# NOTE(review): every line produced by to_vw_format carries a positive label,
# so the model never sees a negative example; consider adding sampled
# negatives (label -1) before relying on the scores.
# NOTE(review): bit_precision=28 asks for a 2**28-slot weight table; confirm
# the memory footprint is acceptable for the target machine.
model = pyvw.Workspace(
    learning_rate=0.5,
    passes=1,
    bit_precision=28,
    loss_function="logistic",
    link="logistic",
    lrq="ui8",
    quiet=True,
    k=True,
    random_seed=42
)

# Single online pass: feed the examples one at a time, in chronological order.
for vw_example in vw_train_lines:
    model.learn(vw_example)

print("VW model training complete!")

## 10. Score Candidate Products for a Given User

In [None]:
def predict_score(vw_model, user_info, item_info, context_info):
    """
    Score one (user, item, context) combination with a trained VW model.

    The example is encoded with the same feature names used for training
    (namespaces |u, |i and |c) and sent to VW as an *unlabeled* line, so no
    placeholder label is attached to a prediction-only example.

    Parameters
    ----------
    vw_model : object with a ``predict(str)`` method (e.g. a pyvw workspace)
    user_info : dict
        Optional keys: 'ClientID', 'Universe', 'ClientGender', 'Age'.
    item_info : dict
        Optional keys: 'ProductID', 'Brand', 'Category'.
    context_info : dict
        Optional keys: 'DayOfWeek', 'Season', and the running average amount
        under either 'avgAmt' or 'AverageAmountPerTransactionSoFar'.

    Returns
    -------
    Whatever ``vw_model.predict`` returns for the encoded line.
    """
    # Whitespace, ':' and '|' are structural in VW's text format; map them to
    # '_' so a value can never split or corrupt a feature.
    reserved = str.maketrans({ch: "_" for ch in " \t\r\n:|"})

    def is_missing(value):
        try:
            return bool(pd.isnull(value))
        except (TypeError, ValueError):
            return False

    def token(value, default="UNK"):
        if is_missing(value):
            return default
        text = str(value).strip().translate(reserved)
        return text or default

    # User namespace
    age = user_info.get('Age', None)
    if is_missing(age):
        # Covers None as well as NaN (int(nan) would raise).
        age_bucket = "AGE_UNKNOWN"
    else:
        age_bucket = f"AGE_{int(age)//10*10}"
    user_feats = [
        f"u_clientID_{token(user_info.get('ClientID', 'UNK'))}",
        f"Universe_{token(user_info.get('Universe', 'UNK'))}",
        f"Gender_{token(user_info.get('ClientGender', 'UNK'))}",
        age_bucket,
    ]

    # Item namespace
    item_feats = [
        f"i_productID_{token(item_info.get('ProductID', 'UNK'))}",
        f"Brand_{token(item_info.get('Brand', 'UNK'))}",
        f"Cat_{token(item_info.get('Category', 'UNK'))}",
    ]

    # Context namespace. Accept the short key used by this function as well as
    # the column name used when encoding training rows.
    avg_amt = context_info.get(
        'avgAmt', context_info.get('AverageAmountPerTransactionSoFar', 0.0)
    )
    avg_amt = 0.0 if is_missing(avg_amt) else float(avg_amt)
    context_feats = [
        f"DOW_{token(context_info.get('DayOfWeek', -1), default='-1')}",
        f"Season_{token(context_info.get('Season', 'UNK'))}",
        f"avgAmt:{avg_amt:.2f}",
    ]

    line = (
        "|u " + " ".join(user_feats)
        + " |i " + " ".join(item_feats)
        + " |c " + " ".join(context_feats)
    )
    pred = vw_model.predict(line)
    return pred

# A sample user and browsing context to rank products for.
example_user = {
    "ClientID": 1000001,
    "Universe": "Men",
    "ClientGender": "M",
    "Age": 29,
}
example_context = {
    "DayOfWeek": 3,
    "Season": "Spring",
    "avgAmt": 45.0,
}

# Products to be ranked for that user.
candidate_items = [
    {"ProductID": "12345", "Brand": "Nike", "Category": "Football"},
    {"ProductID": "23456", "Brand": "Adidas", "Category": "Running"},
    {"ProductID": "34567", "Brand": "Puma", "Category": "Basketball"},
    {"ProductID": "45678", "Brand": "Head", "Category": "Tennis"},
    {"ProductID": "56789", "Brand": "Unknown", "Category": "Beach"},
]

# Score each candidate, keeping (item, score) pairs in candidate order.
scores = [
    (candidate, predict_score(model, example_user, candidate, example_context))
    for candidate in candidate_items
]

# Highest score first.
scores_sorted = sorted(scores, key=lambda pair: pair[1], reverse=True)

print("Ranking for user 1000001:")
for ranked_item, ranked_score in scores_sorted:
    print(f"  Product {ranked_item['ProductID']} -> Score: {ranked_score:.4f}")

## 11. Online Update Example

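This cell shows how to update the model in real time: a newly observed transaction is encoded as a single VW example line and passed to `model.learn`, which adjusts the weights immediately without retraining on the historical data.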

In [None]:
# A freshly observed purchase, using the same field names as the training frame.
new_transaction = {
    "ClientID": 888888,
    "Universe": "Women",
    "ClientGender": "F",
    "Age": 31,
    "ProductID": "55555",
    "Brand": "Reebok",
    "Category": "Handball",
    "DayOfWeek": 4,
    "Season": "Spring",
    "AverageAmountPerTransactionSoFar": 22.5
}

# Encode with the same helper that produced the training lines (it only needs
# [] and .get access, so a plain dict works). Building the string by hand here
# would be a second copy of the feature encoding that can silently drift from
# what the model was trained on.
vw_line_new = to_vw_format(new_transaction)

print("New VW line:", vw_line_new)
model.learn(vw_line_new)
print("Model updated with new transaction.")

## 12. Save Final DataFrame and Clean Up

In [None]:
# Persist the processed frame.
final_df.to_parquet("final_df.parquet", index=False)
print("Saved final_df as final_df.parquet")

# Remove the intermediate CSV only if it is actually there. No cell visible in
# this notebook creates it, so an unconditional os.remove would raise
# FileNotFoundError on a fresh run or on any re-run after the first cleanup.
stale_csv = "final_dataframe.csv"
if os.path.exists(stale_csv):
    os.remove(stale_csv)
    print("Removed final_dataframe.csv")
else:
    print("final_dataframe.csv not found; nothing to remove")

## End of Notebook

This completes the online recommendation pipeline using Vowpal Wabbit.