-- In [None]:  (notebook cell marker left over from export; commented out so the script parses as SQL)
-- Step 1: Create RFM Table
-- Builds one row per phone_number with the three RFM features:
--   recency   : days between today and the most recent purchase_date
--   frequency : number of distinct owner_asset_id values
--   monetary  : sum of unit_price
CREATE OR REPLACE TABLE `dde-sbx-kenneth-lam-s001.TEST.rfm_table` AS
SELECT
  phone_number,
  -- Recency is measured from the LATEST purchase. Taking MAX over the per-row
  -- day differences would instead return the age of the oldest purchase.
  DATE_DIFF(CURRENT_DATE(), MAX(purchase_date), DAY) AS recency,
  COUNT(DISTINCT owner_asset_id) AS frequency,
  SUM(unit_price) AS monetary
FROM
  `dde-sbx-kenneth-lam-s001.TEST.owner_asset_w_price`
GROUP BY
  phone_number;

-- Step 2: Create K-means Model
-- Trains (or replaces) a BigQuery ML k-means model with five clusters.
-- Only the three RFM feature columns are fed to training; phone_number is
-- deliberately left out of the training input.
CREATE OR REPLACE MODEL `dde-sbx-kenneth-lam-s001.TEST.customer_segmentation_model`
OPTIONS (
  model_type = 'kmeans',
  num_clusters = 5
) AS
SELECT
  features.recency,
  features.frequency,
  features.monetary
FROM `dde-sbx-kenneth-lam-s001.TEST.rfm_table` AS features;

-- Step 3: Predict Customer Segments
-- Assigns every row of rfm_table to its nearest k-means centroid and stores
-- the result as one row per phone_number.
--
-- phone_number is not a model feature; it is included in the ML.PREDICT input
-- so that it is passed through to the output next to CENTROID_ID. This keeps
-- each prediction attached to its own customer. (Joining rfm_table to the
-- ML.PREDICT output with a bare comma and no join condition is a cross join
-- and pairs every customer with every prediction.)
--
-- The feature columns are read from the same table and with the same types
-- that were used for training, so no casting is needed.
CREATE OR REPLACE TABLE `dde-sbx-kenneth-lam-s001.TEST.customer_segments` AS
SELECT
  pred.phone_number,
  pred.recency,
  pred.frequency,
  pred.monetary,
  pred.CENTROID_ID AS cluster_id
FROM
  ML.PREDICT(
    MODEL `dde-sbx-kenneth-lam-s001.TEST.customer_segmentation_model`,
    (
      SELECT
        phone_number,
        recency,
        frequency,
        monetary
      FROM
        `dde-sbx-kenneth-lam-s001.TEST.rfm_table`
    )
  ) AS pred;

-- Step 4: Identify Likely Purchasers for Next Month
-- Selects the customers (identified by phone_number) that belong to the two
-- clusters with the lowest average recency, i.e. the clusters whose members
-- purchased most recently.
--
-- Column names match customer_segments as built in Step 3: the customer key
-- is phone_number and the cluster label is cluster_id.
CREATE OR REPLACE TABLE `dde-sbx-kenneth-lam-s001.TEST.likely_purchasers` AS
SELECT
  phone_number
FROM
  `dde-sbx-kenneth-lam-s001.TEST.customer_segments`
WHERE
  cluster_id IN (
    SELECT
      cluster_id
    FROM
      `dde-sbx-kenneth-lam-s001.TEST.customer_segments`
    GROUP BY
      cluster_id
    ORDER BY
      AVG(recency) ASC,
      cluster_id ASC  -- tie-break so the LIMIT picks the same clusters on every run
    LIMIT 2  -- Adjust based on the number of clusters indicating recent purchases
  );