In [1]:
from google.cloud import bigquery
from google.cloud import aiplatform
import bigframes.pandas as bpd
import pandas as pd
from vertexai.language_models._language_models import TextGenerationModel
from bigframes.ml.cluster import KMeans
from bigframes.ml.model_selection import train_test_split

## Define variables and initiate the BigQuery and Vertex AI connection


In [2]:
project_id = 'qwiklabs-gcp-04-639f9fc4e4fb'
dataset_name = "ecommerce"
model_name = "customer_segmentation_model"
table_name = "customer_stats"
location = "us-central1"
client = bigquery.Client(project=project_id)
aiplatform.init(project=project_id, location=location)

## Create and import the ecommerce.customer_stats table

store data from thelook_ecommerce BigQuery public dataset into a new table entitled customer_status in your ecommerce dataset

In [3]:
%%bigquery
CREATE OR REPLACE TABLE ecommerce.customer_stats AS
SELECT
  user_id,
  DATE_DIFF(CURRENT_DATE(), CAST(MAX(order_created_date) AS DATE), day) AS days_since_last_order, ---RECENCY
  COUNT(order_id) AS count_orders, --FREQUENCY
  AVG(sale_price) AS average_spend --MONETARY
  FROM (
      SELECT
        user_id,
        order_id,
        sale_price,
        created_at AS order_created_date
        FROM `bigquery-public-data.thelook_ecommerce.order_items`
        WHERE
        created_at
            BETWEEN '2022-01-01' AND '2023-01-01'
  )
GROUP BY user_id;

Query is running:   0%|          |

In [3]:
# prompt: Convert the table ecommerce.customer_stats to a BigQuery DataFrames dataframe and show the top 10 records

df = bpd.read_gbq(f"{project_id}.{dataset_name}.{table_name}")
df.head(10)

Unnamed: 0,user_id,days_since_last_order,count_orders,average_spend
0,5456,781,3,74.083334
1,23723,861,4,38.87
2,31793,927,1,40.0
3,73288,849,1,44.0
4,18495,806,2,30.475
5,83310,617,1,109.989998
6,64752,646,1,68.949997
7,12666,865,2,41.225
8,41855,942,1,41.950001
9,5557,842,1,44.5


## Generate the K-means clustering model

Create a K-means clustering model to split the customer data into clusters based on fields like order recency, order count, and spend, and you will then visualize these as groups within a chart directly within the notebook.

In [6]:
# prompt: 1. Split df into test and training data for a K-means clustering algorithm store these as df_test_ and df_train.
# 2. Create a K-means cluster model using bigframes.ml.cluster KMeans with 5 clusters.
# 3. Save the model to BigQuery in a model called ecommerce.model_name using the to_gbq method.

#prompt: 1. Split df into test and training data for a K-means clustering algorithm store these as df_test_ and df_train. 2. Create a K-means cluster model using bigframes.ml.cluster KMeans with 5 clusters. 3. Save the model to BigQuery in a model called ecommerce.model_name using the to_gbq method.

df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
model = KMeans(n_clusters=5)
model.fit(df_train)
model.to_gbq(f"{project_id}.{dataset_name}.{model_name}")

KMeans(distance_type='EUCLIDEAN', init='KMEANS_PLUS_PLUS', n_clusters=5)

In [7]:
# prompt: Call the K-means prediction model on the df dataframe, and store the results as predictions_df and show the first 10 records.

predictions_df = model.predict(df)
predictions_df.head(10)

Unnamed: 0,CENTROID_ID,NEAREST_CENTROIDS_DISTANCE,user_id,days_since_last_order,count_orders,average_spend
0,1,"[{'CENTROID_ID': 1, 'DISTANCE': 1.872473116080...",5456,781,3,74.083334
1,1,"[{'CENTROID_ID': 1, 'DISTANCE': 1.786726963691...",23723,861,4,38.87
2,5,"[{'CENTROID_ID': 5, 'DISTANCE': 1.189322904165...",31793,927,1,40.0
3,5,"[{'CENTROID_ID': 5, 'DISTANCE': 0.593149003624...",73288,849,1,44.0
4,3,"[{'CENTROID_ID': 3, 'DISTANCE': 1.131863759969...",18495,806,2,30.475
5,4,"[{'CENTROID_ID': 4, 'DISTANCE': 1.355081598566...",83310,617,1,109.989998
6,4,"[{'CENTROID_ID': 4, 'DISTANCE': 0.616734556708...",64752,646,1,68.949997
7,3,"[{'CENTROID_ID': 3, 'DISTANCE': 1.587331978554...",12666,865,2,41.225
8,5,"[{'CENTROID_ID': 5, 'DISTANCE': 1.000212014580...",41855,942,1,41.950001
9,3,"[{'CENTROID_ID': 3, 'DISTANCE': 1.319170114332...",5557,842,1,44.5


# Create a visualization of the K-means clustering model results



In [None]:
# prompt: Using predictions_df, and matplotlib, generate a scatterplot. 2. On the x-axis of the scatterplot, display days_since_last_order and on the y-axis, display average_spend from predictions_df. 3. Color by cluster. The chart should be titled "Attribute grouped by K-means cluster."

import matplotlib.pyplot as plt

plt.scatter(predictions_df['days_since_last_order'], predictions_df['average_spend'], c=predictions_df['CENTROID_ID'])
plt.xlabel("days_since_last_order")
plt.ylabel("average_spend")
plt.title("Attribute grouped by K-means Cluster")
plt.show()

# Generate insights from the results of the model

* Summarize each cluster generated from the K-means model
* Define a prompt for the marketing campaign
* Generate the marketing campaign using the text-bison model

In [10]:
query = """
SELECT
 CONCAT('cluster ', CAST(centroid_id as STRING)) as centroid,
 average_spend,
 count_orders,
 days_since_last_order
FROM (
 SELECT centroid_id, feature, ROUND(numerical_value, 2) as value
 FROM ML.CENTROIDS(MODEL `{0}.{1}`)
)
PIVOT (
 SUM(value)
 FOR feature IN ('average_spend',  'count_orders', 'days_since_last_order')
)
ORDER BY centroid_id
""".format(dataset_name, model_name)

df_query = client.query(query).to_dataframe()
df_query.head()

Unnamed: 0,centroid,average_spend,count_orders,days_since_last_order
0,cluster 1,58.49,3.72,709.47
1,cluster 2,226.12,1.24,744.15
2,cluster 3,48.24,1.29,720.19
3,cluster 4,47.62,1.3,672.05
4,cluster 5,49.39,1.32,871.19


You should see the clusters summarized in a table. Some insights you can get from this table are that some clusters have a higher average spend, and others have a higher count of orders.

Next, you will convert the data frame into a string, so you can pass it to your large language model call.

In [11]:
df_query.to_string(header=False, index=False)

cluster_info = []
for i, row in df_query.iterrows():
 cluster_info.append("{0}, average spend ${2}, count of orders per person {1}, days since last order {3}"
  .format(row["centroid"], row["count_orders"], row["average_spend"], row["days_since_last_order"]) )

cluster_info = (str.join("\n", cluster_info))
print(cluster_info)

cluster 1, average spend $58.49, count of orders per person 3.72, days since last order 709.47
cluster 2, average spend $226.12, count of orders per person 1.24, days since last order 744.15
cluster 3, average spend $48.24, count of orders per person 1.29, days since last order 720.19
cluster 4, average spend $47.62, count of orders per person 1.3, days since last order 672.05
cluster 5, average spend $49.39, count of orders per person 1.32, days since last order 871.19


# Define a prompt for the marketing campaign

In [12]:
prompt = f"""
You're a creative brand strategist, given the following clusters, come up with \
creative brand persona, a catchy title, and next marketing action, \
explained step by step.

Clusters:
{cluster_info}

For each Cluster:
* Title:
* Persona:
* Next marketing step:
"""

In [13]:
#prompt:  Use the Vertex AI language_models API to call the PaLM2 text-bison model and generate a marketing campaign using the variable prompt. Use the following model settings: max_output_tokens=1024, temperature=0.4

model = TextGenerationModel.from_pretrained("text-bison")
response = model.predict(prompt, max_output_tokens=1024, temperature=0.4)
print(response.text)

 **Cluster 1: The Occasional Shoppers**

* **Title:** The Occasional Shoppers
* **Persona:** These customers are budget-conscious and make infrequent purchases. They are likely to be retirees or students.
* **Next marketing step:** Offer discounts or promotions to encourage them to make more frequent purchases.

**Cluster 2: The Big Spenders**

* **Title:** The Big Spenders
* **Persona:** These customers are high-income earners who are willing to spend money on luxury items. They are likely to be professionals or business owners.
* **Next marketing step:** Offer exclusive products or services to appeal to their sense of luxury.

**Cluster 3: The Loyal Customers**

* **Title:** The Loyal Customers
* **Persona:** These customers are repeat buyers who are satisfied with your products and services. They are likely to be families or young professionals.
* **Next marketing step:** Offer loyalty programs or rewards to show your appreciation for their continued business.

**Cluster 4: The Barg