This notebook walks through loading the FAQ CSV file into a BigQuery table and then generating text embeddings for each row with a BigQuery ML remote embedding model.

### Install required packages

In [1]:
!pip install langchain-google-genai langchain-google-vertexai langchain-google-community langchain-core --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Setup variables

In [2]:
# Google Cloud project that owns the BigQuery resources.
PROJECT_ID = "qwiklabs-gcp-03-7a8bdf6e2e2c"
# BigQuery location identifier.
LOCATION = "us"

# Dataset name, the table that receives the raw CSV rows, and the table that
# stores the same rows together with their embeddings.
DATASET = "AlaskaDept"
TABLE = "faqs"
TABLE_EMBEDDED = "faqs_embedded"

### Construct a BigQuery client object.

In [None]:
from google.cloud import bigquery

# Single BigQuery client, bound to the configured project, that the
# following cells use for every API request.
client = bigquery.Client(
    project=PROJECT_ID,
)

### Create a new dataset in BigQuery

In [6]:
# Create the dataset (fully-qualified as "<project>.<dataset>").
dataset_id = f"{client.project}.{DATASET}"

# Construct a full Dataset object to send to the API.
dataset = bigquery.Dataset(dataset_id)

# Specify the geographic location where the dataset should reside.
dataset.location = "US"

# exists_ok=True keeps this cell re-runnable: if the dataset already exists the
# existing dataset is returned instead of raising a 409 Conflict error.
dataset = client.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))

Created dataset qwiklabs-gcp-03-7a8bdf6e2e2c.AlaskaDept


### Create a new table

In [7]:
# Create the (initially schema-less) table that will receive the CSV rows.
table_id = f"{client.project}.{dataset.dataset_id}.{TABLE}"

table = bigquery.Table(table_id)
# exists_ok=True keeps this cell re-runnable: an already existing table is
# returned as-is instead of raising a 409 Conflict error.
table = client.create_table(table, exists_ok=True)  # API request

print(f"Created {table_id}.")

Created qwiklabs-gcp-03-7a8bdf6e2e2c.AlaskaDept.faqs.


### Load CSV into table

In [8]:
# Load data from CSV
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("question", "STRING"),
        bigquery.SchemaField("answer", "STRING"),
    ],
    # The first row of the CSV is skipped rather than loaded as data.
    skip_leading_rows=1,
    # The source format defaults to CSV, so the line below is optional.
    source_format=bigquery.SourceFormat.CSV,
    # Replace the table contents on every run. The default disposition appends,
    # so re-running this cell would otherwise duplicate every row.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
uri = "gs://labs.roitraining.com/alaska-dept-of-snow/alaska-dept-of-snow-faqs.csv"

load_job = client.load_table_from_uri(
    uri, table_id, job_config=job_config
)  # Make an API request.

load_job.result()  # Waits for the job to complete.

# Re-read the table metadata to report how many rows it now holds.
destination_table = client.get_table(table_id)  # Make an API request.
print("Loaded {} rows.".format(destination_table.num_rows))

Loaded 50 rows.


### Create a Cloud resource connection

In [None]:
!bq mk --connection --connection_type=CLOUD_RESOURCE --location=us --project_id={PROJECT_ID} "embedding_conn"
!bq show --location=us --connection --project_id={PROJECT_ID} "embedding_conn"

In [None]:
# Update your service account here: replace the value below with the service
# account that belongs to the "embedding_conn" connection in your own project.
connection_service_account = "bqcx-569779670169-huh6@gcp-sa-bigquery-condel.iam.gserviceaccount.com" # @param {"type": "string"}
# IAM member string in the "serviceAccount:<email>" form expected by gcloud.
connection_member = f"serviceAccount:{connection_service_account}"

# Grant the connection's service account the roles/aiplatform.user role on the project.
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member={connection_member} --role='roles/aiplatform.user' --condition=None --quiet
# Disabled: additional BigQuery role binding, kept for reference only.
# NOTE(review): the predefined role ID is normally spelled roles/bigquery.dataOwner -- confirm before enabling.
# !gcloud projects add-iam-policy-binding {PROJECT_ID} --member={connection_member} --role='roles/bigquery.dataowner' --condition=None --quiet

### Create embedding model

In [9]:
# Register a remote BigQuery ML model that calls the 'text-embedding-005'
# endpoint through the "embedding_conn" connection. The connection is addressed
# as "<location>.<connection name>", so LOCATION is reused here.
query = f"""
CREATE OR REPLACE MODEL `{DATASET}.Embeddings`
REMOTE WITH CONNECTION `{LOCATION}.embedding_conn` OPTIONS (ENDPOINT = 'text-embedding-005');
"""

query_job = client.query(query)  # API request
query_job.result()  # Waits for the query to complete

# The statement creates a model, not a table, so the message says so.
print("Embeddings model created successfully.")


Embeddings table created successfully.


### Generate embeddings

In [12]:
# Build the embedded table: each row keeps the original question and answer,
# the concatenated text that was embedded ("content"), and the embedding itself.
query = f"""
CREATE OR REPLACE TABLE `{DATASET}.{TABLE_EMBEDDED}` AS
SELECT question, answer, content, ml_generate_embedding_result AS embedding
FROM ML.GENERATE_EMBEDDING(
  MODEL `{DATASET}.Embeddings`,
  (
    SELECT CONCAT(question, ' ', answer) AS content,
           question,
           answer
    FROM `{DATASET}.{TABLE}`
  )
);
"""

query_job = client.query(query)  # API request
query_job.result()  # Waits for the query to complete

# The query can finish without error while individual rows still come back
# with an empty embedding, so count those rows before claiming success.
check_query = f"""
SELECT COUNTIF(IFNULL(ARRAY_LENGTH(embedding), 0) = 0) AS failed_rows,
       COUNT(*) AS total_rows
FROM `{DATASET}.{TABLE_EMBEDDED}`
"""
check_row = next(iter(client.query(check_query).result()))

if check_row["failed_rows"]:
    print(
        "WARNING: {} of {} rows have an empty embedding; re-run this cell.".format(
            check_row["failed_rows"], check_row["total_rows"]
        )
    )
else:
    print("Embeddings generated successfully.")

Embeddings generated successfully.


#### The data is now loaded into BigQuery together with its embeddings