In [3]:
# !pip install datasets --user

## Example product heirarchy and and summarization

1) Get data from here https://huggingface.co/datasets/c-s-ale/Product-Descriptions-and-Ads/viewer/c-s-ale--Product-Descriptions-and-Ads/train 
2) Load to BQ and establish LLM model
3) Predict and from raw descriptions to structured data with BQ LLMs

In [30]:
from datasets import load_dataset

dataset = load_dataset("c-s-ale/Product-Descriptions-and-Ads")

Found cached dataset parquet (/home/jupyter/.cache/huggingface/datasets/c-s-ale___parquet/c-s-ale--Product-Descriptions-and-Ads-4c74ccc0a63de502/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/2 [00:00<?, ?it/s]

In [31]:
dataset

DatasetDict({
    train: Dataset({
        features: ['product', 'description', 'ad'],
        num_rows: 90
    })
    test: Dataset({
        features: ['product', 'description', 'ad'],
        num_rows: 10
    })
})

In [39]:
import pandas as pd
train_df = pd.DataFrame(dataset['train'])
train_df['split'] = 'train'

test_df = pd.DataFrame(dataset['test'])
test_df['split'] = 'test'

full_df = pd.concat([train_df, test_df])

In [40]:
full_df

Unnamed: 0,product,description,ad,split
0,Harem pants,"A style of pants with a dropped crotch, loose...","Discover Harem Pants! Unique, stylish bohemian...",train
1,Fringe skirt,A skirt featuring fringe detailing on the bot...,Introducing our fabulous Fringe Skirt! Step ou...,train
2,Gingham dress,A dress featuring a two-toned checkered patte...,Introducing the Gingham Dress: Timeless & Chic...,train
3,Duster coat,"A long, lightweight coat that falls below the...",Discover unparalleled style & comfort with our...,train
4,Henley shirt,"A collarless, button-up shirt with a small pl...",Discover timeless style & unbeatable comfort w...,train
...,...,...,...,...
5,Printed leggings,"Stretchy, close-fitting pants featuring bold ...",Discover ultimate style with our Printed Leggi...,test
6,Puffer jacket,"A warm, insulated jacket with a quilted desig...","Stay cozy with our Puffer Jacket, designed to ...",test
7,Espadrille sandals,"Casual sandals with a woven base, perfect for...",Discover the perfect summer accessory - Espadr...,test
8,Paisley-print dress,"A dress featuring the intricate, teardrop-sha...",Discover bohemian elegance: Our Paisley-print ...,test


In [42]:
full_df.to_gbq('genai_cap_v1.prod_raw', 
                 'cpg-cdp',
                 chunksize=10000, 
                 if_exists='replace',
                 )

100%|██████████| 1/1 [00:00<00:00, 5629.94it/s]


#### Next go here to set up a Vertex connection to BQ

https://cloud.google.com/bigquery/docs/bigquery-ml-remote-model-tutorial

In [None]:
!bq mk --connection --location=us --project_id=cpg-cdp \
    --connection_type=CLOUD_RESOURCE bq-vertex

!bq show --connection 

In [None]:
%%bigquery
CREATE MODEL `cpg-cdp.genai_cap_v1.llm_model1`

REMOTE WITH CONNECTION `cpg-cdp.us.bq-vertex`

OPTIONS (remote_service_type = 'CLOUD_AI_LARGE_LANGUAGE_MODEL_V1');

In [44]:
%%bigquery predictions
SELECT * FROM

ML.GENERATE_TEXT (

MODEL `genai_cap_v1.llm_model1`,

(SELECT CONCAT ("Provide a summary of the product attributes and heirarchy for product: ", product, " ", description) AS prompt

FROM `cpg-cdp.genai_cap_v1.prod_raw`
),

STRUCT ( 0.2 AS temperature,

  1024 AS max_output_tokens,

  0.8 AS top_p,

  40 AS top_k)
  )

Query is running:   0%|          |

Downloading:   0%|          |

In [45]:
predictions

Unnamed: 0,ml_generate_text_result,ml_generate_text_status,prompt
0,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
1,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
2,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
3,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
4,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
...,...,...,...
95,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
96,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
97,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...
98,"{""predictions"":[{""citationMetadata"":{""citation...",,Provide a summary of the product attributes an...


In [65]:
import json
from pprint import pprint


pprint(predictions['prompt'][2]), pprint(json.loads(predictions['ml_generate_text_result'][2]))

('Provide a summary of the product attributes and heirarchy for product:  '
 'Gingham dress  A dress featuring a two-toned checkered pattern, often '
 'associated with picnics and summery outfits.')
{'predictions': [{'citationMetadata': {'citations': []},
                  'content': '**Product Hierarchy**\n'
                             '\n'
                             '* Gingham Dress\n'
                             '    * Color\n'
                             '        * Red\n'
                             '        * Blue\n'
                             '        * White\n'
                             '    * Pattern\n'
                             '        * Plaid\n'
                             '        * Checkered\n'
                             '    * Fabric\n'
                             '        * Cotton\n'
                             '        * Polyester\n'
                             '    * Sleeve Length\n'
                             '        * Short\n'
                 

(None, None)