## Import Necessary Libraries

In [1]:
import os

import ibis
from google.oauth2 import service_account

In [2]:
# Resolve connection settings from the environment so the notebook is not tied
# to one machine's filesystem layout or to a single project. When a variable is
# unset (or empty) the original hard-coded value is used as the fallback.
SERVICE_ACCOUNT_FILE = (
    os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
    or '/opt/spark/work-dir/google-service-account.json'
)
BIGQUERY_PROJECT_ID = os.environ.get('GOOGLE_CLOUD_PROJECT') or 'personal-use-461616'

# Load credentials from the service account JSON key file.
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=['https://www.googleapis.com/auth/bigquery'],  # OAuth scope for BigQuery access
)

# Open the BigQuery connection that the following cells query through.
con = ibis.bigquery.connect(
    project_id=BIGQUERY_PROJECT_ID,
    credentials=credentials,
)

In [3]:
# List the databases visible through this connection.
con.list_databases()

['ecommerce_product']

In [4]:
# Show the tables available in the `ecommerce_product` database.
con.list_tables(database='ecommerce_product')

['external_ingest_product', 'product_price_raw_data']

## Interactive mode

In [5]:
# Keep interactive mode off: expressions stay lazy and are only executed when
# explicitly requested (e.g. with `.to_pandas()`).
ibis.options.interactive = False

In [6]:
# Reference the raw price table. This builds an ibis table expression; it does
# not run a query for the rows by itself.
raw_df = con.table("ecommerce_product.product_price_raw_data")
raw_df

In [7]:
# Execute the expression for a small preview and materialize it as a pandas DataFrame.
raw_df.head().to_pandas()

Unnamed: 0,product_name_sha256,product_name,sale_price,ecommerce_name,ingest_timestamp_utc
0,b'\x1av\x0e]N\x91\xcf\xfb\xf7\xb0e\xabo\x96\xf...,dermasense vita-c brightening serum 30ml เซรั่...,400.0,ecommerce_01,2025-06-20 03:03:30+00:00
1,b'\x8f\x98\xa5\x13[\x99\xca\x17\x18\xd5E\xa8\x...,vitalcore bio-defense daily 90 capsules (ปกป้อ...,1200.0,ecommerce_01,2025-06-20 03:03:30+00:00
2,b'\xb2\x88/\x8b\x86\x1d\xa0\xcc\xaddX\x01Sx\x1...,dermasense vita-c brightening serum 30ml,300.0,ecommerce_02,2025-06-20 03:03:30+00:00
3,b'\x9b\x9f\x9d\x08\xbb\xd9\x9b\x1b$\xc6X\x97u*...,vitalcore bio-defense daily 90 capsules,1100.0,ecommerce_02,2025-06-20 03:03:30+00:00
4,"b'\xf7t""\xed\x89S\xf4\xbe\xc5+\xf5\xfcC\xdd\xd...",dermasense ph balance dry skin lotion 400ml (l...,510.0,ecommerce_01,2025-06-19 03:03:30+00:00


## Transform Data

### Split Data by Website

In [9]:
# Build one lazy, filtered table expression per e-commerce site.
em_01_df = raw_df.filter(raw_df["ecommerce_name"] == 'ecommerce_01')
em_02_df = raw_df.filter(raw_df["ecommerce_name"] == 'ecommerce_02')

# Print each (unevaluated) expression under its own header for inspection.
print("-------- em_01_df expression --------", em_01_df, sep="\n")
print("\n-------- em_02_df expression --------", em_02_df, sep="\n")

-------- em_01_df expression --------
r0 := DatabaseTable: personal-use-461616.ecommerce_product.product_price_raw_data
  product_name_sha256  binary
  product_name         string
  sale_price           float64
  ecommerce_name       string
  ingest_timestamp_utc timestamp('UTC')

Filter[r0]
  r0.ecommerce_name == 'ecommerce_01'

-------- em_02_df expression --------
r0 := DatabaseTable: personal-use-461616.ecommerce_product.product_price_raw_data
  product_name_sha256  binary
  product_name         string
  sale_price           float64
  ecommerce_name       string
  ingest_timestamp_utc timestamp('UTC')

Filter[r0]
  r0.ecommerce_name == 'ecommerce_02'


### Establish Product Name Baseline

Establish a product name baseline from the `ecommerce_02` product names.<br>
Each `ecommerce_02` product name will then be used as a query to identify similar products.