In [2]:
import os
import sys

# Make the sibling ``my_utils`` directory (one level above the current working
# directory) importable. The path is assembled with a single os.path.join so
# the separator is always the platform's own and no doubled separator appears
# when the parent directory is the filesystem root.
module_path = os.path.abspath(os.path.join('..', 'my_utils'))
if module_path not in sys.path:
    # Appended (not inserted) so already-importable modules keep precedence.
    sys.path.append(module_path)

from datetime import datetime
from util_minio import MinioHandler
from extract import Extract
from transform import Transform
from load import Load

class ETLProcess:
    """Drive a single extract -> transform -> load pass.

    The three collaborators are stored exactly as given. Each one only needs
    to provide an ``execute`` method; ``run`` invokes them in pipeline order.
    """

    def __init__(self, extract, transform, load):
        # Stages are kept on public attributes under the same names as the
        # constructor arguments.
        self.extract, self.transform, self.load = extract, transform, load

    def run(self, transform_columns, table_name):
        """Execute the three stages in order, echoing intermediate results.

        Args:
            transform_columns: Forwarded unchanged as the second argument of
                ``transform.execute``.
            table_name: Forwarded unchanged as the second argument of
                ``load.execute``.

        Returns:
            None. The intermediate values are only printed to stdout.
        """
        # Stage 1: extract, then show whatever the extractor returned.
        raw_object = self.extract.execute()
        print(raw_object)

        # Stage 2: transform (and save to Parquet); print the result's type
        # first and then the result itself.
        curated_path = self.transform.execute(raw_object, transform_columns)
        for debug_value in (type(curated_path), curated_path):
            print(debug_value)

        # Stage 3: load the transformed data into the database.
        self.load.execute(curated_path, table_name)

# Example usage:
if __name__ == "__main__":
    # Request settings for the source API and the names shared by the stages.
    api_url = 'https://api.tiki.vn/seller-store/v2/collections/116532/products'
    params = {'limit': 100, 'cursor': 40}
    headers = {'x-source': 'local', 'Host': 'api.tiki.vn'}
    bucket_name = "tiki"
    # The connection string carries credentials, so it can be supplied through
    # the DW_TIKI_DB_URL environment variable instead of being edited in the
    # source. The literal below remains the default when the variable is unset.
    db_url = os.environ.get(
        'DW_TIKI_DB_URL',
        'postgresql://my_user:my_password@localhost:35432/dw_tiki',
    )
    # Column list handed to the transform stage and the table name handed to
    # the load stage.
    transform_columns = ["tiki_pid", "name", "brand_name", "origin", 'ingestion_date', 'ingestion_dt_unix']
    table_name = 'dim_product'

    # Build the three pipeline stages.
    extract = Extract(api_url, params, headers, bucket_name)
    transform = Transform(bucket_name)
    load = Load(db_url)

    # Run the ETL process end to end.
    etl = ETLProcess(extract, transform, load)
    etl.run(transform_columns, table_name)


Exported to Minio: raw_1716034211.csv
raw_1716034211.csv
<class 'str'>
s3://tiki/curated/dim_product/transformed_1716034211.parquet
     tiki_pid                                               name brand_name  \
0   200629270  [Tặng Bộ 3 ly thủy tinh cao cấp NESCAFE] Combo...    NESCAFÉ   
1   195357772                 Máy Tính CASIO FX-880BTG - Màu Đen      Casio   
2   195109412  Bộ nồi Inox dập nguyên khối Elmich Trimax Clas...     Elmich   
3   193570411  Nồi áp suất Elmich PCE-1805 dung tích 2.5L - H...     Elmich   
4   186118365  Sữa bột Vinamilk Dielac Alpha Gold 2 800g (cho...   Vinamilk   
5   147903537  Bếp Từ Đơn Sunhouse SHD6157 (1800W) - Kèm Nồ...   Sunhouse   
6   143185998          Ăn Dặm Không Phải Là Cuộc Chiến (Tái Bản)       None   
7   138934612  Combo 4 Cuốn: Tô Màu Phát Triển Trí Não Bộ Cho...       None   
8   138481685  Phần mềm Microsoft 365 Family English APAC EM ...  Microsoft   
9   110255220  Ấm đun nước siêu tốc Elmich Smartcook ICS-3859...  Smartcook   