In [1]:
import httpx
import pandas as pd
from datetime import datetime
import sys

def remove_keys_recursive(d, keys_to_remove):
    """Return a copy of ``d`` with every key listed in ``keys_to_remove`` dropped.

    Dictionaries and lists are walked recursively at any nesting depth and
    rebuilt as new objects; any other value is returned unchanged.

    Args:
        d: A dict, a list, or any leaf value.
        keys_to_remove: Container of keys to drop from every dict encountered.

    Returns:
        A new dict/list mirroring ``d`` without the unwanted keys, or ``d``
        itself when it is neither a dict nor a list.
    """
    # Lists: clean each element, keep the order.
    if isinstance(d, list):
        return [remove_keys_recursive(element, keys_to_remove) for element in d]

    # Leaves (str, int, None, tuple, ...) pass through untouched.
    if not isinstance(d, dict):
        return d

    pruned = {}
    for name, child in d.items():
        if name in keys_to_remove:
            continue  # drop the key together with its whole subtree
        pruned[name] = remove_keys_recursive(child, keys_to_remove)
    return pruned

def rename_quantity_sold(product):
    """Replace the nested ``quantity_sold`` entry with a flat ``quantity_sold_value``.

    The product dict is modified in place and also returned. Products without
    a ``quantity_sold`` key are returned untouched.

    Args:
        product: Product record (dict). When ``quantity_sold`` is a dict, its
            ``"value"`` entry is used.

    Returns:
        The same ``product`` object, with ``quantity_sold`` removed and
        ``quantity_sold_value`` set when the former was present.
    """
    if "quantity_sold" in product:
        quantity_sold = product.pop("quantity_sold")
        if isinstance(quantity_sold, dict):
            # A dict without "value" yields None instead of raising KeyError.
            product["quantity_sold_value"] = quantity_sold.get("value")
        else:
            # None or an already-flat scalar: keep it as-is rather than
            # failing on subscripting a non-dict.
            product["quantity_sold_value"] = quantity_sold
    return product

def make_api_request(url, params, headers):
    """Send a single HTTP GET request and return the httpx response.

    Args:
        url: Endpoint to query.
        params: Query-string parameters passed to the request.
        headers: HTTP headers passed to the request.

    Returns:
        The response object returned by ``httpx.Client.get``.
    """
    # The client lives only for this one call; the context manager closes it
    # on exit.
    with httpx.Client() as http_client:
        return http_client.get(url, headers=headers, params=params)

def flatten_data(item):
    """Flatten one product record by lifting selected ``amplitude`` fields.

    Copies every top-level key of ``item`` except ``visible_impression_info``,
    then adds a fixed set of fields read from
    ``item["visible_impression_info"]["amplitude"]``. A field missing from
    ``amplitude`` is set to ``None``. If ``visible_impression_info`` or
    ``amplitude`` is absent, ``None`` or not a dict, all extracted fields are
    ``None``. An extracted field overwrites a same-named top-level key.

    Args:
        item: Product record (dict). It is not modified.

    Returns:
        A new flat dict for the product.
    """
    fields_to_extract = [
        "category_l1_name",
        "category_l2_name",
        "category_l3_name",
        "seller_type",
        "primary_category_name",
        "is_imported",
    ]

    flattened_data = {
        key: value for key, value in item.items() if key != "visible_impression_info"
    }

    # `.get(key, {})` only covers a *missing* key; a key present with a None
    # value would make the chained `.get` raise AttributeError, so check the
    # types explicitly.
    impression_info = item.get("visible_impression_info")
    amplitude = impression_info.get("amplitude") if isinstance(impression_info, dict) else None
    if not isinstance(amplitude, dict):
        amplitude = {}

    flattened_data.update({field: amplitude.get(field) for field in fields_to_extract})

    return flattened_data

def save_to_local(data, timestamp, collection_id=116532, output_dir='../data',
                  quantity_sold_offset=4):
    """Clean a list of product records, write them to a local CSV, return the frame.

    Steps, in order:
      1. Drop the ``impression_info`` and ``badges_new`` keys at every nesting level.
      2. Replace ``quantity_sold`` with ``quantity_sold_value`` on each product.
      3. Flatten ``visible_impression_info`` on each product.
      4. Build a DataFrame, add ``ingestion_dt_unix`` / ``ingestion_date``,
         rename ``id`` to ``tiki_pid``.
      5. Write ``{output_dir}/raw_{collection_id}_{unix_timestamp}.csv``.

    Args:
        data: List of product dicts (the ``data`` member of the API response).
        timestamp: ``datetime`` of the ingestion; its integer Unix time is
            stored in the frame and used in the file name.
        collection_id: Identifier embedded in the CSV file name.
        output_dir: Directory the CSV is written to. It is not created here.
        quantity_sold_offset: Constant added to ``quantity_sold_value``.
            NOTE(review): the original code added a bare ``4`` with no
            explanation - confirm why this adjustment exists.

    Returns:
        The cleaned ``pandas.DataFrame`` that was written to disk.
    """
    # Remove specified keys from all levels of the nested dictionary
    keys_to_remove = ['impression_info', 'badges_new']
    cleaned_data = remove_keys_recursive(data, keys_to_remove)

    # Rename the "quantity_sold" field to "quantity_sold_value"
    cleaned_data = [rename_quantity_sold(product) for product in cleaned_data]

    # Flatten the nested structure
    cleaned_data = [flatten_data(product) for product in cleaned_data]

    # Convert the cleaned data to a Pandas DataFrame
    pd_df = pd.DataFrame(cleaned_data)

    # Convert timestamp to Unix time (whole seconds)
    unix_timestamp = int(timestamp.timestamp())

    pd_df["ingestion_dt_unix"] = unix_timestamp
    # Reassign instead of inplace=True so the intermediate frame is never
    # mutated behind the reader's back.
    pd_df = pd_df.rename(columns={"id": "tiki_pid"})
    # Round-trip through .dt.date truncates the timestamp to midnight of its day.
    pd_df["ingestion_date"] = pd.to_datetime(pd.to_datetime(pd_df["ingestion_dt_unix"], unit='s').dt.date)

    # The column only exists when at least one product carried "quantity_sold";
    # guard so a page without it does not raise KeyError.
    if "quantity_sold_value" in pd_df.columns:
        pd_df["quantity_sold_value"] = pd_df["quantity_sold_value"] + quantity_sold_offset

    ## Specify local path for saving
    local_path_csv = f'{output_dir}/raw_{collection_id}_{unix_timestamp}.csv'
    ## Write the DataFrame to local CSV file
    pd_df.to_csv(local_path_csv, index=False)

    return pd_df

# API request: one page of products from Tiki seller-store collection 116532
url = 'https://api.tiki.vn/seller-store/v2/collections/116532/products'
params = {'limit': 100, 'cursor': 40}
headers = {'x-source': 'local', 'Host': 'api.tiki.vn'}

response = make_api_request(url, params, headers)

if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
    # Stop the cell here. Without this, `cleaned_data_out` is never assigned
    # and the display line below fails with a NameError - or, worse, silently
    # shows a stale DataFrame left in the kernel by an earlier run.
    raise RuntimeError(f"Tiki API request failed with status {response.status_code}")

data = response.json()['data']

timestamp = datetime.now()

# Save to local files
cleaned_data_out = save_to_local(data, timestamp)

# Display the Pandas DataFrame
cleaned_data_out.head(2)

Unnamed: 0,tiki_pid,sku,name,url_key,url_path,price,discount,discount_rate,rating_average,review_count,...,seller_type,is_imported,video_url,fastest_delivery_time,earliest_delivery_estimate,order_route,layout_type,imported,ingestion_dt_unix,ingestion_date
0,252608882,4825599512084,Tã/bỉm quần HUGGIES SKINCARE MEGA JUMBO size X...,thung-ta-bim-quan-huggies-skincare-mega-jumbo-...,thung-ta-bim-quan-huggies-skincare-mega-jumbo-...,365000,280750,43,4.8,329,...,OFFICIAL_STORE,False,,,,,,,1712117838,2024-04-03
1,252608891,1033225203769,Tã/bỉm quần HUGGIES SKINCARE gói SUPER JUMBO s...,ta-bim-quan-huggies-skincare-goi-super-jumbo-s...,ta-bim-quan-huggies-skincare-goi-super-jumbo-s...,289000,184130,39,5.0,169,...,OFFICIAL_STORE,False,,,,,,,1712117838,2024-04-03


In [2]:
# Show the interpreter's current module search path as the cell output.
sys.path

['/Users/ta/Documents/sch_tiki_etl/dim_product',
 '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python39.zip',
 '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9',
 '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/lib-dynload',
 '',
 '/Users/ta/Library/Python/3.9/lib/python/site-packages',
 '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages']

In [3]:
import os
# Make the helper modules under ../my_utils importable from this notebook.
module_path = os.path.abspath(os.path.join('..'))
utils_path = os.path.join(module_path, "my_utils")

# Test membership of the path that is actually appended; checking the parent
# directory instead would add a duplicate entry on every re-run of the cell.
if utils_path not in sys.path:
    sys.path.append(utils_path)

In [4]:
import util_minio
# Troubleshooting: if you hit
#   TypeError: deprecated() got an unexpected keyword argument 'name'
# the fix is to run: pip uninstall pyOpenSSL
# Reference: https://stackoverflow.com/questions/74147284/i-cannot-run-pip-anymore

In [5]:
# Create the handler from the project-local util_minio module, using its
# default constructor arguments.
# NOTE(review): presumably connection settings are resolved inside
# MinioHandler - confirm in util_minio.
minio_handler = util_minio.MinioHandler()

In [6]:
# Hand the cleaned DataFrame to the handler's save_dataframe_to_csv.
# Presumably "tiki" is the bucket and the second argument the object key -
# verify against util_minio.
# NOTE(review): the timestamp in the object name (1711993659) is hard-coded and
# differs from the ingestion_dt_unix shown in the cell 1 output (1712117838);
# confirm whether the name should be derived from the DataFrame instead.
minio_handler.save_dataframe_to_csv("tiki", "raw/raw_116532_1711993659.csv", cleaned_data_out)