In [1]:
import os
from datetime import datetime

import httpx
import pandas as pd

def remove_keys_recursive(d, keys_to_remove):
    """Return a copy of ``d`` with every key listed in ``keys_to_remove`` dropped.

    Dicts and lists are walked recursively and rebuilt as new containers;
    any other value (scalars, tuples, ...) is returned as-is. The input
    structure itself is not modified.

    Args:
        d: Arbitrarily nested combination of dicts, lists and leaf values.
        keys_to_remove: Container of dict keys to strip at every nesting level.

    Returns:
        The pruned structure, with the same shape as ``d`` minus removed keys.
    """
    if isinstance(d, list):
        return [remove_keys_recursive(element, keys_to_remove) for element in d]

    if not isinstance(d, dict):
        # Leaf value: nothing to prune.
        return d

    pruned = {}
    for name, child in d.items():
        if name in keys_to_remove:
            # Skip the whole subtree hanging off an unwanted key.
            continue
        pruned[name] = remove_keys_recursive(child, keys_to_remove)
    return pruned

def rename_quantity_sold(product):
    """Replace the nested ``quantity_sold`` entry with a flat ``quantity_sold_value``.

    The ``product`` dict is modified in place and also returned. Products
    without a ``quantity_sold`` key are returned untouched.

    Args:
        product: Product record; when present, ``product["quantity_sold"]``
            must be subscriptable with the key ``"value"``.

    Returns:
        The same ``product`` object, with ``quantity_sold`` removed and
        ``quantity_sold_value`` set to its former ``"value"`` entry.
    """
    if "quantity_sold" not in product:
        return product

    # Pop first, then read "value": the nested entry is removed either way.
    product["quantity_sold_value"] = product.pop("quantity_sold")["value"]
    return product

def make_api_request(url, params, headers):
    """Send a single GET request and return the resulting ``httpx`` response.

    A short-lived ``httpx.Client`` is opened for the call and closed again
    before the function returns.

    Args:
        url: Target URL.
        params: Query-string parameters passed to ``Client.get``.
        headers: Request headers passed to ``Client.get``.

    Returns:
        The response object returned by ``Client.get``.
    """
    with httpx.Client() as http_client:
        return http_client.get(url, params=params, headers=headers)

def flatten_data(item):
    """Flatten one product record by lifting selected impression fields to the top level.

    The ``visible_impression_info`` entry is dropped from the result, and a
    fixed set of fields is copied out of its nested ``amplitude`` dict.
    Fields that are not available are set to ``None``.

    A missing, ``None`` or non-dict ``visible_impression_info`` / ``amplitude``
    is treated as "no data" rather than raising, because ``dict.get(key, {})``
    only falls back to the default when the key is absent, not when it is
    present with a null value.

    Note: the extracted fields overwrite any same-named top-level keys already
    present in ``item``.

    Args:
        item: Product record (dict).

    Returns:
        A new dict; ``item`` itself is not modified.
    """
    flattened_data = {
        key: value for key, value in item.items() if key != "visible_impression_info"
    }

    # Tolerate null / malformed nesting instead of failing on `.get`.
    impression_info = item.get("visible_impression_info")
    amplitude = impression_info.get("amplitude") if isinstance(impression_info, dict) else None
    if not isinstance(amplitude, dict):
        amplitude = {}

    fields_to_extract = (
        "category_l1_name",
        "category_l2_name",
        "category_l3_name",
        "seller_type",
        "primary_category_name",
        "is_imported",
    )

    flattened_data.update({field: amplitude.get(field) for field in fields_to_extract})

    return flattened_data

def save_to_local(data, timestamp, write_parquet=False):
    """Clean raw product records, write them under ``./data`` and return the frame.

    Processing steps:
      1. Strip ``impression_info`` and ``badges_new`` keys at every nesting level.
      2. Flatten ``quantity_sold`` and ``visible_impression_info`` per product.
      3. Build a DataFrame, rename ``id`` to ``tiki_pid`` and add the
         ``ingestion_dt_unix`` / ``ingestion_date`` columns derived from
         ``timestamp``.
      4. Write a CSV (and optionally a Parquet file) named after the Unix
         timestamp, creating the output directory if needed.

    Args:
        data: List of product dicts.
        timestamp: ``datetime`` of the ingestion; used for the ingestion
            columns and for the output file names.
        write_parquet: When true, also write the Parquet file. Defaults to
            ``False``, which matches the previous behaviour where the Parquet
            write was disabled.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    # Remove specified keys from all levels of the nested dictionary
    keys_to_remove = ['impression_info', 'badges_new']
    cleaned_data = remove_keys_recursive(data, keys_to_remove)

    # Rename "quantity_sold" to "quantity_sold_value", then flatten the nested structure
    cleaned_data = [flatten_data(rename_quantity_sold(product)) for product in cleaned_data]

    # Convert the cleaned data to a Pandas DataFrame
    pd_df = pd.DataFrame(cleaned_data)

    # Convert timestamp to Unix time (whole seconds)
    unix_timestamp = int(timestamp.timestamp())

    pd_df["ingestion_dt_unix"] = unix_timestamp
    pd_df = pd_df.rename(columns={"id": "tiki_pid"})
    # Truncate the ingestion instant to its calendar date, stored as a datetime column.
    pd_df["ingestion_date"] = pd.to_datetime(pd.to_datetime(pd_df["ingestion_dt_unix"], unit='s').dt.date)

    # The column only exists when at least one product carried "quantity_sold".
    # NOTE(review): the +4 offset is carried over unchanged; its rationale is not
    # documented in this notebook -- confirm it is intentional.
    if "quantity_sold_value" in pd_df.columns:
        pd_df["quantity_sold_value"] = pd_df["quantity_sold_value"] + 4

    # Specify local path for saving; make sure the directory exists first
    output_dir = './data'
    os.makedirs(output_dir, exist_ok=True)
    local_path_csv = f'{output_dir}/raw_116532_{unix_timestamp}.csv'
    local_path_parquet = f'{output_dir}/raw_116532_{unix_timestamp}.parquet'

    # Write the DataFrame to local CSV file
    pd_df.to_csv(local_path_csv, index=False)
    saved_files = [f"CSV: {local_path_csv}"]

    # Write the DataFrame to local Parquet file (opt-in)
    if write_parquet:
        pd_df.to_parquet(local_path_parquet, index=False)
        saved_files.append(f"Parquet: {local_path_parquet}")

    # Only report files that were actually written.
    print("Local Data saved successfully:\n" + "\n".join(saved_files))

    return pd_df

# API request: products of Tiki collection 116532 (limit=100, cursor=40)
url = 'https://api.tiki.vn/seller-store/v2/collections/116532/products'
params = {'limit': 100, 'cursor': 40}
headers = {'x-source': 'local', 'Host': 'api.tiki.vn'}

response = make_api_request(url, params, headers)

# Reset before branching: on a failed request the name would otherwise be
# undefined (NameError below) or still hold a frame from an earlier run.
cleaned_data_out = None

if response.status_code == 200:
    data = response.json()['data']

    timestamp = datetime.now()

    # Save to local files
    cleaned_data_out = save_to_local(data, timestamp)
else:
    print(f"Error: {response.status_code}")
    print(response.text)

# Display the Pandas DataFrame (only when the request succeeded)
if cleaned_data_out is not None:
    print(cleaned_data_out.head(2))

Local Data saved successfully:
CSV: ./data/raw_116532_1711861276.csv
Parquet: ./data/raw_116532_1711861276.parquet
    tiki_pid            sku  \
0  184476147  2833967241350   
1  270851667  8398718337298   

                                                name  \
0  Bộ đôi Sữa rửa mặt tạo bọt Collagen cho da Khô...   
1  Mặt nạ giấy dưỡng da Hàn Quốc innisfree My Rea...   

                                             url_key  \
0  bo-doi-sua-rua-mat-tao-bot-collagen-cho-da-kho...   
1  mat-na-giay-duong-da-han-quoc-innisfree-my-rea...   

                                            url_path   price  discount  \
0  bo-doi-sua-rua-mat-tao-bot-collagen-cho-da-kho...  173000     65000   
1  mat-na-giay-duong-da-han-quoc-innisfree-my-rea...   23000      4000   

   discount_rate  rating_average  review_count  ... category_l3_name  \
0             27             5.0             2  ...             None   
1             15             5.0            20  ...             None   

      seller_

In [2]:
from util_minio import MinioHandler

TypeError: deprecated() got an unexpected keyword argument 'name'

In [None]:
# Instantiate the MinIO helper imported from util_minio. No arguments are passed,
# so connection settings presumably come from inside the helper -- TODO confirm.
minio_handler = MinioHandler()
# Earlier upload call, kept commented out (not executed):
#s3.Bucket('tiki').upload_file('./data/raw_116532_1710308519.csv', 'raw_116532_1710308519.csv')

In [None]:
# Upload a saved CSV through the MinIO helper. The arguments appear to be
# (bucket, object name, local file path) -- TODO confirm against util_minio.
# NOTE(review): the file name is hard-coded (timestamp 1710396371) rather than
# derived from the save_to_local() run above; presumably that file must already
# exist under ./data for this call to succeed.
minio_handler.upload_file("tiki", "raw_116532_1710396371.csv", "./data/raw_116532_1710396371.csv")