In [1]:
import os
import httpx
import pandas as pd
from datetime import datetime
import sys

# Make the helper modules under ../my_utils importable from this notebook.
module_path = os.path.abspath(os.path.join('..'))
utils_path = os.path.join(module_path, "my_utils")

# Test for the exact entry that gets appended; checking `module_path` instead
# would never match, so every re-run of the cell would add a duplicate entry.
if utils_path not in sys.path:
    sys.path.append(utils_path)

import util_minio
# Troubleshooting note for the import above: if you hit
#   TypeError: deprecated() got an unexpected keyword argument 'name'
# the fix is to run `pip uninstall pyOpenSSL`.
# Reference: https://stackoverflow.com/questions/74147284/i-cannot-run-pip-anymore

# Module-level handler instance, created with default arguments; save_to_minio()
# below uses it to upload DataFrames.
minio_handler = util_minio.MinioHandler()

def remove_keys_recursive(d, keys_to_remove):
    """Return a copy of a nested dict/list structure without the given keys.

    Dictionaries are rebuilt without any key found in ``keys_to_remove`` and
    lists are rebuilt element by element; both are processed recursively.
    Any other value is returned as-is. The input structure is not modified.

    Args:
        d: A dict, a list, or any other value.
        keys_to_remove: Container of dictionary keys to drop at every level.

    Returns:
        The pruned copy (or ``d`` itself when it is neither a dict nor a list).
    """
    if isinstance(d, list):
        return [remove_keys_recursive(element, keys_to_remove) for element in d]

    if not isinstance(d, dict):
        return d

    pruned = {}
    for name, child in d.items():
        if name in keys_to_remove:
            continue
        pruned[name] = remove_keys_recursive(child, keys_to_remove)
    return pruned

def rename_quantity_sold(product):
    """Replace the nested ``quantity_sold`` entry with a flat ``quantity_sold_value``.

    When ``product`` has a ``"quantity_sold"`` key, that entry is removed and
    its ``"value"`` item is stored under ``"quantity_sold_value"``. Products
    without the key are left untouched.

    Args:
        product: Product dictionary; it is modified in place.

    Returns:
        The same ``product`` object.
    """
    if "quantity_sold" not in product:
        return product

    # pop() runs before the assignment, so the old key is gone first.
    product["quantity_sold_value"] = product.pop("quantity_sold")["value"]
    return product

def make_api_request(url, params, headers):
    """Send a single GET request and return the response.

    An ``httpx.Client`` is opened for this one call and closed when the
    ``with`` block exits.

    Args:
        url: Endpoint to request.
        params: Query parameters forwarded to ``client.get``.
        headers: HTTP headers forwarded to ``client.get``.

    Returns:
        The response object returned by ``client.get``.
    """
    with httpx.Client() as http_client:
        return http_client.get(url, params=params, headers=headers)

def flatten_data(item):
    """Flatten one product record by lifting selected amplitude fields to the top level.

    The ``visible_impression_info`` entry is dropped from the result, and a
    fixed set of fields is copied out of its nested ``amplitude`` mapping.

    For each extracted field the value is chosen as follows:
      * the amplitude value, when amplitude contains the field;
      * otherwise the record's existing top-level value for that field, so a
        field such as ``primary_category_name`` is not overwritten with ``None``;
      * otherwise ``None``.

    Args:
        item: Product dictionary. It is not modified.

    Returns:
        A new flat dictionary containing every original key except
        ``visible_impression_info`` plus all extracted fields.
    """
    flattened_data = {
        key: value for key, value in item.items() if key != "visible_impression_info"
    }

    # The key may be missing, or present but null / not a mapping; every such
    # case is treated as "no amplitude data" instead of raising AttributeError.
    impression_info = item.get("visible_impression_info")
    amplitude = impression_info.get("amplitude") if isinstance(impression_info, dict) else None
    if not isinstance(amplitude, dict):
        amplitude = {}

    fields_to_extract = [
        "category_l1_name",
        "category_l2_name",
        "category_l3_name",
        "seller_type",
        "primary_category_name",
        "is_imported",
    ]

    for field in fields_to_extract:
        # Amplitude wins when it has the field; otherwise keep what the record
        # already carries at the top level (None if it has nothing).
        flattened_data[field] = amplitude.get(field, flattened_data.get(field))

    return flattened_data

def save_to_df(data, timestamp, quantity_sold_offset=4):
    """Clean raw product records and build the DataFrame plus its CSV file name.

    Despite the name, nothing is written here; the caller persists the result.

    Steps:
      1. Drop the ``impression_info`` and ``badges_new`` keys at every nesting level.
      2. Replace each record's ``quantity_sold`` entry with ``quantity_sold_value``.
      3. Flatten each record with ``flatten_data``.
      4. Build the DataFrame, rename ``id`` to ``tiki_pid`` and add the
         ``ingestion_dt_unix`` / ``ingestion_date`` columns.
      5. Add ``quantity_sold_offset`` to ``quantity_sold_value`` when that column exists.

    Args:
        data: Sequence of product dictionaries.
        timestamp: ``datetime`` of the ingestion; converted to whole Unix seconds
            with ``datetime.timestamp`` (naive datetimes are treated as local time).
        quantity_sold_offset: Constant added to ``quantity_sold_value``. Defaults
            to 4, the value previously hard-coded here.
            NOTE(review): the reason for this offset is not documented anywhere
            in the notebook — confirm it is intended; pass 0 to keep raw values.

    Returns:
        Tuple ``(pd_df, csv_name)`` where ``csv_name`` is ``raw_<unix_timestamp>.csv``.
    """
    # Remove specified keys from all levels of the nested dictionary
    keys_to_remove = ['impression_info', 'badges_new']
    cleaned_data = remove_keys_recursive(data, keys_to_remove)

    # Rename "quantity_sold" -> "quantity_sold_value", then flatten each record
    cleaned_data = [flatten_data(rename_quantity_sold(product)) for product in cleaned_data]

    # Convert timestamp to Unix time
    unix_timestamp = int(datetime.timestamp(timestamp))

    # rename() returns a new frame, so no in-place mutation is needed
    pd_df = pd.DataFrame(cleaned_data).rename(columns={"id": "tiki_pid"})
    pd_df["ingestion_dt_unix"] = unix_timestamp
    pd_df["ingestion_date"] = pd.to_datetime(pd.to_datetime(pd_df["ingestion_dt_unix"], unit='s').dt.date)

    # The column only exists when at least one record carried "quantity_sold";
    # skip the adjustment instead of failing with a KeyError when none did.
    if "quantity_sold_value" in pd_df.columns:
        pd_df["quantity_sold_value"] = pd_df["quantity_sold_value"] + quantity_sold_offset

    # Specify path for saving
    csv_name = f'raw_{unix_timestamp}.csv'

    return pd_df, csv_name

def save_to_minio(bucket_name, object_name, dataframe):
    """Hand a DataFrame to the module-level MinIO handler for upload.

    Thin wrapper: the three arguments are forwarded positionally and unchanged
    to ``minio_handler.save_dataframe_to_csv``. Nothing is returned.

    Args:
        bucket_name: Target bucket name.
        object_name: Object name/key within the bucket.
        dataframe: DataFrame to store.

    NOTE(review): serialization details and error behaviour live in
    ``util_minio.MinioHandler`` and are not visible from this notebook.
    """
    minio_handler.save_dataframe_to_csv(bucket_name, object_name, dataframe)

# API request
url = 'https://api.tiki.vn/seller-store/v2/collections/116532/products'
params = {'limit': 100, 'cursor': 40}
headers = {'x-source': 'local', 'Host': 'api.tiki.vn'}

response = make_api_request(url, params, headers)

# Fail fast: everything below needs a successful payload. Without this guard a
# failed request would print the error and then die on an unrelated NameError
# (csv_name / df_tiki never assigned).
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
    raise RuntimeError(f"Tiki API request failed with status {response.status_code}")

data = response.json()['data']

timestamp = datetime.now()

# Build the cleaned DataFrame and the CSV name derived from the timestamp
df_tiki, csv_name = save_to_df(data, timestamp)

# Upload to the "tiki" bucket under the raw/ prefix
save_to_minio("tiki", f"raw/{csv_name}", df_tiki)

# Display the Pandas DataFrame
print('Exported to Minio: ', csv_name)
df_tiki.head(2)

Exported to Minio:  raw_116532_1716029177.csv


Unnamed: 0,tiki_pid,sku,name,url_key,url_path,price,discount,discount_rate,rating_average,review_count,...,category_l2_name,category_l3_name,seller_type,is_imported,layout_type,is_normal_delivery,imported,video_url,ingestion_dt_unix,ingestion_date
0,200629270,2519676253504,[Tặng Bộ 3 ly thủy tinh cao cấp NESCAFE] Combo...,combo-2-bich-ca-phe-hoa-tan-nescafe-3in1-vi-ng...,combo-2-bich-ca-phe-hoa-tan-nescafe-3in1-vi-ng...,208000,101000,33,5.0,51,...,Đồ uống,Cà phê,OFFICIAL_STORE,False,,,,,1716029177,2024-05-18
1,195357772,4882924146795,Máy Tính CASIO FX-880BTG - Màu Đen,may-tinh-casio-fx-880btg-mau-den-p195357772,may-tinh-casio-fx-880btg-mau-den-p195357772.ht...,698000,102000,13,5.0,195,...,Văn phòng phẩm,Máy Tính Điện Tử,OFFICIAL_STORE,False,grid_view,,,,1716029177,2024-05-18


In [2]:
# Show the column labels of df_tiki (built in the previous cell).
df_tiki.columns

Index(['tiki_pid', 'sku', 'name', 'url_key', 'url_path', 'price', 'discount',
       'discount_rate', 'rating_average', 'review_count', 'thumbnail_url',
       'seller_product_id', 'original_price', 'isGiftAvailable',
       'fastest_delivery_time', 'is_from_official_store', 'tiki_verified',
       'seller_product_sku', 'origin', 'availability', 'primary_category_path',
       'shippable', 'earliest_delivery_estimate', 'seller_name', 'order_route',
       'tiki_hero', 'thumbnail_width', 'is_nextday_delivery',
       'is_tikinow_delivery', 'seller_id', 'brand_name',
       'primary_category_name', 'brand_id', 'thumbnail_height', 'is_authentic',
       'productset_id', 'master_product_sku', 'quantity_sold_value',
       'category_l1_name', 'category_l2_name', 'category_l3_name',
       'seller_type', 'is_imported', 'layout_type', 'is_normal_delivery',
       'imported', 'video_url', 'ingestion_dt_unix', 'ingestion_date'],
      dtype='object')