In [2]:
import os
import pandas as pd
import yaml
import logging
import requests
from concurrent.futures import ThreadPoolExecutor

from utils.training_utils import get_features_attribute

import warnings
# Suppresses every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')

In [3]:
# NOTE(review): absolute, machine-specific path — consider deriving it from the
# repository root so the notebook runs on other machines.
config_dir = os.path.join('d:\\meli_case_solution', 'src', 'config')

# Context managers make sure both file handles are closed once parsed.
with open(os.path.join(config_dir, 'country_infos.yaml'), "r") as country_file:
    country_info = yaml.safe_load(country_file)

with open(os.path.join(config_dir, 'feature_config.yaml'), "r") as features_file:
    features_config = yaml.safe_load(features_file)

# Extract the 'struct' attribute of the feature configuration via the project helper.
features_struct = get_features_attribute(features_config, attribute='struct')

In [4]:
# Pagination offsets requested for every category: 0, 50, ..., 950.
offsets = list(range(0, 1000, 50))


def fetch_category_data(country_id, category_id, session=None, timeout=30):
    """Download the search results of one category into a single DataFrame.

    One request is issued per value in the module-level ``offsets`` list and
    the ``'results'`` entries of every page are stacked into one frame.

    Args:
        country_id: Site identifier interpolated into the URL (e.g. ``'MLA'``).
        category_id: Category identifier passed as the ``category`` query value.
        session: Optional object exposing ``get(url, timeout=...)`` (such as a
            ``requests.Session``). When omitted, a new session is created for
            this call and closed before returning; a caller-supplied session
            is left open.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        pandas.DataFrame with one row per result and a 0..n-1 index. The frame
        is empty when the first page has no results.

    Raises:
        requests.HTTPError: if a page answers with an error status (raised by
            ``raise_for_status`` on a ``requests`` response).
        KeyError: if a successful response has no ``'results'`` key.
    """
    if session is None:
        # Default path: each call owns its session, so concurrent callers
        # never share one.
        with requests.Session() as own_session:
            return fetch_category_data(
                country_id, category_id, session=own_session, timeout=timeout
            )

    records = []
    for offset in offsets:
        url = f"https://api.mercadolibre.com/sites/{country_id}/search?category={category_id}&offset={offset}"
        # Without a timeout a stalled connection would block the worker forever.
        response = session.get(url, timeout=timeout)
        # Surface the HTTP status instead of an opaque KeyError on 'results'.
        response.raise_for_status()
        page = response.json()['results']
        if not page:
            # An empty page means the listing is exhausted; higher offsets
            # would only cost extra requests.
            break
        records.extend(page)

    # Building the frame once avoids creating and concatenating a frame per page.
    return pd.DataFrame(records)


def process_country(country):
    """Fetch the search results of every category of one country.

    Args:
        country: Mapping with at least an ``'id'`` key holding the site
            identifier used in the API URLs.

    Returns:
        pandas.DataFrame stacking the frames returned by
        ``fetch_category_data`` for each category, with a fresh 0..n-1 index.
        An empty frame is returned when the site lists no categories.

    Raises:
        requests.HTTPError: if the categories request answers with an error
            status.
    """
    cats = requests.get(
        f"https://api.mercadolibre.com/sites/{country['id']}/categories",
        timeout=30,
    )
    # Fail with the HTTP status rather than a confusing error while parsing.
    cats.raise_for_status()

    # dict.fromkeys de-duplicates like a set but keeps the order returned by
    # the API, so the row order of the result is the same on every run.
    category_ids_country = list(dict.fromkeys(item['id'] for item in cats.json()))
    if not category_ids_country:
        # pd.concat raises ValueError on an empty list of frames.
        return pd.DataFrame()

    def fetch_one(category_id):
        return fetch_category_data(country['id'], category_id)

    # Categories are downloaded concurrently; executor.map yields results in
    # the order of category_ids_country.
    with ThreadPoolExecutor() as executor:
        dataframes = list(executor.map(fetch_one, category_ids_country))

    df_country_id = pd.concat(dataframes, ignore_index=True)
    return df_country_id


In [5]:
# One DataFrame per configured country, in the order given by the config file.
lst = []

# Iterate over the country entries directly instead of indexing by position.
for country in country_info['country']:
    # A distinct name keeps the per-country frame apart from the combined `df`.
    country_df = process_country(country)
    lst.append(country_df)
    print(f"{country['id']}: {country_df.shape}")

MEC: (25623, 40)
MRD: (10853, 39)
MLA: (31733, 40)
MCO: (30874, 40)
MBO: (2906, 39)
MCR: (14823, 39)
MLU: (30480, 40)
MLB: (31695, 40)
MPY: (6425, 39)
MGT: (4819, 39)
MSV: (844, 39)
MPE: (29916, 40)
MLM: (30521, 40)
MHN: (1029, 39)
MLV: (28482, 40)
MNI: (726, 39)
MPA: (3375, 39)


In [6]:
# Stack the per-country frames into one table with a fresh 0..n-1 index.
# The printed shapes show 39 or 40 columns per country; concat keeps the union
# of columns and fills the missing ones with NaN.
df = pd.concat(lst, ignore_index=True)

In [8]:
# Preview the first five rows of the combined frame.
df.head()

Unnamed: 0,id,title,condition,thumbnail_id,catalog_product_id,listing_type_id,sanitized_title,permalink,buying_mode,site_id,...,discounts,promotion_decorations,promotions,inventory_id,variation_id,variations_data,official_store_name,variation_filters,location,seller_contact
0,MEC565864602,Carolina Herrera Good Girl Tradicional Edp 80 ...,new,920992-MLA49695549958_042022,MEC6271502,gold_pro,,https://www.mercadolibre.com.ec/carolina-herre...,buy_it_now,MEC,...,,,,,,,,,,
1,MEC556439976,Masajeador Facial Barra T Dorada Antiarrugas R...,new,961391-MLU70396741248_072023,MEC24435970,bronze,,https://www.mercadolibre.com.ec/masajeador-fac...,buy_it_now,MEC,...,,,,,,,,,,
2,MEC530948501,Maquina Afeitadora Trimmer Recargable Dorada D...,new,605769-MLA79551245073_092024,MEC27999971,gold_pro,,https://www.mercadolibre.com.ec/maquina-afeita...,buy_it_now,MEC,...,,,,,,,,,,
3,MEC565968568,Carolina Herrera 212 Heroes Forever Young Hero...,new,777985-MLA52221299620_102022,MEC19034511,gold_pro,,https://www.mercadolibre.com.ec/carolina-herre...,buy_it_now,MEC,...,,,,,,,,,,
4,MEC592307172,144 Condones Preservativos Sure Uso Frecuente,new,912232-MLA80948355299_112024,MEC43769140,bronze,,https://www.mercadolibre.com.ec/144-condones-p...,buy_it_now,MEC,...,,,,,,,,,,
