In [1]:
import json
import math
import os
import time
from datetime import datetime

import numpy as np
import pandas as pd
import psycopg2
import requests
from dotenv import load_dotenv
from loguru import logger
from psycopg2 import sql

In [2]:
load_dotenv()
ACCESS_TOKEN_BUENOSHOPS = os.getenv("ACCESS_TOKEN_BUENOSHOPS")
SELLER_ID_BUENOSHOPS = os.getenv("SELLER_ID_BUENOSHOPS")

ACCESS_TOKEN_MUSICALCRIS = os.getenv("ACCESS_TOKEN_MUSICALCRIS")
SELLER_ID_MUSICALCRIS = os.getenv("SELLER_ID_MUSICALCRIS")

ACCESS_TOKEN_MCENTER = os.getenv("ACCESS_TOKEN_MCENTER")
SELLER_ID_MCENTER = os.getenv("SELLER_ID_MCENTER")

HOST = os.getenv("HOST")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")

In [3]:
db_config = {
    "host": HOST,
    "database": POSTGRES_DB,
    "user": POSTGRES_USER,
    "password": POSTGRES_PASSWORD,
}

In [4]:
# Consulta aos itens com logistic_type=fulfillment
base_url = f"https://api.mercadolibre.com/users/{SELLER_ID_MCENTER}/items/search?logistic_type=fulfillment"

params = {
    "limit": 100,
    "offset": 0,
}

headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MCENTER}"}

# buscando lista de códigos
json_list = []
try:
    while True:
        response = requests.get(base_url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json()
        logger.info(data)
        if "results" in data:
            json_list.extend(data["results"])
            logger.info(data["results"])
        else:
            break

        # Verifique se há mais páginas
        if "paging" in data:
            total_data = data["paging"].get("total")

            total_pages = math.ceil(total_data / params["limit"])
            logger.info(f"Total de páginas a serem processadas: {total_pages}")
            logger.info(f'Offset atual: {params["offset"]}')

            if params["offset"] >= total_pages * params["limit"]:
                break

            params["offset"] += params["limit"]
        else:
            break

except requests.exceptions.RequestException as req_err:
    logger.error(f"Erro ao fazer a requisição para {base_url}: {req_err}")
except Exception as e:
    logger.error(f"Erro não esperado: {e}")

df_json_list = pd.DataFrame(json_list)


[32m2023-12-14 14:58:34.377[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1m{'seller_id': '233632476', 'results': ['MLB3778561802', 'MLB3436515963', 'MLB3778538730', 'MLB924922735', 'MLB949771924', 'MLB949776093', 'MLB949788598', 'MLB949790387', 'MLB950297972', 'MLB950303724', 'MLB951043222', 'MLB1363602423', 'MLB1397153967', 'MLB1398114866', 'MLB1399101853', 'MLB1400763583', 'MLB1401964928', 'MLB1401946009', 'MLB1403147752', 'MLB1403024540', 'MLB1403999905', 'MLB1403934211', 'MLB1403920691', 'MLB1403791227', 'MLB1403763907', 'MLB1416821703', 'MLB1418498416', 'MLB1418470905', 'MLB1418466676', 'MLB1417658704', 'MLB1418014629', 'MLB1418532096', 'MLB1425101880', 'MLB1431387563', 'MLB1435969309', 'MLB1435852259', 'MLB1435836214', 'MLB1400355425', 'MLB1440291959', 'MLB1440288810', 'MLB1440247563', 'MLB1417877602', 'MLB1425113746', 'MLB1453957901', 'MLB1453221977', 'MLB1457193551', 'MLB1459840256', 'MLB1459799426', 'MLB1459799420', 'MLB1459771919', 'MLB1461271

In [5]:
df_json_list

Unnamed: 0,0
0,MLB3778561802
1,MLB3436515963
2,MLB3778538730
3,MLB924922735
4,MLB949771924
...,...
431,MLB4272734766
432,MLB3532694787
433,MLB3534524129
434,MLB3536103973


In [6]:

# buscando de itens em json
json_list_item = []
c = 1
for item in json_list:
    base_url = f"https://api.mercadolibre.com/items/{item}"
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MCENTER}"}
    t = len(json_list)
    logger.info(item)
    logger.info(f"{c}/{t}")
    c += 1

    try:
        response = requests.get(base_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        json_list_item.append(data)
        logger.info(f"Tamanho da nova lista: {len(json_list_item)}/{t}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Erro ao obter dados para o item {item}: {e}")

    # Se c for um múltiplo de 50, aguarde 1 minuto
    if c % 50 == 0:
        logger.warning("Esperando 1 minuto...")
        time.sleep(60)

logger.info(f"Tamanho da lista de itens: {len(json_list_item)}")

# Salvando a lista de itens
caminho_arquivo = f"Data/Output/list_mcenter_items.json"

with open(caminho_arquivo, "w") as arquivo:
    json.dump(json_list_item, arquivo)

with open(caminho_arquivo, "r") as arquivo:
    json_list_item = json.load(arquivo)

df_list_item = pd.DataFrame(json_list_item)

logger.info(f"Tamanho do dataframe de itens: {df_list_item.shape}")
df_list_item.sample()

[32m2023-12-14 14:58:40.236[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mMLB3778561802[0m
[32m2023-12-14 14:58:40.238[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m1/436[0m
[32m2023-12-14 14:58:41.403[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mTamanho da nova lista: 1/436[0m
[32m2023-12-14 14:58:41.404[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mMLB3436515963[0m
[32m2023-12-14 14:58:41.404[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m2/436[0m
[32m2023-12-14 14:58:42.618[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mTamanho da nova lista: 2/436[0m
[32m2023-12-14 14:58:42.620[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mMLB3778538730[0m
[32m2023-12-14 14:58:42.623[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m3/

Unnamed: 0,id,site_id,title,seller_id,category_id,user_product_id,official_store_id,price,base_price,original_price,...,parent_item_id,differential_pricing,deal_ids,automatic_relist,date_created,last_updated,health,catalog_listing,item_relations,channels
187,MLB2003287174,MLB,Correia Guitarra Violão Baixo Basso Vt Fl Flor...,233632476,MLB72752,,4572,109.9,109.9,,...,,,[],False,2021-08-30T17:59:10.000Z,2023-12-10T10:07:58.000Z,1.0,False,"[{'id': 'MLB3537409305', 'variation_id': None,...","[marketplace, mshops]"


In [7]:
# pegando dados em attributes
# attributes: SELLER_SKU
resultados_attributes = []

for item in json_list_item:
    # Extrair os valores desejados
    first_id = item["id"]
    inventory_id = item["inventory_id"]
    variations = item["variations"]
    status = item["status"]
    catalog_product_id = item["catalog_product_id"]
    seller_custom_field = item["seller_custom_field"]
    catalog_listing = item["catalog_listing"]
    logistic_type = item["shipping"]["logistic_type"]
    item_relations = item["item_relations"]

    # Procurar em "attributes" onde "id" é "SELLER_SKU"
    seller_sku_entry = next(
        (attr for attr in item["attributes"] if attr["id"] == "SELLER_SKU"), None
    )

    # Pegar "value_name" e "value_id" se a entrada existir, caso contrário, definir como None
    attribute_value_name = (
        seller_sku_entry["value_name"] if seller_sku_entry else None
    )
    attribute_value_id = seller_sku_entry["value_id"] if seller_sku_entry else None

    # Adicionar os resultados_attributes à lista
    resultados_attributes.append(
        {
            "ml_code": first_id,
            "inventory_id": inventory_id,
            "status": status,
            "variations": variations,
            "catalog_listing": catalog_listing,
            "logistic_type":logistic_type,
        }
    )

df_sku = pd.DataFrame(resultados_attributes)

In [8]:
df_sku

Unnamed: 0,ml_code,inventory_id,status,variations,catalog_listing,logistic_type
0,MLB3778561802,AVOX05934,closed,[],True,fulfillment
1,MLB3436515963,WTHI04163,closed,[],True,fulfillment
2,MLB3778538730,JLAX10536,closed,[],True,fulfillment
3,MLB924922735,JFGN34621,paused,[],False,fulfillment
4,MLB949771924,TDFV00990,paused,[],False,fulfillment
...,...,...,...,...,...,...
429,MLB4272734766,ENML56386,active,[],True,fulfillment
430,MLB3532694787,BLHH53768,active,[],True,fulfillment
431,MLB3534524129,TIOX11058,active,[],True,fulfillment
432,MLB3536103973,WOJB37093,active,[],True,fulfillment


In [9]:
# pegando dados em variations
# variations: variation_id,  attribute_combination: value_id, value_name, seller_sku ,inventory_id
resultados_variations = []
    
for item in json_list_item:
    # Extrair os valores comuns para cada item
    first_id = item.get("id")
    inventory_id = item.get("inventory_id")
    logistic_type = item.get("shipping", {}).get("logistic_type")

    # Extrair os valores específicos para cada variação
    for variacao in item.get("variations", []):
        variation_id = variacao.get("id")
        variation_seller_sku = variacao.get("seller_custom_field")
        variation_inventory_id = variacao.get("inventory_id")
        attribute_combination = variacao.get("attribute_combinations", [{}])[0]
        value_id = attribute_combination.get("value_id")
        value_name = attribute_combination.get("value_name")
        item_relations = attribute_combination.get("item_relations", [{}])[0]

        # Adicionar os resultados_variations à lista
        resultados_variations.append(
            {
                "ml_code": first_id,
                "inventory_id": inventory_id,
                # "logistic_type": logistic_type,
                "variation_id": variation_id,
                # "value_id": value_id,
                "value_name": value_name,
                # "var_seller_sku": variation_seller_sku,
                "variation_inventory_id": variation_inventory_id,
                # "item_relations":item_relations,
            }
        )

df_variations = pd.DataFrame(resultados_variations)

In [10]:
df_variations

Unnamed: 0,ml_code,inventory_id,variation_id,value_name,variation_inventory_id
0,MLB1398114866,,174302061130,0.012,TNXB83665
1,MLB1398114866,,174302125663,0.010,DXJZ75064
2,MLB1398114866,,174301393967,0.009,JDMV76187
3,MLB1398114866,,174302805456,0.011,RJUL81287
4,MLB1403147752,,48946752091,Madeira,GLMK21133
...,...,...,...,...,...
163,MLB3339660079,,178533059693,35,QHGI58432
164,MLB3339660079,,178533059695,3,IYNL59473
165,MLB3339660079,,178533059697,2,ARDK78657
166,MLB3339660079,,178533059699,25,IDSV51539


In [11]:
# Unindo as duas tabelas
df_sku_var = pd.merge(
    df_sku,
    df_variations,
    left_on=["ml_code", "inventory_id"],
    right_on=["ml_code", "inventory_id"],
    how="left",
)
df_sku_var = df_sku_var.drop(["variations", "variation_id"], axis=1)
df_sku_var

# *se variation_inventory_id = None -> variation_inventory_id == inventory_id && remove inventory_id && variation_inventory_id rename to inventory_id*
df_sku_var["variation_inventory_id"].fillna(
    df_sku_var["inventory_id"], inplace=True
)

# Editando tabela
cols = [
    "ml_code",
    "variation_inventory_id",
    "value_name",
    "status",
    "catalog_listing",
    "logistic_type"
]
df_sku_var = df_sku_var[cols]
df_sku_var = df_sku_var.rename(columns={"variation_inventory_id": "inventory_id"})

logger.info(f"Tamanho do dataframe final: {df_sku_var.shape}")

[32m2023-12-14 15:24:26.734[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m29[0m - [1mTamanho do dataframe final: (523, 6)[0m


In [12]:
df_sku_var

Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing,logistic_type
0,MLB3778561802,AVOX05934,,closed,True,fulfillment
1,MLB3436515963,WTHI04163,,closed,True,fulfillment
2,MLB3778538730,JLAX10536,,closed,True,fulfillment
3,MLB924922735,JFGN34621,,paused,False,fulfillment
4,MLB949771924,TDFV00990,,paused,False,fulfillment
...,...,...,...,...,...,...
518,MLB4272734766,ENML56386,,active,True,fulfillment
519,MLB3532694787,BLHH53768,,active,True,fulfillment
520,MLB3534524129,TIOX11058,,active,True,fulfillment
521,MLB3536103973,WOJB37093,,active,True,fulfillment


In [13]:
### Populando banco de dados ###
try:
    conn = psycopg2.connect(**db_config)

    # Use a tabela fornecida como parâmetro
    # query = f"SELECT * FROM {table_item};"
    query = f"SELECT * FROM mcenter_items;"
    logger.info(query)
    df_items = pd.read_sql(query, conn)
except psycopg2.Error as e:
    logger.error(f"Erro do psycopg2 em 'items': {e}")
except Exception as e:
    logger.error(f"Erro ao consultar 'items': {e}")

dx = df_items.copy()
dy = df_sku_var.copy()

# Editando DFs
dx = dx.drop(columns=["created_at", "updated_at"])  # remove linhas de data
dx.replace("NaN", np.nan, inplace=True)  # altera de strin para NaN
dx = dx.astype(str)  # altera para tipo string
dy = dy.astype(str)

[32m2023-12-14 15:24:29.329[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mSELECT * FROM mcenter_items;[0m
  df_items = pd.read_sql(query, conn)


In [14]:
dx

Unnamed: 0,ml_code,inventory_id,value_name,variation_id,status,catalog_listing,logistic_type
0,MLB3778561802,AVOX05934,,,closed,True,fulfillment
1,MLB3436515963,WTHI04163,,,closed,True,fulfillment
2,MLB3778538730,JLAX10536,,,closed,True,fulfillment
3,MLB924922735,JFGN34621,,,paused,False,fulfillment
4,MLB1435852259,TLCG38735,,,active,False,fulfillment
...,...,...,...,...,...,...,...
526,MLB1440247563,GPGU38258,,,active,False,fulfillment
527,MLB1435969309,TPDH38567,,,active,False,fulfillment
528,MLB1490210934,SPKV63227,,,active,False,fulfillment
529,MLB1556975175,DZVJ05686,Marrom,57895096313,paused,False,fulfillment


In [15]:
x = 'ITDN89121'
y = dy[dy['inventory_id']==x]
n = dx[dx['inventory_id']==x]

y
n

Unnamed: 0,ml_code,inventory_id,value_name,variation_id,status,catalog_listing,logistic_type
65,MLB1965264557,ITDN89121,Preto,92895807779.0,paused,False,fulfillment
308,MLB3042832106,ITDN89121,,,paused,True,fulfillment


In [16]:
# verificando itens que existiam na tabela e não são retornados pelo endpoint
dx_not_in_dy = dx[~dx["inventory_id"].isin(dy["inventory_id"])]
dx_not_in_dy

Unnamed: 0,ml_code,inventory_id,value_name,variation_id,status,catalog_listing,logistic_type
65,MLB1965264557,ITDN89121,Preto,92895807779.0,paused,False,fulfillment
83,MLB1992512780,QTPB03410,,,paused,False,fulfillment
98,MLB1814087324,FTGG16520,,,active,False,fulfillment
105,MLB1992506451,VXCN97101,,,paused,False,fulfillment
179,MLB1990180014,ITSG97062,,,paused,True,fulfillment
296,MLB1990145508,ITSG97062,110V/220V,95383682601.0,paused,False,fulfillment
308,MLB3042832106,ITDN89121,,,paused,True,fulfillment


In [19]:
# buscando de itens em json
json_no_ful = []
c = 1
for item in dx_not_in_dy["ml_code"]:
    base_url = f"https://api.mercadolibre.com/items/{item}"
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MCENTER}"}
    t = dx_not_in_dy.shape[0]
    logger.info(item)
    logger.info(f"{c}/{t}")
    c += 1

    try:
        response = requests.get(base_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        json_no_ful.append(data)
        logger.info(f"Tamanho da nova lista: {len(json_no_ful)}/{t}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Erro ao obter dados para o item {item}: {e}")

    # Se c for um múltiplo de 50, aguarde 1 minuto
    if c % 50 == 0:
        logger.warning("Esperando 1 minuto...")
        time.sleep(60)

logger.info(f"Tamanho da lista de itens: {len(json_no_ful)}")

# Salvando a lista de itens
caminho_arquivo = f"Data/Output/list_mcenter_items_no_ful.json"

with open(caminho_arquivo, "w") as arquivo:
    json.dump(json_no_ful, arquivo)

with open(caminho_arquivo, "r") as arquivo:
    json_no_ful = json.load(arquivo)

df_no_ful = pd.DataFrame(json_no_ful)

logger.info(f"Tamanho do dataframe de itens: {df_no_ful.shape}")
df_no_ful.sample()

[32m2023-12-14 15:27:19.006[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mMLB1965264557[0m
[32m2023-12-14 15:27:19.007[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m1/7[0m
[32m2023-12-14 15:27:20.042[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mTamanho da nova lista: 1/7[0m
[32m2023-12-14 15:27:20.044[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mMLB1992512780[0m
[32m2023-12-14 15:27:20.044[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m2/7[0m
[32m2023-12-14 15:27:21.028[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mTamanho da nova lista: 2/7[0m
[32m2023-12-14 15:27:21.031[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mMLB1814087324[0m
[32m2023-12-14 15:27:21.032[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m3/7[0m
[

Unnamed: 0,id,site_id,title,seller_id,category_id,user_product_id,official_store_id,price,base_price,original_price,...,parent_item_id,differential_pricing,deal_ids,automatic_relist,date_created,last_updated,health,catalog_listing,item_relations,channels
1,MLB1992512780,MLB,Encordoamento 0.11 P/ Violão Aço 85/15 Bronze ...,233632476,MLB278076,,4572,29.9,29.9,,...,,,[],False,2021-08-26T17:02:17.000Z,2023-12-13T20:09:12.000Z,0.71,False,[],"[marketplace, mshops]"


In [22]:
# pegando dados em attributes
no_ful_attributes = []

for item in json_list_item:
    # Extrair os valores desejados
    first_id = item["id"]
    inventory_id = item["inventory_id"]
    variations = item["variations"]
    status = item["status"]
    catalog_product_id = item["catalog_product_id"]
    seller_custom_field = item["seller_custom_field"]
    catalog_listing = item["catalog_listing"]
    logistic_type = item["shipping"]["logistic_type"]
    item_relations = item["item_relations"]

    # Procurar em "attributes" onde "id" é "SELLER_SKU"
    seller_sku_entry = next(
        (attr for attr in item["attributes"] if attr["id"] == "SELLER_SKU"), None
    )

    # Pegar "value_name" e "value_id" se a entrada existir, caso contrário, definir como None
    attribute_value_name = (
        seller_sku_entry["value_name"] if seller_sku_entry else None
    )
    attribute_value_id = seller_sku_entry["value_id"] if seller_sku_entry else None

    # Adicionar os no_ful_attributes à lista
    no_ful_attributes.append(
        {
            "ml_code": first_id,
            "inventory_id": inventory_id,
            "status": status,
            "variations": variations,
            "catalog_listing": catalog_listing,
            "logistic_type":logistic_type,
        }
    )

df_sku_no_ful = pd.DataFrame(no_ful_attributes)

In [24]:
# pegando dados em variations
# variations: variation_id,  attribute_combination: value_id, value_name, seller_sku ,inventory_id
no_ful_variations = []
    
for item in json_list_item:
    # Extrair os valores comuns para cada item
    first_id = item.get("id")
    inventory_id = item.get("inventory_id")
    logistic_type = item.get("shipping", {}).get("logistic_type")

    # Extrair os valores específicos para cada variação
    for variacao in item.get("variations", []):
        variation_id = variacao.get("id")
        variation_seller_sku = variacao.get("seller_custom_field")
        variation_inventory_id = variacao.get("inventory_id")
        attribute_combination = variacao.get("attribute_combinations", [{}])[0]
        value_id = attribute_combination.get("value_id")
        value_name = attribute_combination.get("value_name")
        item_relations = attribute_combination.get("item_relations", [{}])[0]

        # Adicionar os no_ful_variations à lista
        no_ful_variations.append(
            {
                "ml_code": first_id,
                "inventory_id": inventory_id,
                # "logistic_type": logistic_type,
                "variation_id": variation_id,
                # "value_id": value_id,
                "value_name": value_name,
                # "var_seller_sku": variation_seller_sku,
                "variation_inventory_id": variation_inventory_id,
                # "item_relations":item_relations,
            }
        )

df_no_fulvariations = pd.DataFrame(no_ful_variations)

In [26]:
# Unindo as duas tabelas
df_no_ful = pd.merge(
    df_sku_no_ful,
    df_no_fulvariations,
    left_on=["ml_code", "inventory_id"],
    right_on=["ml_code", "inventory_id"],
    how="left",
)
df_no_ful = df_no_ful.drop(["variations", "variation_id"], axis=1)
df_no_ful

# *se variation_inventory_id = None -> variation_inventory_id == inventory_id && remove inventory_id && variation_inventory_id rename to inventory_id*
df_no_ful["variation_inventory_id"].fillna(
    df_no_ful["inventory_id"], inplace=True
)

# Editando tabela
cols = [
    "ml_code",
    "variation_inventory_id",
    "value_name",
    "status",
    "catalog_listing",
    "logistic_type"
]
df_no_ful = df_no_ful[cols]
df_no_ful = df_no_ful.rename(columns={"variation_inventory_id": "inventory_id"})

logger.info(f"Tamanho do dataframe final: {df_no_ful.shape}")

[32m2023-12-14 15:37:26.537[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m29[0m - [1mTamanho do dataframe final: (7, 6)[0m


In [27]:
df_no_ful

Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing,logistic_type
0,MLB1965264557,ITDN89121,Preto,paused,False,cross_docking
1,MLB1992512780,QTPB03410,,paused,False,cross_docking
2,MLB1814087324,FTGG16520,,active,False,fulfillment
3,MLB1992506451,VXCN97101,,paused,False,cross_docking
4,MLB1990180014,ITSG97062,,paused,True,cross_docking
5,MLB1990145508,ITSG97062,110V/220V,paused,False,cross_docking
6,MLB3042832106,ITDN89121,,paused,True,cross_docking


In [28]:
dx_not_in_dy

Unnamed: 0,ml_code,inventory_id,value_name,variation_id,status,catalog_listing,logistic_type
65,MLB1965264557,ITDN89121,Preto,92895807779.0,paused,False,fulfillment
83,MLB1992512780,QTPB03410,,,paused,False,fulfillment
98,MLB1814087324,FTGG16520,,,active,False,fulfillment
105,MLB1992506451,VXCN97101,,,paused,False,fulfillment
179,MLB1990180014,ITSG97062,,,paused,True,fulfillment
296,MLB1990145508,ITSG97062,110V/220V,95383682601.0,paused,False,fulfillment
308,MLB3042832106,ITDN89121,,,paused,True,fulfillment


In [29]:
dy[dy['ml_code']=='MLB1965264557']

Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing,logistic_type


In [31]:
dy = pd.concat([dy, df_no_ful], ignore_index=True)
dy

Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing,logistic_type
0,MLB3778561802,AVOX05934,,closed,True,fulfillment
1,MLB3436515963,WTHI04163,,closed,True,fulfillment
2,MLB3778538730,JLAX10536,,closed,True,fulfillment
3,MLB924922735,JFGN34621,,paused,False,fulfillment
4,MLB949771924,TDFV00990,,paused,False,fulfillment
...,...,...,...,...,...,...
525,MLB1814087324,FTGG16520,,active,False,fulfillment
526,MLB1992506451,VXCN97101,,paused,False,cross_docking
527,MLB1990180014,ITSG97062,,paused,True,cross_docking
528,MLB1990145508,ITSG97062,110V/220V,paused,False,cross_docking


In [32]:

# Merge com base nas colunas ml_code e inventory_id a tabela do banco de dados com a busca de hoje + itens fora do full
merged_df = pd.merge(
    dy,
    dx,
    on=["ml_code", "inventory_id"],
    how="inner",
    suffixes=("_sku_var", "_items"),
)


In [33]:
# merged_df

Unnamed: 0,ml_code,inventory_id,value_name_sku_var,status_sku_var,catalog_listing_sku_var,logistic_type_sku_var,value_name_items,variation_id,status_items,catalog_listing_items,logistic_type_items
0,MLB3778561802,AVOX05934,,closed,True,fulfillment,,,closed,True,fulfillment
1,MLB3436515963,WTHI04163,,closed,True,fulfillment,,,closed,True,fulfillment
2,MLB3778538730,JLAX10536,,closed,True,fulfillment,,,closed,True,fulfillment
3,MLB924922735,JFGN34621,,paused,False,fulfillment,,,paused,False,fulfillment
4,MLB949771924,TDFV00990,,paused,False,fulfillment,,,paused,False,fulfillment
...,...,...,...,...,...,...,...,...,...,...,...
525,MLB1814087324,FTGG16520,,active,False,fulfillment,,,active,False,fulfillment
526,MLB1992506451,VXCN97101,,paused,False,cross_docking,,,paused,False,fulfillment
527,MLB1990180014,ITSG97062,,paused,True,cross_docking,,,paused,True,fulfillment
528,MLB1990145508,ITSG97062,110V/220V,paused,False,cross_docking,110V/220V,95383682601,paused,False,fulfillment


In [40]:

# Linhas com valores diferentes
different_rows = merged_df[
    (merged_df["value_name_sku_var"] != merged_df["value_name_items"])
    | (merged_df["status_sku_var"] != merged_df["status_items"])
    | (merged_df["catalog_listing_sku_var"] != merged_df["catalog_listing_items"])
]

# Compare os DataFrames
identicos = dx.equals(dy)
# Exiba o resultado
logger.info(f"Os DataFrames são idênticos? {identicos}")

[32m2023-12-14 15:44:33.434[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [1mOs DataFrames são idênticos? False[0m


In [62]:
# # different_rows
# dy.shape
# dx.shape
# dx.equals(dy)
# identicos
# different_rows

In [48]:

# Encontrar diferenças usando merge
diferencas = (
    # pd.merge(dx, dy, how="outer", indicator=True)
    pd.merge(dy, dx, how="outer", indicator=True)
    .query('_merge == "left_only"')
    .drop("_merge", axis=1)
)

# Criar um novo DataFrame apenas com as colunas modificadas
df_atualizado = dx.copy()
df_atualizado[diferencas.columns] = diferencas

# Remover linhas onde todos os valores em TODAS as colunas são NaN
df_atualizado_sem_nan = df_atualizado.dropna(
    how="all", subset=df_atualizado.columns
)

In [61]:
# df_atualizado_sem_nan['logistic_type'].value_counts()
# # df_atualizado

In [60]:
# df_atualizado_sem_nan

In [None]:
def get_update_items(access_token, seller_id, db_config, table_item):
    start_prog = time.time()  # Registra o inicio da aplicação
    # Consulta aos itens com logistic_type=fulfillment
    base_url = f"https://api.mercadolibre.com/users/{seller_id}/items/search?logistic_type=fulfillment"

    params = {
        "limit": 100,
        "offset": 0,
    }

    headers = {"Authorization": f"Bearer {access_token}"}

    # buscando lista de códigos
    json_list = []
    try:
        while True:
            response = requests.get(base_url, headers=headers, params=params)
            response.raise_for_status()
            data = response.json()
            logger.info(data)
            if "results" in data:
                json_list.extend(data["results"])
                logger.info(data["results"])
            else:
                break

            # Verifique se há mais páginas
            if "paging" in data:
                total_data = data["paging"].get("total")

                total_pages = math.ceil(total_data / params["limit"])
                logger.info(f"Total de páginas a serem processadas: {total_pages}")
                logger.info(f'Offset atual: {params["offset"]}')

                if params["offset"] >= total_pages * params["limit"]:
                    break

                params["offset"] += params["limit"]
            else:
                break

    except requests.exceptions.RequestException as req_err:
        logger.error(f"Erro ao fazer a requisição para {base_url}: {req_err}")
    except Exception as e:
        logger.error(f"Erro não esperado: {e}")

    df_json_list = pd.DataFrame(json_list)


    # buscando de itens em json
    json_list_item = []
    c = 1
    for item in json_list:
        base_url = f"https://api.mercadolibre.com/items/{item}"
        headers = {"Authorization": f"Bearer {access_token}"}
        t = len(json_list)
        logger.info(item)
        logger.info(f"{c}/{t}")
        c += 1

        try:
            response = requests.get(base_url, headers=headers)
            response.raise_for_status()
            data = response.json()
            json_list_item.append(data)
            logger.info(f"Tamanho da nova lista: {len(json_list_item)}/{t}")
        except requests.exceptions.RequestException as e:
            logger.error(f"Erro ao obter dados para o item {item}: {e}")

        # Se c for um múltiplo de 50, aguarde 1 minuto
        if c % 50 == 0:
            logger.warning("Esperando 1 minuto...")
            time.sleep(60)

    logger.info(f"Tamanho da lista de itens: {len(json_list_item)}")

    # Salvando a lista de itens
    caminho_arquivo = f"Data/Output/list_{table_item}.json"

    with open(caminho_arquivo, "w") as arquivo:
        json.dump(json_list_item, arquivo)

    with open(caminho_arquivo, "r") as arquivo:
        json_list_item = json.load(arquivo)

    df_list_item = pd.DataFrame(json_list_item)

    logger.info(f"Tamanho do dataframe de itens: {df_list_item.shape}")
    df_list_item.sample()

    # pegando dados em attributes
    # attributes: SELLER_SKU
    resultados_attributes = []

    for item in json_list_item:
        # Extrair os valores desejados
        first_id = item["id"]
        inventory_id = item["inventory_id"]
        variations = item["variations"]
        status = item["status"]
        catalog_product_id = item["catalog_product_id"]
        seller_custom_field = item["seller_custom_field"]
        catalog_listing = item["catalog_listing"]
        logistic_type = item["shipping"]["logistic_type"]
        item_relations = item["item_relations"]

        # Procurar em "attributes" onde "id" é "SELLER_SKU"
        seller_sku_entry = next(
            (attr for attr in item["attributes"] if attr["id"] == "SELLER_SKU"), None
        )

        # Pegar "value_name" e "value_id" se a entrada existir, caso contrário, definir como None
        attribute_value_name = (
            seller_sku_entry["value_name"] if seller_sku_entry else None
        )
        attribute_value_id = seller_sku_entry["value_id"] if seller_sku_entry else None

        # Adicionar os resultados_attributes à lista
        resultados_attributes.append(
            {
                "ml_code": first_id,
                "inventory_id": inventory_id,
                "status": status,
                "variations": variations,
                "catalog_listing": catalog_listing,
                "logistic_type":logistic_type,
            }
        )

    df_sku = pd.DataFrame(resultados_attributes)

    # pegando dados em variations
    # variations: variation_id,  attribute_combination: value_id, value_name, seller_sku ,inventory_id
    resultados_variations = []
        
    for item in json_list_item:
        # Extrair os valores comuns para cada item
        first_id = item.get("id")
        inventory_id = item.get("inventory_id")
        logistic_type = item.get("shipping", {}).get("logistic_type")

        # Extrair os valores específicos para cada variação
        for variacao in item.get("variations", []):
            variation_id = variacao.get("id")
            variation_seller_sku = variacao.get("seller_custom_field")
            variation_inventory_id = variacao.get("inventory_id")
            attribute_combination = variacao.get("attribute_combinations", [{}])[0]
            value_id = attribute_combination.get("value_id")
            value_name = attribute_combination.get("value_name")
            item_relations = attribute_combination.get("item_relations", [{}])[0]

            # Adicionar os resultados_variations à lista
            resultados_variations.append(
                {
                    "ml_code": first_id,
                    "inventory_id": inventory_id,
                    # "logistic_type": logistic_type,
                    "variation_id": variation_id,
                    # "value_id": value_id,
                    "value_name": value_name,
                    # "var_seller_sku": variation_seller_sku,
                    "variation_inventory_id": variation_inventory_id,
                    # "item_relations":item_relations,
                }
            )

    df_variations = pd.DataFrame(resultados_variations)

    # Unindo as duas tabelas
    df_sku_var = pd.merge(
        df_sku,
        df_variations,
        left_on=["ml_code", "inventory_id"],
        right_on=["ml_code", "inventory_id"],
        how="left",
    )
    df_sku_var = df_sku_var.drop(["variations", "variation_id"], axis=1)
    df_sku_var

    # *se variation_inventory_id = None -> variation_inventory_id == inventory_id && remove inventory_id && variation_inventory_id rename to inventory_id*
    df_sku_var["variation_inventory_id"].fillna(
        df_sku_var["inventory_id"], inplace=True
    )

    # Editando tabela
    cols = [
        "ml_code",
        "variation_inventory_id",
        "value_name",
        "status",
        "catalog_listing",
        "logistic_type"
    ]
    df_sku_var = df_sku_var[cols]
    df_sku_var = df_sku_var.rename(columns={"variation_inventory_id": "inventory_id"})

    logger.info(f"Tamanho do dataframe final: {df_sku_var.shape}")

    ### Populando banco de dados ###
    try:
        conn = psycopg2.connect(**db_config)

        # Use a tabela fornecida como parâmetro
        # query = f"SELECT * FROM {table_item};"
        query = f"SELECT * FROM {table_item};"
        logger.info(query)
        df_items = pd.read_sql(query, conn)
    except psycopg2.Error as e:
        logger.error(f"Erro do psycopg2 em 'items': {e}")
    except Exception as e:
        logger.error(f"Erro ao consultar 'items': {e}")

    dx = df_items.copy()
    dy = df_sku_var.copy()

    # Editando DFs
    dx = dx.drop(columns=["created_at", "updated_at"])  # remove linhas de data
    dx.replace("NaN", np.nan, inplace=True)  # altera de strin para NaN
    dx = dx.astype(str)  # altera para tipo string
    dy = dy.astype(str)

    # verificando itens que existiam na tabela e não são retornados pelo endpoint
    dx_not_in_dy = dx[~dx["inventory_id"].isin(dy["inventory_id"])]
    # dx_not_in_dy

    # buscando de itens em json
    json_no_ful = []
    c = 1
    for item in dx_not_in_dy["ml_code"]:
        base_url = f"https://api.mercadolibre.com/items/{item}"
        headers = {"Authorization": f"Bearer {access_token}"}
        t = dx_not_in_dy.shape[0]
        logger.info(item)
        logger.info(f"{c}/{t}")
        c += 1

        try:
            response = requests.get(base_url, headers=headers)
            response.raise_for_status()
            data = response.json()
            json_no_ful.append(data)
            logger.info(f"Tamanho da nova lista: {len(json_no_ful)}/{t}")
        except requests.exceptions.RequestException as e:
            logger.error(f"Erro ao obter dados para o item {item}: {e}")

        # Se c for um múltiplo de 50, aguarde 1 minuto
        if c % 50 == 0:
            logger.warning("Esperando 1 minuto...")
            time.sleep(60)

    logger.info(f"Tamanho da lista de itens: {len(json_no_ful)}")

    # Salvando a lista de itens
    caminho_arquivo = f"Data/Output/list_{table_item}_no_ful.json"

    with open(caminho_arquivo, "w") as arquivo:
        json.dump(json_no_ful, arquivo)

    with open(caminho_arquivo, "r") as arquivo:
        json_no_ful = json.load(arquivo)

    df_no_ful = pd.DataFrame(json_no_ful)

    logger.info(f"Tamanho do dataframe de itens: {df_no_ful.shape}")
    df_no_ful.sample()

    # pegando dados em attributes
    no_ful_attributes = []

    for item in json_list_item:
        # Extrair os valores desejados
        first_id = item["id"]
        inventory_id = item["inventory_id"]
        variations = item["variations"]
        status = item["status"]
        catalog_product_id = item["catalog_product_id"]
        seller_custom_field = item["seller_custom_field"]
        catalog_listing = item["catalog_listing"]
        logistic_type = item["shipping"]["logistic_type"]
        item_relations = item["item_relations"]

        # Procurar em "attributes" onde "id" é "SELLER_SKU"
        seller_sku_entry = next(
            (attr for attr in item["attributes"] if attr["id"] == "SELLER_SKU"), None
        )

        # Pegar "value_name" e "value_id" se a entrada existir, caso contrário, definir como None
        attribute_value_name = (
            seller_sku_entry["value_name"] if seller_sku_entry else None
        )
        attribute_value_id = seller_sku_entry["value_id"] if seller_sku_entry else None

        # Adicionar os no_ful_attributes à lista
        no_ful_attributes.append(
            {
                "ml_code": first_id,
                "inventory_id": inventory_id,
                "status": status,
                "variations": variations,
                "catalog_listing": catalog_listing,
                "logistic_type":logistic_type,
            }
        )

    df_sku_no_ful = pd.DataFrame(no_ful_attributes)

    # pegando dados em variations
    # variations: variation_id,  attribute_combination: value_id, value_name, seller_sku ,inventory_id
    no_ful_variations = []
        
    for item in json_list_item:
        # Extrair os valores comuns para cada item
        first_id = item.get("id")
        inventory_id = item.get("inventory_id")
        logistic_type = item.get("shipping", {}).get("logistic_type")

        # Extrair os valores específicos para cada variação
        for variacao in item.get("variations", []):
            variation_id = variacao.get("id")
            variation_seller_sku = variacao.get("seller_custom_field")
            variation_inventory_id = variacao.get("inventory_id")
            attribute_combination = variacao.get("attribute_combinations", [{}])[0]
            value_id = attribute_combination.get("value_id")
            value_name = attribute_combination.get("value_name")
            item_relations = attribute_combination.get("item_relations", [{}])[0]

            # Adicionar os no_ful_variations à lista
            no_ful_variations.append(
                {
                    "ml_code": first_id,
                    "inventory_id": inventory_id,
                    # "logistic_type": logistic_type,
                    "variation_id": variation_id,
                    # "value_id": value_id,
                    "value_name": value_name,
                    # "var_seller_sku": variation_seller_sku,
                    "variation_inventory_id": variation_inventory_id,
                    # "item_relations":item_relations,
                }
            )

    df_no_fulvariations = pd.DataFrame(no_ful_variations)

    # Unindo as duas tabelas
    df_no_ful = pd.merge(
        df_sku_no_ful,
        df_no_fulvariations,
        left_on=["ml_code", "inventory_id"],
        right_on=["ml_code", "inventory_id"],
        how="left",
    )
    df_no_ful = df_no_ful.drop(["variations", "variation_id"], axis=1)
    df_no_ful

    # *se variation_inventory_id = None -> variation_inventory_id == inventory_id && remove inventory_id && variation_inventory_id rename to inventory_id*
    df_no_ful["variation_inventory_id"].fillna(
        df_no_ful["inventory_id"], inplace=True
    )

    # Editando tabela
    cols = [
        "ml_code",
        "variation_inventory_id",
        "value_name",
        "status",
        "catalog_listing",
        "logistic_type"
    ]
    df_no_ful = df_no_ful[cols]
    df_no_ful = df_no_ful.rename(columns={"variation_inventory_id": "inventory_id"})

    logger.info(f"Tamanho do dataframe final: {df_no_ful.shape}")

    dy = pd.concat([dy, df_no_ful], ignore_index=True)

    # Merge com base nas colunas ml_code e inventory_id a tabela do banco de dados com a busca de hoje + itens fora do full
    merged_df = pd.merge(
        dy,
        dx,
        on=["ml_code", "inventory_id"],
        how="inner",
        suffixes=("_sku_var", "_items"),
    )

    # Linhas com valores diferentes
    different_rows = merged_df[
        (merged_df["value_name_sku_var"] != merged_df["value_name_items"])
        | (merged_df["status_sku_var"] != merged_df["status_items"])
        | (merged_df["catalog_listing_sku_var"] != merged_df["catalog_listing_items"])
    ]

    # Compare os DataFrames
    identicos = dx.equals(dy)
    # Exiba o resultado
    logger.info(f"Os DataFrames são idênticos? {identicos}")

    # Encontrar diferenças usando merge
    diferencas = (
        # pd.merge(dx, dy, how="outer", indicator=True)
        pd.merge(dy, dx, how="outer", indicator=True)
        .query('_merge == "left_only"')
        .drop("_merge", axis=1)
    )

    # Criar um novo DataFrame apenas com as colunas modificadas
    df_atualizado = dx.copy()
    df_atualizado[diferencas.columns] = diferencas

    # Remover linhas onde todos os valores em TODAS as colunas são NaN
    df_atualizado_sem_nan = df_atualizado.dropna(
        how="all", subset=df_atualizado.columns
    )

    conn = psycopg2.connect(**db_config)

    cursor = conn.cursor()

    # Iterar sobre as linhas do DataFrame e executar as atualizações no banco de dados
    for index, row in df_atualizado_sem_nan.iterrows():
        ml_code = row["ml_code"]
        inventory_id = row["inventory_id"]
        value_name = row["value_name"]
        status = row["status"]
        catalog_listing = row["catalog_listing"]
        logistic_type = row["logistic_type"]
        updated_at = datetime.now()  # Use a data/hora atual

        # Construir a instrução SQL de atualização
        query = f"UPDATE {table_item} SET value_name = %s, status = %s, catalog_listing = %s, updated_at = %s, logistic_type = %s  WHERE ml_code = %s AND inventory_id = %s"
        update_query = sql.SQL(query)
        logger.info(f"Inserindo dados: {[value for value in row]}")
        # Executar a instrução SQL
        cursor.execute(
            update_query,
            (
                value_name,
                status,
                catalog_listing,
                updated_at,
                logistic_type,
                ml_code,
                inventory_id,
            ),
        )

    conn.commit()

    cursor.close()
    conn.close()
    logger.info("Dados inseridos com sucesso!")

    # Encontrar linhas onde os pares ml_code e inventory_id em df_ficticio são diferentes de dx
    diferenca = pd.merge(
        dx, dy, on=["ml_code", "inventory_id"], how="right", indicator=True
    )

    # Filtrar apenas as linhas em que df_ficticio tem valores diferentes de dx
    diferenca = diferenca.query('_merge == "right_only"').drop(columns="_merge")

    # Selecionar colunas específicas e renomear
    diferenca = diferenca[
        ["ml_code", "inventory_id", "value_name_y", "status_y", "catalog_listing_y"]
    ]
    diferenca = diferenca.rename(
        columns={
            "value_name_y": "value_name",
            "status_y": "status",
            "catalog_listing_y": "catalog_listing",
        }
    )


    # Inserir novos dados no banco de dados
    conn = psycopg2.connect(**db_config)

    cursor = conn.cursor()

    # Use a tabela fornecida como parâmetro
    for index, row in diferenca.iterrows():
        query = f"INSERT INTO {table_item} (ml_code, inventory_id, value_name, status, catalog_listing) VALUES (%s, %s, %s, %s, %s)"
        insert_query = sql.SQL(query)
        logger.info(f"Inserindo dados: {[value for value in row]}")
        cursor.execute(
            insert_query,
            (
                row["ml_code"],
                row["inventory_id"],
                row["value_name"],
                row["status"],
                row["catalog_listing"],
            ),
        )

    conn.commit()

    # Feche o cursor e a conexão
    cursor.close()
    conn.close()
    logger.info("Atualização de itens finalizada!")

    end_prog = time.time()  # Registra o tempo depois de toda aplicação
    elapsed_time = end_prog - start_prog  # Calcula o tempo decorrido
    logger.info(f"Tempo Total do processo: {elapsed_time / 60} minutos")

In [None]:
# get_update_items(ACCESS_TOKEN_BUENOSHOPS, SELLER_ID_BUENOSHOPS, db_config, 'bueno_items')

In [None]:
# get_update_items(ACCESS_TOKEN_MUSICALCRIS, SELLER_ID_MUSICALCRIS, db_config, 'cris_items')

In [89]:
get_update_items(ACCESS_TOKEN_MCENTER, SELLER_ID_MCENTER, db_config, 'mcenter_items')

[32m2023-12-14 17:31:34.532[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_update_items[0m:[36m20[0m - [1m{'seller_id': '233632476', 'results': ['MLB3778561802', 'MLB3436515963', 'MLB3778538730', 'MLB924922735', 'MLB949771924', 'MLB949776093', 'MLB949788598', 'MLB949790387', 'MLB950297972', 'MLB950303724', 'MLB951043222', 'MLB1363602423', 'MLB1397153967', 'MLB1398114866', 'MLB1399101853', 'MLB1400763583', 'MLB1401964928', 'MLB1401946009', 'MLB1403147752', 'MLB1403024540', 'MLB1403999905', 'MLB1403934211', 'MLB1403920691', 'MLB1403791227', 'MLB1403763907', 'MLB1416821703', 'MLB1418498416', 'MLB1418470905', 'MLB1418466676', 'MLB1417658704', 'MLB1418014629', 'MLB1418532096', 'MLB1425101880', 'MLB1431387563', 'MLB1435969309', 'MLB1435852259', 'MLB1435836214', 'MLB1400355425', 'MLB1440291959', 'MLB1440288810', 'MLB1440247563', 'MLB1417877602', 'MLB1425113746', 'MLB1453957901', 'MLB1453221977', 'MLB1457193551', 'MLB1459840256', 'MLB1459799426', 'MLB1459799420', 'MLB1459771919', 'ML

KeyboardInterrupt: 

In [82]:
### Populando banco de dados ###
try:
    conn = psycopg2.connect(**db_config)

    query = f"SELECT * FROM cris_fulfillment_stock;"
    logger.info(query)
    df = pd.read_sql(query, conn)
except psycopg2.Error as e:
    logger.error(f"Erro do psycopg2 em 'items': {e}")
except Exception as e:
    logger.error(f"Erro ao consultar 'items': {e}")

df

[32m2023-12-14 16:58:37.897[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mSELECT * FROM cris_fulfillment_stock;[0m
  df = pd.read_sql(query, conn)


Unnamed: 0,ml_inventory_id,available_quantity,detail_status,detail_quantity,references_id,references_variation_id,created_at
0,KGZZ89270,2,,,MLB2077624899,173883881289,2023-12-12 00:54:42.500931
1,GOYP17092,2,,,MLB2077566813,0,2023-12-12 00:54:42.500931
2,GXOM30321,2,,,MLB3642965726,0,2023-12-12 00:54:42.500931
3,RWRR30846,0,,,MLB3321160629,0,2023-12-12 00:54:42.500931
4,DGMW97350,1,,,MLB3321186153,0,2023-12-12 00:54:42.500931
...,...,...,...,...,...,...,...
2701,XQVO00216,1,,,MLB3525779525,,2023-12-14 03:24:40.623132
2702,VLWK91331,0,,,MLB3469722247,,2023-12-14 03:24:40.624924
2703,GZIL90714,1,,,MLB3472439679,,2023-12-14 03:24:40.626705
2704,LDAC97015,2,,,MLB3531778745,180962355359,2023-12-14 03:24:40.628525


In [83]:
df['created_at'] = pd.to_datetime(df['created_at'])

# Filtrar e criar DataFrames separados para cada dia
df_12 = df[df['created_at'].dt.day == 12]
df_13 = df[df['created_at'].dt.day == 13]
df_14 = df[df['created_at'].dt.day == 14]

In [84]:
df_12

Unnamed: 0,ml_inventory_id,available_quantity,detail_status,detail_quantity,references_id,references_variation_id,created_at
0,KGZZ89270,2,,,MLB2077624899,173883881289,2023-12-12 00:54:42.500931
1,GOYP17092,2,,,MLB2077566813,0,2023-12-12 00:54:42.500931
2,GXOM30321,2,,,MLB3642965726,0,2023-12-12 00:54:42.500931
3,RWRR30846,0,,,MLB3321160629,0,2023-12-12 00:54:42.500931
4,DGMW97350,1,,,MLB3321186153,0,2023-12-12 00:54:42.500931
...,...,...,...,...,...,...,...
2537,CMPE00886,2,,,MLB4272236376,0,2023-11-12 01:00:00.000000
2538,BIXC96031,1,transfer,1.0,MLB3532357445,0,2023-11-12 01:00:00.000000
2539,LKQN96991,1,transfer,1.0,MLB3532268455,0,2023-11-12 01:00:00.000000
2540,IFOR00903,1,transfer,1.0,MLB4275596684,0,2023-11-12 01:00:00.000000


In [85]:
df_13

Unnamed: 0,ml_inventory_id,available_quantity,detail_status,detail_quantity,references_id,references_variation_id,created_at
2378,KGZZ89270,2,,,MLB2077624899,173883881289,2023-11-13 01:00:00.000000
2379,GOYP17092,2,,,MLB2077566813,0,2023-11-13 01:00:00.000000
2380,GXOM30321,2,,,MLB3642965726,0,2023-11-13 01:00:00.000000
2381,RWRR30846,0,,,MLB3321160629,0,2023-11-13 01:00:00.000000
2382,DGMW97350,1,,,MLB3321186153,0,2023-11-13 01:00:00.000000
...,...,...,...,...,...,...,...
2619,XQVO00216,1,,,MLB3525779525,,2023-12-13 04:38:35.772892
2620,VLWK91331,0,,,MLB3469722247,,2023-12-13 04:38:35.774643
2621,GZIL90714,1,,,MLB3472439679,,2023-12-13 04:38:35.776447
2622,LDAC97015,2,,,MLB3531778745,180962355359,2023-12-13 04:38:35.778215


In [86]:
df_14

Unnamed: 0,ml_inventory_id,available_quantity,detail_status,detail_quantity,references_id,references_variation_id,created_at
2296,KGZZ89270,2,,,MLB2077624899,173883881289,2023-11-14 01:00:00.000000
2297,GOYP17092,2,,,MLB2077566813,0,2023-11-14 01:00:00.000000
2298,GXOM30321,2,,,MLB3642965726,0,2023-11-14 01:00:00.000000
2299,RWRR30846,0,,,MLB3321160629,0,2023-11-14 01:00:00.000000
2300,DGMW97350,1,,,MLB3321186153,0,2023-11-14 01:00:00.000000
...,...,...,...,...,...,...,...
2701,XQVO00216,1,,,MLB3525779525,,2023-12-14 03:24:40.623132
2702,VLWK91331,0,,,MLB3469722247,,2023-12-14 03:24:40.624924
2703,GZIL90714,1,,,MLB3472439679,,2023-12-14 03:24:40.626705
2704,LDAC97015,2,,,MLB3531778745,180962355359,2023-12-14 03:24:40.628525


In [87]:
df_12 = df_12.drop(columns=['created_at'],axis=1)
df_13 = df_13.drop(columns=['created_at'],axis=1)
df_concatenado = pd.concat([df_12, df_13])

# Identificar linhas onde ml_inventory_id tem valores duplicados
duplicatas = df_concatenado.duplicated(subset=['ml_inventory_id'], keep=False)

# Filtrar o DataFrame concatenado com base na máscara de duplicatas
linhas_diferentes = df_concatenado[duplicatas]

In [88]:
# Mesclar os DataFrames com base na coluna ml_inventory_id
merged_df = pd.merge(df_12, df_13, on='ml_inventory_id', suffixes=('_df12', '_df13'))

# Filtrar as linhas onde available_quantity é diferente entre os dois DataFrames
diferencas = merged_df[merged_df['available_quantity_df12'] != merged_df['available_quantity_df13']]
diferencas

Unnamed: 0,ml_inventory_id,available_quantity_df12,detail_status_df12,detail_quantity_df12,references_id_df12,references_variation_id_df12,available_quantity_df13,detail_status_df13,detail_quantity_df13,references_id_df13,references_variation_id_df13
5,GOYP17092,2,,,MLB2077566813,0,1,,,MLB2077566813,
7,GOYP17092,2,,,MLB2077566813,0,1,,,MLB2077566813,
133,VLWK91331,1,,,MLB3469722247,0,0,,,MLB3469722247,
135,VLWK91331,1,,,MLB3469722247,0,0,,,MLB3469722247,
189,GZIL90714,0,transfer,1.0,MLB3472439679,0,1,,,MLB3472439679,
191,GZIL90714,0,transfer,1.0,MLB3472439679,0,1,,,MLB3472439679,
233,WOCN96065,3,transfer,2.0,MLB4230477964,0,5,,,MLB4230477964,
235,WOCN96065,3,transfer,2.0,MLB4230477964,0,5,,,MLB4230477964,
237,QTJU95736,4,transfer,1.0,MLB4230465656,0,5,,,MLB4230465656,
239,QTJU95736,4,transfer,1.0,MLB4230465656,0,5,,,MLB4230465656,
