In [1]:
import json
import os
import time
import psycopg2
from psycopg2 import sql
import math
import pandas as pd
import requests
from dotenv import load_dotenv
from datetime import datetime, timedelta
import numpy as np

load_dotenv()

ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
HOST = os.getenv("HOST")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")


# Informações de conexão com o banco de dados PostgreSQL
db_config = {
    "host": HOST,
    "database": POSTGRES_DB,
    "user": POSTGRES_USER,
    "password": POSTGRES_PASSWORD,
}

# Registra o tempo antes da execução
start_prog = time.time()

In [39]:
def condf(df, coluna, valor):
    """
    Consulta um DataFrame com base em uma coluna e valor específicos.

    Parâmetros:
    - df: DataFrame a ser consultado.
    - coluna: Nome da coluna para a condição de consulta.
    - valor: Valor desejado na coluna.

    Retorna:
    Um DataFrame contendo apenas as linhas que atendem à condição.
    """
    resultado = df[df[coluna] == valor]
    return resultado


def condf_date(df, coluna_data, data_pesquisada):
    """
    Consulta um DataFrame com base em uma coluna de datas.

    Parâmetros:
    - df: DataFrame a ser consultado.
    - coluna_data: Nome da coluna de datas.
    - data_pesquisada: Data desejada para a consulta.

    Retorna:
    Um DataFrame contendo apenas as linhas que correspondem à data pesquisada.
    """
    resultado = df[pd.to_datetime(df[coluna_data]).dt.date == data_pesquisada]
    return resultado

In [None]:
base_url = "https://api.mercadolibre.com/users/233632476/items/search?logistic_type=fulfillment"

params = {
    "limit": 100,
    "offset": 0,
}

headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

json_list = []

try:
    while True:
        response = requests.get(base_url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json()
        print(data)
        if "results" in data:
            json_list.extend(data["results"])
            print(data["results"])
        else:
            break

        # Verifique se há mais páginas
        if "paging" in data:
            total_data = data["paging"].get("total")

            total_pages = math.ceil(total_data / params["limit"])
            print(f"Total de páginas a serem processadas: {total_pages}")
            print(f'Offset atual: {params["offset"]}')

            if params["offset"] >= total_pages * params["limit"]:
                break

            params["offset"] += params["limit"]
        else:
            break

except requests.exceptions.RequestException as req_err:
    print(f"Erro ao fazer a requisição para {base_url}: {req_err}")
except Exception as e:
    print(f"Erro não esperado: {e}")

print(f"Total esperado de dados: {total_data}")
print(f"Total de dados coletados: {len(json_list)}")

In [None]:
json_list_item = []

c = 1
for item in json_list:
    base_url = f"https://api.mercadolibre.com/items/{item}"
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
    t = len(json_list)
    print(item)
    print(f"{c}/{t}")
    c += 1

    try:
        response = requests.get(base_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        json_list_item.append(data)
        print(f"Tamanho da nova lista: {len(json_list_item)}/{t}")
    except requests.exceptions.RequestException as e:
        print(f"Erro ao obter dados para o item {item}: {e}")

    # Se c for um múltiplo de 50, aguarde 1 minuto
    if c % 50 == 0:
        print("Esperando 1 minuto...")
        time.sleep(60)

print(f"Tamanho da lista de itens: {len(json_list_item)}")

In [5]:
# Salvando em um arquivo
caminho_arquivo = "../../Data/Output/lista_itens.json"

In [None]:
# Escrever a lista em um arquivo JSON
with open(caminho_arquivo, "w") as arquivo:
    json.dump(json_list_item, arquivo)

In [182]:
caminho_arquivo = "../../Data/Output/lista_itens.json"

with open(caminho_arquivo, "r") as arquivo:
    json_list_item = json.load(arquivo)

print(f"Tamanho da lista de itens: {len(json_list_item)}")
print(json_list_item)

Tamanho da lista de itens: 433


### Construindo tabela

In [183]:
df = pd.DataFrame(json_list_item)

print(f"Tamanho do dataframe de itens: {df.shape}")
df.sample()

Tamanho do dataframe de itens: (433, 59)


Unnamed: 0,id,site_id,title,seller_id,category_id,user_product_id,official_store_id,price,base_price,original_price,...,parent_item_id,differential_pricing,deal_ids,automatic_relist,date_created,last_updated,health,catalog_listing,item_relations,channels
350,MLB3434144067,MLB,"Cabo Santo Angelo Ninja L 4,57 Metros Cabos P/...",233632476,MLB72745,,4572,59.9,59.9,,...,,,[MLB23017],False,2023-09-04T19:27:28.000Z,2023-11-27T22:50:17.017Z,,True,"[{'id': 'MLB1629329134', 'variation_id': None,...","[marketplace, mshops]"


#### attributes: SELLER_SKU

In [184]:
resultados_attributes = []

for item in json_list_item:
    # Extrair os valores desejados
    first_id = item["id"]
    inventory_id = item["inventory_id"]
    variations = item["variations"]
    status = item["status"]
    catalog_product_id = item["catalog_product_id"]
    seller_custom_field = item["seller_custom_field"]
    catalog_listing = item["catalog_listing"]
    logistic_type = item["shipping"]["logistic_type"]
    item_relations = item["item_relations"]

    # Procurar em "attributes" onde "id" é "SELLER_SKU"
    seller_sku_entry = next(
        (attr for attr in item["attributes"] if attr["id"] == "SELLER_SKU"), None
    )

    # Obter os valores de "value_name" e "value_id" se a entrada existir, caso contrário, definir como None
    attribute_value_name = seller_sku_entry["value_name"] if seller_sku_entry else None
    attribute_value_id = seller_sku_entry["value_id"] if seller_sku_entry else None

    # attribute_value_name = item["attributes"][0]["value_name"]
    # attribute_value_id = item["attributes"][0]["value_id"]

    # Adicionar os resultados_attributes à lista
    resultados_attributes.append(
        {
            "ml_code": first_id,
            "inventory_id": inventory_id,
            # "logistic_type": logistic_type,
            # "sku": attribute_value_name,
            "status": status,
            "variations": variations,
            # "attribute_value_id": attribute_value_id,
            # "catalog_product_id": catalog_product_id,
            # "seller_custom_field": seller_custom_field,
            "catalog_listing": catalog_listing,
            # "item_relations": item_relations
        }
    )

# Exibir os resultados
print(resultados_attributes)
df_sku = pd.DataFrame(resultados_attributes)
print(df_sku.shape)
df_sku.sample()

[{'ml_code': 'MLB3778561802', 'inventory_id': 'AVOX05934', 'status': 'closed', 'variations': [], 'catalog_listing': True}, {'ml_code': 'MLB3436515963', 'inventory_id': 'WTHI04163', 'status': 'closed', 'variations': [], 'catalog_listing': True}, {'ml_code': 'MLB3778538730', 'inventory_id': 'JLAX10536', 'status': 'closed', 'variations': [], 'catalog_listing': True}, {'ml_code': 'MLB924922735', 'inventory_id': 'JFGN34621', 'status': 'paused', 'variations': [], 'catalog_listing': False}, {'ml_code': 'MLB949771924', 'inventory_id': 'TDFV00990', 'status': 'paused', 'variations': [], 'catalog_listing': False}, {'ml_code': 'MLB949776093', 'inventory_id': 'QYGO52123', 'status': 'paused', 'variations': [], 'catalog_listing': False}, {'ml_code': 'MLB949788598', 'inventory_id': 'ERIM51807', 'status': 'paused', 'variations': [], 'catalog_listing': False}, {'ml_code': 'MLB949790387', 'inventory_id': 'DGBX50777', 'status': 'paused', 'variations': [], 'catalog_listing': False}, {'ml_code': 'MLB9502979

Unnamed: 0,ml_code,inventory_id,status,variations,catalog_listing
300,MLB3370131403,YDQA03755,paused,[],True


#### variations: variation_id,  attribute_combination: value_id, value_name, seller_sku ,inventory_id 

In [185]:
resultados_variations = []

for item in json_list_item:
    # Extrair os valores comuns para cada item
    first_id = item.get("id")
    inventory_id = item.get("inventory_id")
    logistic_type = item.get("shipping", {}).get("logistic_type")

    # Iterar sobre cada variação no item
    for variacao in item.get("variations", []):
        # Extrair os valores específicos para cada variação
        variation_id = variacao.get("id")
        variation_seller_sku = variacao.get("seller_custom_field")
        variation_inventory_id = variacao.get("inventory_id")
        attribute_combination = variacao.get("attribute_combinations", [{}])[0]
        value_id = attribute_combination.get("value_id")
        value_name = attribute_combination.get("value_name")
        item_relations = attribute_combination.get("item_relations", [{}])[0]

        # Adicionar os resultados_variations à lista
        resultados_variations.append(
            {
                "ml_code": first_id,
                "inventory_id": inventory_id,
                # "logistic_type": logistic_type,
                "variation_id": variation_id,
                # "value_id": value_id,
                "value_name": value_name,
                # "var_seller_sku": variation_seller_sku,
                "variation_inventory_id": variation_inventory_id,
                # "item_relations":item_relations,
            }
        )

# Exibir os resultados_variations
print(resultados_variations)
df_variations = pd.DataFrame(resultados_variations)
print(df_variations.shape)
df_variations.sample()

[{'ml_code': 'MLB1398114866', 'inventory_id': None, 'variation_id': 174302125663, 'value_name': '0.010', 'variation_inventory_id': 'DXJZ75064'}, {'ml_code': 'MLB1398114866', 'inventory_id': None, 'variation_id': 174302061130, 'value_name': '0.012', 'variation_inventory_id': 'TNXB83665'}, {'ml_code': 'MLB1398114866', 'inventory_id': None, 'variation_id': 174301393967, 'value_name': '0.009', 'variation_inventory_id': 'JDMV76187'}, {'ml_code': 'MLB1398114866', 'inventory_id': None, 'variation_id': 174302805456, 'value_name': '0.011', 'variation_inventory_id': 'RJUL81287'}, {'ml_code': 'MLB1403147752', 'inventory_id': None, 'variation_id': 48946752091, 'value_name': 'Madeira', 'variation_inventory_id': 'GLMK21133'}, {'ml_code': 'MLB1403024540', 'inventory_id': None, 'variation_id': 48941021297, 'value_name': 'Madeira', 'variation_inventory_id': 'ABCB20467'}, {'ml_code': 'MLB1418532096', 'inventory_id': None, 'variation_id': 49773918421, 'value_name': 'Preto/Madeira', 'variation_inventory_i

Unnamed: 0,ml_code,inventory_id,variation_id,value_name,variation_inventory_id
139,MLB2986538160,,175882106540,Laranja,XVUA60406


In [186]:
df_variations["variation_inventory_id"].value_counts()

variation_inventory_id
DXJZ75064    1
YLJM05104    1
ZRDQ91074    1
SIZW84848    1
TAPQ85749    1
            ..
LNUD04074    1
LUHT04035    1
CLMF99895    1
ELQK04205    1
BZGW51616    1
Name: count, Length: 170, dtype: int64

In [187]:
df_variations["inventory_id"].value_counts()

inventory_id
QIXA78418    6
WLTP49457    5
VLTS11268    3
Name: count, dtype: int64

In [188]:
df_variations["variation_inventory_id"].value_counts()

variation_inventory_id
DXJZ75064    1
YLJM05104    1
ZRDQ91074    1
SIZW84848    1
TAPQ85749    1
            ..
LNUD04074    1
LUHT04035    1
CLMF99895    1
ELQK04205    1
BZGW51616    1
Name: count, Length: 170, dtype: int64

In [189]:
id = "UYFJ61970"
xpto = df_variations[df_variations["variation_inventory_id"] == id]
xpto

Unnamed: 0,ml_code,inventory_id,variation_id,value_name,variation_inventory_id
141,MLB2986538160,,175882905476,Roxo,UYFJ61970


In [190]:
id = "MLB2986538160"
xpto = df_variations[df_variations["ml_code"] == id]
xpto

Unnamed: 0,ml_code,inventory_id,variation_id,value_name,variation_inventory_id
136,MLB2986538160,,175888357629,Branco,YHXD60928
137,MLB2986538160,,175883216483,Pink,BUIO60614
138,MLB2986538160,,175887933859,Verde,RKDM61361
139,MLB2986538160,,175882106540,Laranja,XVUA60406
140,MLB2986538160,,175886400744,Amarelo,LXOO59456
141,MLB2986538160,,175882905476,Roxo,UYFJ61970
142,MLB2986538160,,175887797707,Azul,RXXA58942
143,MLB2986538160,,175887797708,Vermelho,PPWL59058


In [191]:
df_variations["inventory_id"].value_counts()

inventory_id
QIXA78418    6
WLTP49457    5
VLTS11268    3
Name: count, dtype: int64

In [192]:
df_variations["value_name"].value_counts()

value_name
Preto                      20
Marrom                     18
1,5                         6
Vermelho                    6
2                           6
                           ..
0.045                       1
2 PARES 5B                  1
2 PARES 5A                  1
3 PARES 5B - 3 PARES 7A     1
2 Pares 7A                  1
Name: count, Length: 69, dtype: int64

### Unindo as duas tabelas

In [242]:
df_sku_var = pd.merge(
    df_sku,
    df_variations,
    left_on=["ml_code", "inventory_id"],
    right_on=["ml_code", "inventory_id"],
    how="left",
)
df_sku_var = df_sku_var.drop(["variations", "variation_id"], axis=1)
df_sku_var

Unnamed: 0,ml_code,inventory_id,status,catalog_listing,value_name,variation_inventory_id
0,MLB3778561802,AVOX05934,closed,True,,
1,MLB3436515963,WTHI04163,closed,True,,
2,MLB3778538730,JLAX10536,closed,True,,
3,MLB924922735,JFGN34621,paused,False,,
4,MLB949771924,TDFV00990,paused,False,,
...,...,...,...,...,...,...
517,MLB3516894673,PPWL59058,paused,True,,
518,MLB4237823132,WEPI04061,paused,True,,
519,MLB3521876523,HGRL10383,active,True,,
520,MLB3521778415,PSMV02965,active,True,,


#### *se variation_inventory_id = None -> variation_inventory_id == inventory_id && remove inventory_id && variation_inventory_id rename to inventory_id*


In [243]:
df_sku_var["variation_inventory_id"].fillna(df_sku_var["inventory_id"], inplace=True)

# verificando se união funcionou
result = df_sku_var[df_sku_var["variation_inventory_id"] == df_sku_var["inventory_id"]]
result.sample(2)

Unnamed: 0,ml_code,inventory_id,status,catalog_listing,value_name,variation_inventory_id
197,MLB2001015502,ITQQ97036,active,False,,ITQQ97036
353,MLB3286467401,YAZB38863,active,True,,YAZB38863


In [244]:
df_sku_var.columns

Index(['ml_code', 'inventory_id', 'status', 'catalog_listing', 'value_name',
       'variation_inventory_id'],
      dtype='object')

In [245]:
# df_sku_var = df_sku_var.drop(['inventory_id'], axis=1)
cols = ["ml_code", "variation_inventory_id", "value_name", "status", "catalog_listing"]
df_sku_var = df_sku_var[cols]
df_sku_var = df_sku_var.rename(columns={"variation_inventory_id": "inventory_id"})

print(f"Tamanho do dataframe final: {df_sku_var.shape}")
df_sku_var

Tamanho do dataframe final: (522, 5)


Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing
0,MLB3778561802,AVOX05934,,closed,True
1,MLB3436515963,WTHI04163,,closed,True
2,MLB3778538730,JLAX10536,,closed,True
3,MLB924922735,JFGN34621,,paused,False
4,MLB949771924,TDFV00990,,paused,False
...,...,...,...,...,...
517,MLB3516894673,PPWL59058,,paused,True
518,MLB4237823132,WEPI04061,,paused,True
519,MLB3521876523,HGRL10383,,active,True
520,MLB3521778415,PSMV02965,,active,True


#### verificações

In [246]:
count_nul = df_sku_var["inventory_id"].isna().sum()
rows_with_nulls = df_sku_var.loc[df_sku_var["inventory_id"].isna()]
print(df_sku_var.shape)
df_sku_var["inventory_id"].value_counts()
count_nul
rows_with_nulls  # fogão cocktop

(522, 5)


Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing
145,MLB1848500794,,,active,True


In [247]:
# # Mostrar e contar valores únicos em todas as colunas
# for coluna in df_sku_var.columns:
#     unique_values = df_sku_var[coluna].unique()
#     count_unique = df_sku_var[coluna].nunique()
#     print(f"Valores únicos na coluna '{coluna}': {unique_values}")
#     print(f"Número de valores únicos na coluna '{coluna}': {count_unique}")
#     print("\n-----------------------------\n")

## Populando banco de dados

In [199]:
df_sku_var.columns

Index(['ml_code', 'inventory_id', 'value_name', 'status', 'catalog_listing'], dtype='object')

In [200]:
df_sku_var.dtypes

ml_code            object
inventory_id       object
value_name         object
status             object
catalog_listing      bool
dtype: object

In [32]:
conn = psycopg2.connect(**db_config)

cursor = conn.cursor()

for index, row in df_sku_var.iterrows():
    insert_query = sql.SQL(
        "INSERT INTO items (ml_code, inventory_id, value_name, status, catalog_listing) VALUES (%s, %s, %s, %s, %s)"
    )
    cursor.execute(
        insert_query,
        (
            row["ml_code"],
            row["inventory_id"],
            row["value_name"],
            row["status"],
            row["catalog_listing"],
        ),
    )

conn.commit()

# Feche o cursor e a conexão
cursor.close()
conn.close()
print("Dados inseridos com sucesso!")

Dados inseridos com sucesso!


## Ler tabela, atualizar dados e acrescentar novos

In [248]:
try:
    conn = psycopg2.connect(**db_config)

    query = "SELECT * FROM items;"
    df_items = pd.read_sql(query, conn)
except psycopg2.Error as e:
    print(f"Erro do psycopg2 em 'items': {e}")
    # TODO log
except Exception as e:
    print(f"Erro ao consultar 'items': {e}")
    # TODO log

print(df_items.shape)
# df_items

(522, 7)


  df_items = pd.read_sql(query, conn)


### Comparando dataframe da consulta com o banco de dados

In [249]:
df_new = df_sku_var.copy()
df_db = df_items.copy()

In [250]:
# df_ori.dtypes
# df_db.dtypes
# df_new.dtypes

In [251]:
# df_ori.head()
df_db = df_db.drop(columns=["created_at", "updated_at"])
# df_db.head()
# # df_new.head()

In [252]:
# df_db['value_name'].iloc[0]
# # df_new['value_name'].iloc[0]

In [224]:
df_db.replace("NaN", np.nan, inplace=True)
# df_db['value_name'].iloc[0]

In [225]:
# Compare os DataFrames
identicos = df_db.equals(df_new)

# Exiba o resultado
print("Os DataFrames são idênticos:", identicos)

Os DataFrames são idênticos: False


In [255]:
# diferencas = df_db.compare(df_new)
diferencas = df_new.compare(df_db)

# Exiba as diferenças
print("Diferenças entre os DataFrames:")
diferencas

Diferenças entre os DataFrames:


Unnamed: 0_level_0,value_name,value_name
Unnamed: 0_level_1,self,other
0,,
1,,
2,,
3,,
4,,
...,...,...
517,,
518,,
519,,
520,,


In [180]:
df_diferencas = pd.merge(df_new, df_db, how="inner")
df_diferencas = pd.merge(df_new, df_db, how="outer")
df_diferencas = pd.merge(df_new, df_db, how="left")
df_diferencas = pd.merge(df_db, df_new, how="left")
# df_diferencas = pd.merge(df_db, df_new, how='inner')
# df_diferencas = pd.merge(df_db, df_new, how='outer')

df_diferencas

Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing
0,MLB3778561802,AVOX05934,,closed,True
1,MLB3436515963,WTHI04163,,closed,True
2,MLB3778538730,JLAX10536,,closed,True
3,MLB924922735,JFGN34621,,paused,False
4,MLB949771924,TDFV00990,,paused,False
...,...,...,...,...,...
517,MLB3516894673,PPWL59058,,paused,True
518,MLB4237823132,WEPI04061,,paused,True
519,MLB3521876523,HGRL10383,,active,True
520,MLB3521778415,PSMV02965,,active,True


In [173]:
# Encontrar as linhas em df_db que não existem em df_new
df_diferencas = pd.merge(df_db, df_new, how="left", indicator=True)
df_diferencas = pd.merge(df_db, df_new, how="left", indicator=True).loc[
    lambda x: x["_merge"] == "left_only"
]
df_diferencas = (
    pd.merge(df_db, df_new, how="left", indicator=True)
    .loc[lambda x: x["_merge"] == "left_only"]
    .drop(columns=["_merge"])
)

df_diferencas

Unnamed: 0,ml_code,inventory_id,value_name,status,catalog_listing
5,MLB949776093,QYGO52123,,paused,False
6,MLB949788598,ERIM51807,,paused,False
7,MLB949790387,DGBX50777,,paused,False


In [166]:
df_db.shape
df_new.shape

(522, 5)

In [167]:
# Atualizar valores nas colunas desejadas
merged_df["value_name_db"] = merged_df["value_name_new"].combine_first(
    merged_df["value_name_db"]
)
merged_df

Unnamed: 0,ml_code,inventory_id,value_name_db,status_db,catalog_listing_db,value_name_new,status_new,catalog_listing_new
0,MLB3778561802,AVOX05934,,closed,True,,closed,True
1,MLB3436515963,WTHI04163,,closed,True,,closed,True
2,MLB3778538730,JLAX10536,,closed,True,,closed,True
3,MLB924922735,JFGN34621,,paused,False,,paused,False
4,MLB949771924,TDFV00990,,paused,False,,paused,False
...,...,...,...,...,...,...,...,...
514,MLB3516894673,PPWL59058,,paused,True,,paused,True
515,MLB4237823132,WEPI04061,,paused,True,,paused,True
516,MLB3521876523,HGRL10383,,active,True,,active,True
517,MLB3521778415,PSMV02965,,active,True,,active,True


In [169]:
merged_df["catalog_listing_db"] = merged_df["catalog_listing_new"].combine_first(
    merged_df["catalog_listing_db"]
)
merged_df

Unnamed: 0,ml_code,inventory_id,value_name_db,status_db,catalog_listing_db,value_name_new,status_new,catalog_listing_new
0,MLB3778561802,AVOX05934,,closed,True,,closed,True
1,MLB3436515963,WTHI04163,,closed,True,,closed,True
2,MLB3778538730,JLAX10536,,closed,True,,closed,True
3,MLB924922735,JFGN34621,,paused,False,,paused,False
4,MLB949771924,TDFV00990,,paused,False,,paused,False
...,...,...,...,...,...,...,...,...
514,MLB3516894673,PPWL59058,,paused,True,,paused,True
515,MLB4237823132,WEPI04061,,paused,True,,paused,True
516,MLB3521876523,HGRL10383,,active,True,,active,True
517,MLB3521778415,PSMV02965,,active,True,,active,True
