In [1]:
import json
import os
import time
from pandas import json_normalize
import psycopg2
from psycopg2 import sql
import math
import pandas as pd
import requests
from dotenv import load_dotenv
from datetime import datetime, timedelta
import numpy as np

load_dotenv()

ACCESS_TOKEN_MUSICALCRIS = os.getenv("ACCESS_TOKEN_MUSICALCRIS")
SELLER_ID_MUSICALCRIS = os.getenv("SELLER_ID_MUSICALCRIS")

ACCESS_TOKEN_BUENOSHOPS = os.getenv("ACCESS_TOKEN_BUENOSHOPS")
SELLER_ID_BUENOSHOPS = os.getenv("SELLER_ID_BUENOSHOPS")

ACCESS_TOKEN_MCENTER = os.getenv("ACCESS_TOKEN_MCENTER")
SELLER_ID_MCENTER = os.getenv("SELLER_ID_MCENTER")

HOST = os.getenv("HOST")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")

# Informações de conexão com o banco de dados PostgreSQL
db_config = {
    "host": HOST,
    "database": POSTGRES_DB,
    "user": POSTGRES_USER,
    "password": POSTGRES_PASSWORD,
}

In [2]:
def write_file(json_data, nome_arquivo):
    """
    Escreve dados em um arquivo JSON, adicionando ao arquivo existente se ele já existir.

    Parâmetros:
    - json_data (list): Lista de dados em formato JSON a serem escritos no arquivo.
    - nome_arquivo (str): Nome do arquivo onde os dados serão escritos ou adicionados.

    Exemplo de uso:
    ```python
    json_list = [{'order_id': 1, 'product': 'Item 1'}, {'order_id': 2, 'product': 'Item 2'}]
    write_file(json_list, 'orders.json')
    ```

    Se o arquivo já existir, os dados fornecidos serão adicionados aos dados existentes.
    Se o arquivo não existir, um novo arquivo será criado e os dados serão escritos nele.
    """
    if os.path.exists(nome_arquivo):
        with open(nome_arquivo, "r") as arquivo_existente:
            dados_existente = json.load(arquivo_existente)

        dados_existente.extend(json_data)

        with open(nome_arquivo, "w") as arquivo:
            json.dump(dados_existente, arquivo)
    else:
        with open(nome_arquivo, "w") as arquivo:
            json.dump(json_data, arquivo)

#### Verificando dados de usuário

In [4]:
#### Verificando dados de usuário
url = "https://api.mercadolibre.com/users/me"

payload = {}
# headers = {"Authorization": f"Bearer {ACCESS_TOKEN_BUENOSHOPS}"}
# headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MUSICALCRIS}"}
headers = {"Authorization": f"Bearer {ACCESS_TOKEN_BUENOSHOPS}"}

response = requests.request("GET", url, headers=headers, data=payload)
response = response.text

print(response)

{"id":648304687,"nickname":"BUENO SONORIZAÇÃO","registration_date":"2020-09-18T20:40:32.000-04:00","first_name":"M A BUENO FREDERICO SONORIZACA","last_name":"M A BUENO FREDERICO SONORIZACA","gender":"","country_id":"BR","email":"mabuenosonorizacao@gmail.com","identification":{"number":"37408559000165","type":"CNPJ"},"address":{"address":"Rua Fernandes Pinheiro 168","city":"Ponta Grossa","state":"BR-PR","zip_code":"84010135"},"phone":{"area_code":"42","extension":"","number":"30253124","verified":false},"alternative_phone":{"area_code":"","extension":"","number":""},"user_type":"brand","tags":["brand","large_seller","eshop","mshops","messages_as_seller"],"logo":null,"points":163,"site_id":"MLB","permalink":"http://perfil.mercadolivre.com.br/BUENO+SONORIZA%C3%87%C3%83O","seller_experience":"ADVANCED","bill_data":{"accept_credit_note":"Y"},"seller_reputation":{"level_id":"5_green","power_seller_status":"platinum","transactions":{"canceled":1053,"completed":24379,"period":"historic","ratin

In [5]:
json_list = []
len(json_list)

0

## Coletando orders

In [9]:
# Função para obter dados para um intervalo de datas específico
def get_orders_for_date_range(date_from, date_to, offset=0, limit=50):
    base_url = "https://api.mercadolibre.com/orders/search"

    params = {
        "seller": f"{SELLER_ID_BUENOSHOPS}",
        # "seller": f"{SELLER_ID_MCENTER}",
        "order.date_closed.from": f"{date_from}T00:00:00.000-03:00",
        "order.date_closed.to": f"{date_to}T00:00:00.000-03:00",
        "limit": limit,
        "offset": offset,
    }

    headers = {"Authorization": f"Bearer {ACCESS_TOKEN_BUENOSHOPS}"}
    # headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MCENTER}"}

    # json_list = []
    counter = 0

    try:
        while True:
            response = requests.get(base_url, params=params, headers=headers)
            response.raise_for_status()
            data = response.json()

            if "results" in data:
                json_list.extend(data["results"])
            else:
                break

            if "paging" in data:
                total_paging = data["paging"].get("total")
                if total_paging is None:
                    break

                total_pages = math.ceil(total_paging / params["limit"])
                print(f"Total esperado de páginas: {counter + 1}/{total_pages}")
                print(f"Total de dados esperados: {total_paging}")
                print(f'Offset atual: {params["offset"]}')
                print(f"Intervalo de datas atual: {date_from} - {date_to}")

                if params["offset"] > total_paging:
                    break

                params["offset"] += params["limit"]
                counter += 1
            else:
                break

    except requests.exceptions.RequestException as req_err:
        print(f"Erro ao fazer a requisição para {base_url}: {req_err}")

    except Exception as e:
        print(f"Erro não esperado: {e}")

    print(f"Total de dados coletados para {date_from} - {date_to}: {len(json_list)}")
    return json_list



In [10]:

# Definir os intervalos de datas desejados
date_ranges = [
    ("2023-12-6", "2023-12-12"),
    # Adicione mais intervalos conforme necessário
]

json_list = []  # Certifique-se de iniciar a lista

success_count = 0
total_iterations = len(date_ranges)

# Executar a função para cada intervalo de datas
for index, date_range in enumerate(date_ranges, start=1):
    date_from, date_to = date_range

    json_data = get_orders_for_date_range(date_from, date_to)

    if json_data:
        json_list.extend(json_data)
        success_count += 1

    # Exibir o progresso
    progress_percentage = (index / total_iterations) * 100
    print(f"Progresso: {index}/{total_iterations} ({progress_percentage:.2f}%)")

# Exibir os resultados
print(f"Total de dados coletados para todos os intervalos: {len(json_list)}")
print(f"Número total de iterações: {total_iterations}")
print(f"Número de iterações bem-sucedidas: {success_count}")
print(f"Número de iterações falhadas: {total_iterations - success_count}")

Total esperado de páginas: 1/10
Total de dados esperados: 485
Offset atual: 0
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 2/10
Total de dados esperados: 485
Offset atual: 50
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 3/10
Total de dados esperados: 485
Offset atual: 100
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 4/10
Total de dados esperados: 485
Offset atual: 150
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 5/10
Total de dados esperados: 485
Offset atual: 200
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 6/10
Total de dados esperados: 485
Offset atual: 250
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 7/10
Total de dados esperados: 485
Offset atual: 300
Intervalo de datas atual: 2023-12-6 - 2023-12-12
Total esperado de páginas: 8/10
Total de dados esperados: 485
Offset atual: 350
Intervalo de datas a

In [11]:
print(len(json_list))
# json_list

970


In [12]:
write_file(json_list, "../../Data/Output/cris_orders.json")

In [13]:
# with open("../../Data/Output/bueno_orders.json", "r") as file:
#     json_list = json.load(file)

# len(json_list)

### Coletando dados de orders


In [14]:
resultados = []

for item in json_list:
    # Extrair os valores desejados
    payments = item.get("payments", [])
    status = item["status"]
    date_closed = item["date_closed"]
    pack_id = item["pack_id"]
    shipping = item["shipping"]
    order_items = item.get("order_items", [])
    fulfilled = item["fulfilled"]

    for payment in payments:
        order_id = payment["order_id"]
        reason = payment["reason"]
        payment_status = payment["status"]
        date_approved = payment["date_approved"]

    # Inicializa variável para armazenar shipping_id
    shipping_id = None
    shipping_id = shipping["id"]

    # # Inicializa listas para armazenar informações específicas de order_items

    # Itera sobre os dicionários em order_items
    for order_item in order_items:
        item_info = order_item.get("item", {})
        # item_id = item_info.get("id")
        ml_code = item_info["id"]
        title = item_info["title"]
        variation_id = item_info["variation_id"]
        seller_sku = item_info["seller_sku"]
        quantity = order_item["quantity"]
        category_id = item_info["category_id"]

        variation_attributes = item_info.get("variation_attributes", [])

        name = None
        value_id = None
        value_name = None
        for attribute in variation_attributes:
            name = attribute["name"]
            id = attribute["id"]
            value_id = attribute["value_id"]
            value_name = attribute["value_name"]

    # Adicionar os resultados à lista
    resultados.append(
        {
            # "payments": payments,
            "ml_code": ml_code,
            "payment_status": payment_status,
            "order_status": status,
            "order_id": order_id,
            "shipping_id": shipping_id,
            "pack_id": pack_id,
            "title": title,
            "variation_id": variation_id,
            "category_id": category_id,
            "seller_sku": seller_sku,
            "quantity": quantity,
            "variation_name": name,
            "variation_attributes_id": id,
            "variation_value_id": value_id,
            "variation_value_name": value_name,
            "date_approved": date_approved,
            "date_closed": date_closed,
        }
    )

# Exibir os resultados
pd.set_option("display.max_colwidth", None)

df = pd.DataFrame(resultados)

# Tratando dados numéricos
pd.set_option("display.float_format", "{:.0f}".format)
df["shipping_id"] = df["shipping_id"].fillna(0)
df["pack_id"] = df["pack_id"].fillna(0)
df["variation_id"] = df["variation_id"].fillna(0)
df["shipping_id"] = df["shipping_id"].astype("int64")
df["pack_id"] = df["pack_id"].astype("int64")
df["variation_id"] = df["variation_id"].astype("int64")


# Adiciona 1h a mais para chegar ao horário do Brasil
df["date_approved"] = pd.to_datetime(df["date_approved"])
df["date_closed"] = pd.to_datetime(df["date_closed"])
df["date_approved"] = df["date_approved"] + pd.to_timedelta("1 hour")
df["date_approved"] = df["date_approved"].dt.tz_localize(None)
df["date_closed"] = df["date_closed"] + pd.to_timedelta("1 hour")
df["date_closed"] = df["date_closed"].dt.tz_localize(None)


print(df.shape)
df.sample(1)
# df.head(4)

(970, 17)


Unnamed: 0,ml_code,payment_status,order_status,order_id,shipping_id,pack_id,title,variation_id,category_id,seller_sku,quantity,variation_name,variation_attributes_id,variation_value_id,variation_value_name,date_approved,date_closed
249,MLB2680948743,approved,paid,2000007094505718,42885330464,0,Ukulele Concert Winner 23 Sapele Cordas Aquila By Kalani,174653391986,MLB202371,FULLC4WINNER11023,1,Cor,COLOR,52005,Marrom,2023-12-08 15:19:11,2023-12-08 15:20:11


In [None]:
print(df.shape)
df = df.drop_duplicates()
print(df.shape)

In [None]:
df.dtypes

## Coletando logistic type

In [None]:
import requests
import time

json_logistic_list = []
success_count = 0

total_iterations = len(df["shipping_id"])
for index, shipping_id in enumerate(df["shipping_id"], start=1):
    url = f"https://api.mercadolibre.com/shipments/{shipping_id}"

    # headers = {"Authorization": f"Bearer {ACCESS_TOKEN_BUENOSHOPS}"}
    # headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MUSICALCRIS}"}
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN_MCENTER}"}

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()

        json_data = response.json()
        json_logistic_list.append(json_data)
        success_count += 1
        print(f"Obtido com sucesso para shipping_id {shipping_id}: {json_data}")
    except requests.exceptions.RequestException as e:
        print(f"Erro ao obter dados para shipping_id {shipping_id}: {e}")

    # time.sleep(5)

    # Exibir o progresso
    progress_percentage = (index / total_iterations) * 100
    print(f"Progresso: {index}/{total_iterations} ({progress_percentage:.2f}%)")

# Exibir os resultados
print(json_logistic_list)

# Exibir estatísticas de conclusão
print(f"Número total de iterações: {total_iterations}")
print(f"Número de iterações bem-sucedidas: {success_count}")
print(f"Número de iterações falhadas: {total_iterations - success_count}")


In [None]:
print(len(json_logistic_list))
print(len(json_list))

In [None]:
len(json_logistic_list) == len(json_list)

In [None]:
# write_file(json_logistic_list, "../../Data/Output/bueno_shipping.json")
# write_file(json_logistic_list, "../../Data/Output/musicalsris_shipping.json")
write_file(json_logistic_list, "../../Data/Output/mucenter_shipping.json")

In [None]:
# with open("../../Data/Output/shipping.json", "r") as file:
# # with open("orders_05_06.json", "r") as file:
#     json_logistic_list = json.load(file)

# len(json_logistic_list)

### Adicionando logistic_type ao df principal

In [None]:
dfx = pd.DataFrame(json_logistic_list)
# cols = ['id', 'order_id', 'logistic_type']
cols = ["id", "logistic_type"]
dfx = dfx[cols]
# dfx['id'].value_counts()

print(dfx.shape)
dfx.head(3)

In [None]:
# Criar um dicionário a partir de dfx para mapear 'order_id' para 'logistic_type'
order_id_to_logistic_type = dfx.set_index("id")["logistic_type"].to_dict()

# Adicionar a coluna 'logistic_type' a df usando o mapeamento
df_result = df.copy()  # Criar uma cópia de df para manter o original intacto
df_result["logistic_type"] = df_result["shipping_id"].map(order_id_to_logistic_type)

print(df_result.shape)
df_result.head(3)

In [None]:
# x = df_result[df_result['shipping_id'] == 42782133490]
# x = df_result[df_result['ml_code'] == 'MLB4166151108']
# x = df_result[df_result['order_id'] == 2000006859129842]
x = df_result[pd.isna(df_result["logistic_type"])]

x

In [None]:
df_result[["order_id", "shipping_id", "pack_id", "variation_id"]] = df_result[
    ["order_id", "shipping_id", "pack_id", "variation_id"]
].astype(str)
df_result[["order_id", "shipping_id", "pack_id", "variation_id"]]

In [None]:
df_result.dtypes

## Populando tabela

In [None]:
df_result["variation_attributes_id"] = df_result["variation_attributes_id"].replace(
    "<built-in function id>", None, inplace=True
)

df_result[df_result["ml_code"] == "MLB1991060554"]
df_result[df_result["order_id"] == "2000006887872016"]
# df[df['ml_code']=='MLB1991060554']

In [None]:
conn = psycopg2.connect(**db_config)

cursor = conn.cursor()

for index, row in df_result.iterrows():
    # for index, row in df_resultado.iterrows():
    
    # insert_query = sql.SQL(
    #     "INSERT INTO bueno_ml_orders (ml_code,category_id,variation_id,seller_sku,order_id,pack_id,quantity,title,order_status,payment_status,variation_name,variation_attributes_id,variation_value_id,variation_value_name,shipping_id,date_approved,date_closed,logistic_type) VALUES (%s, %s, %s, %s,%s, %s, %s,%s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s)"
    # )
    # print("Inserindo dados:", [value for value in row])
    
    # insert_query = sql.SQL(
    #     "INSERT INTO cris_ml_orders (ml_code,category_id,variation_id,seller_sku,order_id,pack_id,quantity,title,order_status,payment_status,variation_name,variation_attributes_id,variation_value_id,variation_value_name,shipping_id,date_approved,date_closed,logistic_type) VALUES (%s, %s, %s, %s,%s, %s, %s,%s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s)"
    # )
    # print("Inserindo dados:", [value for value in row])
    
    # print("Tipos de dados dos valores:", [type(value) for value in row])
    
    insert_query = sql.SQL(
        "INSERT INTO mcenter_ml_orders (ml_code,category_id,variation_id,seller_sku,order_id,pack_id,quantity,title,order_status,payment_status,variation_name,variation_attributes_id,variation_value_id,variation_value_name,shipping_id,date_approved,date_closed,logistic_type) VALUES (%s, %s, %s, %s,%s, %s, %s,%s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s)"
    )
    print("Inserindo dados:", [value for value in row])
    
    cursor.execute(
        insert_query,
        (
            row["ml_code"],
            row["category_id"],
            row["variation_id"],
            row["seller_sku"],
            row["order_id"],
            row["pack_id"],
            row["quantity"],
            row["title"],
            row["order_status"],
            row["payment_status"],
            row["variation_name"],
            row["variation_attributes_id"],
            row["variation_value_id"],
            row["variation_value_name"],
            row["shipping_id"],
            row["date_approved"],
            row["date_closed"],
            row["logistic_type"],
        ),
    )

conn.commit()

# Feche o cursor e a conexão
cursor.close()
conn.close()
print("Dados inseridos com sucesso!")