In [1]:
import json
import os
import time
from pandas import json_normalize
import psycopg2
from psycopg2 import sql
import math
import pandas as pd
import requests
from dotenv import load_dotenv
from datetime import datetime, timedelta
import numpy as np

load_dotenv()

ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
HOST = os.getenv("HOST")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")


# Informações de conexão com o banco de dados PostgreSQL
db_config = {
    "host": HOST,
    "database": POSTGRES_DB,
    "user": POSTGRES_USER,
    "password": POSTGRES_PASSWORD,
}

# Registra o tempo antes da execução
start_prog = time.time()

#### Verificando dados de usuário

In [2]:
#### Verificando dados de usuário
url = "https://api.mercadolibre.com/users/me"

payload = {}
headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

response = requests.request("GET", url, headers=headers, data=payload)
response = response.text

print(response)

{"id":233632476,"nickname":"MUSICALCENTERPG","registration_date":"2016-11-05T17:35:04.000-04:00","first_name":"MUSICAL CENTER PONTA","last_name":"GROSSA EIRELI -","gender":"","country_id":"BR","email":"musicalc168@gmail.com","identification":{"number":"10885874000127","type":"CNPJ"},"address":{"address":"Estrada da Gruta 775","city":"Louveira","state":"BR-SP","zip_code":"13290431"},"phone":{"area_code":"19","extension":"","number":"38787702","verified":false},"alternative_phone":{"area_code":"","extension":"","number":""},"user_type":"brand","tags":["brand","large_seller","eshop","mshops","messages_as_seller"],"logo":null,"points":4534,"site_id":"MLB","permalink":"http://perfil.mercadolivre.com.br/MUSICALCENTERPG","seller_experience":"ADVANCED","bill_data":{"accept_credit_note":"Y"},"seller_reputation":{"level_id":"5_green","power_seller_status":"platinum","transactions":{"canceled":1546,"completed":33959,"period":"historic","ratings":{"negative":0.05,"neutral":0.03,"positive":0.92},"t

In [23]:
tempo_350_item = 10
total_item = 18556


x = (tempo_350_item*total_item)/350
x /60

8.836190476190476

8.836190476190476

In [None]:
# Selecionar data da pesquisa
date_from = "2023-10-29"
date_to = "2023-12-25"

# URL base da API
base_url = "https://api.mercadolibre.com/orders/search"

# Parâmetros iniciais
params = {
    "seller": "233632476",
    "order.date_closed.from": f"{date_from}T00:00:00.000-03:00",
    "order.date_closed.to": f"{date_to}T00:00:00.000-03:00",
    "limit": 50,
    "offset": 0,
}

headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

json_list = []

counter = 0

# Paginando e coletando dados de orders
try:
    while True:
        response = requests.get(base_url, params=params, headers=headers)
        response.raise_for_status()  # Lança uma exceção se a resposta não for bem-sucedida
        data = response.json()

        if "results" in data:
            json_list.extend(data["results"])
        else:
            break

        # Verifique se há mais páginas
        if "paging" in data:
            total_paging = data["paging"].get("total")
            if total_paging is None:
                break

            total_pages = math.ceil(total_paging / params["limit"])
            print(f"Total esperado de páginas: {counter}/{total_pages}")
            print(f'Offset atual: {params["offset"]}')

            counter += 1
            if params["offset"] >= total_paging:
                break

            params["offset"] += params["limit"]
        else:
            break

except requests.exceptions.RequestException as req_err:
    print(f"Erro ao fazer a requisição para {base_url}: {req_err}")

except Exception as e:
    print(f"Erro não esperado: {e}")

print(f"Total de dados coletados: {len(json_list)}")

In [None]:
print(len(json_list))
json_list

### Coletando dados de pagamento


In [None]:
meta_cols = ["date_closed", "pack_id", "shipping", "order_items", "fulfilled"]

df_payments = json_normalize(json_list, record_path=["payments"], meta=meta_cols)

In [None]:
df_payments.columns

In [None]:
cols = ["date_approved", "status", "shipping", "fulfilled"]
df_payments = df_payments[cols]

col = {"status": "payment_status"}
df_payments.rename(columns=col, inplace=True)

In [None]:
df_payments.head(1)

In [None]:
# Removendo valores nulos
print(df_payments.shape)
df_payments = df_payments.dropna(subset=["date_approved"])
print(df_payments.shape)

In [None]:
# Extraindo shipping_id
df_payments["shipping_id"] = df_payments["shipping"].apply(lambda x: x["id"])
df_payments["shipping_id"] = (
    df_payments["shipping_id"]
    .astype(str)
    .apply(lambda x: x.split(".")[0] if "." in x else x)
)
df_payments = df_payments.drop("shipping", axis=1)
df_payments.head(1)

In [None]:
df_payments["shipping_id"].value_counts()

In [None]:
print(df_payments.shape)

In [None]:
df_payments = df_payments.drop_duplicates()
df_payments.sample()

In [None]:
# Encontrando os índices das linhas com a data mais recente para cada shipping_id
indices_recentes = df_payments.groupby("shipping_id")["date_approved"].idxmax()
indices_recentes

In [None]:
# Verificando se existe mais de um envio
df_payments["shipping_id"].value_counts()[df_payments["shipping_id"].value_counts() > 1]

In [None]:
# Criando um novo DataFrame com base nos índices de envio encontrados
df_payments = df_payments.loc[indices_recentes]
df_payments = df_payments.loc[df_payments["shipping_id"] != "nan"]

df_payments["shipping_id"].value_counts()[df_payments["shipping_id"].value_counts() > 1]

In [None]:
print(df_payments.shape)
df_payments

In [None]:
df_payments["payment_status"].value_counts()

In [None]:
df_payments["shipping_id"].value_counts()

### Coletando dados de orders

In [None]:
df_orders = json_normalize(
    json_list,
    record_path=["order_items"],
    meta=["date_closed", "pack_id", "status", "shipping"],
)

## pd.set_option('display.max_columns', None)
df_orders.columns

In [None]:
df_orders["item.variation_id"].value_counts()

In [None]:
cols = [
    "quantity",
    "item.id",
    "item.title",
    "item.category_id",
    "item.variation_id",
    "item.seller_sku",
    "date_closed",
    "pack_id",
    "status",
    "shipping",
]

df_orders = df_orders[cols]
df_orders["item.variation_id"] = (
    df_orders["item.variation_id"]
    .astype(str)
    .apply(lambda x: x.split(".")[0] if "." in x else x)
)

df_orders.sample(5)

In [None]:
# Extraindo shipping_id
df_orders["shipping_id"] = df_orders["shipping"].apply(lambda x: x["id"])
df_orders["shipping_id"] = (
    df_orders["shipping_id"]
    .astype(str)
    .apply(lambda x: x.split(".")[0] if "." in x else x)
)
df_orders = df_orders.drop("shipping", axis=1)

In [None]:
df_orders.sample(10)

In [None]:
valores_unicos = df_orders["shipping_id"].value_counts()
valores_unicos

In [None]:
df_orders.shape

### Unindo DFs de pagamentos e vendas

In [None]:
df_resultado = pd.merge(df_orders, df_payments, on="shipping_id", how="left")
df_resultado.sample(5)

In [None]:
df_resultado.shape

In [None]:
cols = {
    "item.id": "ml_code",
    "item.category_id": "category_id",
    "item.variation_id": "variation_id",
    "item.seller_sku": "seller_sku",
    "item.title": "title",
    "status": "order_status",
}
df_resultado.rename(columns=cols, inplace=True)
df_resultado.sample()

In [None]:
print(df_resultado.shape)
df_resultado = df_resultado.dropna(subset=["date_approved"])
print(df_resultado.shape)

## Lendo a tabela

In [None]:
# Buscando histórico de vendas na tabela ml_orders_hist para o período definido
try:
    conn = psycopg2.connect(**db_config)

    # Construa a consulta SQL com a condição de data
    sql_query = f"SELECT * FROM ml_orders_hist"
    print(sql_query)
    # Execute a consulta e leia os dados em um DataFrame
    df_orders = pd.read_sql(sql_query, conn)

except psycopg2.Error as e:
    print(f"Erro do psycopg2 ao consultar ml_orders_hist: {e}")
    # logger.error(f"Erro do psycopg2 ao consultar ml_orders_hist: {e}")

except Exception as e:
    print(f"Erro ao consultar ml_orders_hist: {e}")
    # logger.error(f"Erro ao consultar ml_orders_hist: {e}")

finally:
    if conn is not None:
        conn.close()

In [None]:
print(df_orders.shape)
df_orders.sort_values(by='date_closed', ascending=False).head(3)

### Populando a tabela

In [None]:
cols = ['ml_code','category_id','variation_id','seller_sku','pack_id','quantity','title','order_status','payment_status','shipping_id','date_approved','date_closed','fulfilled']
df_resultado = df_resultado[cols]
df_resultado.columns

In [None]:
df_resultado.sort_values(by='date_closed', ascending=False).head(3)

In [None]:
df_orders.sort_values(by='date_closed', ascending=False).head(3)

In [None]:
merged_df = pd.merge(df_resultado, df_orders, on='ID', how='left', indicator=True)
merged_df

In [None]:
import pandas as pd

# Exemplo de DataFrames
data_resultado = {'ID': [1, 2, 3, 4, 5],
                  'Resultado': ['A', 'B', 'C', 'D', 'E']}
df_resultado = pd.DataFrame(data_resultado)

data_orders = {'ID': [2, 4, 6, 8, 10],
               'Quantidade': [20, 40, 60, 80, 100]}
df_orders = pd.DataFrame(data_orders)

# Realizar a comparação
merged_df = pd.merge(df_resultado, df_orders, on=['ml_code', 'seller_sku'], how='left', indicator=True)

# Filtrar apenas as linhas que estão em df_resultado e não em df_orders
resultado_exclusivo = merged_df[merged_df['_merge'] == 'left_only']

# Exibir o resultado
resultado_exclusivo

In [None]:
# Ela contém a relação entre tiny_id e inventory_id do ML
# e não permite pares de valores duplicados entre (ml_inventory_id, tiny_id)

conn = psycopg2.connect(**db_config)

cursor = conn.cursor()

for index, row in df_resultado.iterrows():
    insert_query = sql.SQL(
        "INSERT INTO ml_orders_hist (ml_code,category_id,variation_id,seller_sku,pack_id,quantity,title,order_status,payment_status,shipping_id,date_approved,date_closed,fulfilled) VALUES (%s, %s, %s,%s, %s, %s,%s, %s, %s,%s, %s, %s, %s)"
    )
    cursor.execute(
        insert_query,
        (
            row["ml_code"],
            row["category_id"],
            row["variation_id"],
            row["seller_sku"],
            row["pack_id"],
            row["quantity"],
            row["title"],
            row["order_status"],
            row["payment_status"],
            row["shipping_id"],
            row["date_approved"],
            row["date_closed"],
            row["fulfilled"],
        ),
    )

conn.commit()

# Feche o cursor e a conexão
cursor.close()
conn.close()
print("Dados inseridos com sucesso!")