## Populando a tabela de códigos

In [1]:
import os
import time
from pandas import json_normalize
import psycopg2
from psycopg2 import sql

import pandas as pd
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) before reading them.
load_dotenv()

access_token = os.getenv("ACCESS_TOKEN")

# PostgreSQL settings; os.getenv returns None for any variable that is unset.
HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD = (
    os.getenv(name)
    for name in ("HOST", "POSTGRES_DB", "POSTGRES_USER", "POSTGRES_PASSWORD")
)

# Database connection settings, passed as keyword arguments to the driver.
db_config = dict(
    host=HOST,
    database=POSTGRES_DB,
    user=POSTGRES_USER,
    password=POSTGRES_PASSWORD,
)

In [15]:
# Load the "Relação Full x Tiny" worksheet from Google Sheets (xlsx export).
sheet_id = "1bKChKN304n76zBkZTfBxXrbSvSf2ZC4oXap940x8MNc"
sheet_name = "Relação Full x Tiny"
url = f"https://docs.google.com/spreadsheet/ccc?key={sheet_id}&output=xlsx"

# Spreadsheet header -> DataFrame column name.
# This mapping is the single source of truth: its keys select the columns to
# read and its values are the names used in the rest of the notebook.
col = {
    "Código ML": "ml_inventory_id",
    "ID do anúncio": "ml_code",
    "ID Tiny": "tiny_id",
    "SKU": "ml_sku",
    "SKU Tiny": "tiny_sku",
}

# Read only the mapped columns and rename them in one chained expression
# (no in-place mutation, so re-running the cell always starts from the sheet).
df_codes = pd.read_excel(url, sheet_name=sheet_name, usecols=list(col)).rename(
    columns=col
)

df_codes.shape

(877, 5)

In [16]:
# Preview the first three rows of the freshly loaded frame.
df_codes.head(3)

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
0,FULLNI5BM,DSGP06967,1992567302,7897937421021,735947207
1,FULLNI7AM,DSGP06979,1992567302,7897937421007,747848159
2,FULLNI5AM,JJSZ06277,1992567302,7897937421014,698412654


### Adicionando prefixo MLB na coluna 'ml_code'

In [17]:
# Show per-column dtypes and non-null counts before changing 'ml_code'.
df_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 877 entries, 0 to 876
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   ml_sku           875 non-null    object
 1   ml_inventory_id  795 non-null    object
 2   ml_code          877 non-null    int64 
 3   tiny_sku         877 non-null    object
 4   tiny_id          877 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 34.4+ KB


In [18]:
# Convert 'ml_code' to strings so a textual prefix can be concatenated to it.
# astype(str) is the vectorized pandas conversion (replaces a per-element apply).
df_codes["ml_code"] = df_codes["ml_code"].astype(str)
df_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 877 entries, 0 to 876
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   ml_sku           875 non-null    object
 1   ml_inventory_id  795 non-null    object
 2   ml_code          877 non-null    object
 3   tiny_sku         877 non-null    object
 4   tiny_id          877 non-null    int64 
dtypes: int64(1), object(4)
memory usage: 34.4+ KB


In [19]:
# Prefix every 'ml_code' with "MLB".
# Any "MLB" already present is stripped first, so re-running this cell leaves
# the column unchanged instead of producing "MLBMLB...". The astype(str) keeps
# the cell working even if the column still holds integers.
df_codes["ml_code"] = "MLB" + df_codes["ml_code"].astype(str).str.removeprefix("MLB")
df_codes.head(1)

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
0,FULLNI5BM,DSGP06967,MLB1992567302,7897937421021,735947207


In [20]:
# (rows, columns) of the frame before duplicates are handled.
df_codes.shape

(877, 5)

### Tratando dados duplicados

In [21]:
# Count fully identical rows, not counting the first occurrence of each.
n_dup = df_codes.duplicated().sum()

print(f"Número de duplicatas: {n_dup}")

# Keep every member of each duplicated group (first occurrences included)
# so the repeated rows can be inspected side by side.
in_duplicated_group = df_codes.duplicated(keep=False)
duplicates = df_codes.loc[in_duplicated_group]

duplicates

Número de duplicatas: 7


Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
519,FULLORIONSPX15HH,WXUT79339,MLB1512798100,7898608740991,681951269
520,FULLORIONSP16CHH,TDFP80328,MLB1512860650,7898608741899,561967156
521,FULLORIONSP18CH,OFYI79568,MLB1509021649,7898608741905,561968045
607,FULLC4RH16,IYUQ71257,MLB2192258271,7891088100057,737198870
679,FULLTAGIMA27KCOMCAPA,DUCB06017,MLB1557290709,7898563978156,512617044
680,FULLTAGIMA27KCOMCAPA,DUCB06017,MLB1557290709,7898563978156,512617044
681,FULLTAGIMA27KCOMCAPAC,SSBF06040,MLB1557306058,7898563978156,512617044
682,FULLTAGIMA27KCOMCAPAC,SSBF06040,MLB1557306058,7898563978156,512617044
714,CTX3000SIMPLES,KVWJ40778,MLB1233445054,4971850349099,506979980
718,FULLC4RH16,IYUQ71257,MLB2192258271,7891088100057,737198870


In [22]:
# Keep only the first occurrence of each fully identical row.
df_codes = df_codes[~df_codes.duplicated()]
df_codes.shape

(870, 5)

In [23]:
# Preview the first three rows after removing duplicated rows.
df_codes.head(3)

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
0,FULLNI5BM,DSGP06967,MLB1992567302,7897937421021,735947207
1,FULLNI7AM,DSGP06979,MLB1992567302,7897937421007,747848159
2,FULLNI5AM,JJSZ06277,MLB1992567302,7897937421014,698412654


In [24]:
# Drop rows with a missing value.
# NOTE: this removes a row when ANY column is NaN, not only 'ml_inventory_id';
# use dropna(subset=["ml_inventory_id"]) if only that column should be checked.

df_codes = df_codes.dropna(axis="index", how="any")
df_codes.shape

(786, 5)

In [25]:
# Project the three Mercado Livre columns in the order used by the insert below.
ml_columns = ["ml_code", "ml_sku", "ml_inventory_id"]
df_ml_codes = df_codes.loc[:, ml_columns]
df_ml_codes

Unnamed: 0,ml_code,ml_sku,ml_inventory_id
0,MLB1992567302,FULLNI5BM,DSGP06967
1,MLB1992567302,FULLNI7AM,DSGP06979
2,MLB1992567302,FULLNI5AM,JJSZ06277
3,MLB2606294854,FULLVANDOREN2CR1015,DGCL82617
4,MLB2606294854,FULLVANDOREN2CR102,GCYF86172
...,...,...,...
834,MLB949788598,P10LNINJA10FT,ERIM51807
835,MLB950297972,LWNINJA15FT1,TDFV51283
873,MLB1363602423,7643D,OYUK51882
874,MLB2004836143,FULLBASSOVTSL52,BLHH53768


In [27]:
# Remove rows that are identical across all three columns.
# This does not make 'ml_inventory_id' unique on its own: rows sharing an
# inventory id but differing in 'ml_code' or 'ml_sku' are all kept.
df_ml_codes = df_ml_codes.drop_duplicates(keep="first")
df_ml_codes.shape

(633, 3)

## Populando tabela ml_codes no DB

In [73]:
# Insert every row of df_ml_codes into the `ml_codes` table
# (columns: ml_inventory_id, ml_code, ml_sku).
# The insert raises psycopg2's UniqueViolation if an 'ml_inventory_id' is
# repeated in df_ml_codes or already present in the table.

# Built once; the statement is the same for every row.
insert_query = sql.SQL(
    "INSERT INTO ml_codes (ml_inventory_id, ml_code, ml_sku) VALUES (%s, %s, %s)"
)

# One (ml_inventory_id, ml_code, ml_sku) tuple per row, in statement order.
records = zip(
    df_ml_codes["ml_inventory_id"], df_ml_codes["ml_code"], df_ml_codes["ml_sku"]
)

conn = psycopg2.connect(**db_config)
try:
    # `with conn` commits when the block succeeds and rolls back if any insert
    # raises; `with conn.cursor()` closes the cursor either way.
    with conn, conn.cursor() as cursor:
        for record in records:
            cursor.execute(insert_query, record)
finally:
    # Leaving the `with conn` block does not close the connection, so close it
    # here to avoid leaking it when an insert fails.
    conn.close()

print("Dados inseridos com sucesso!")

UniqueViolation: duplicate key value violates unique constraint "ml_codes_pkey"
DETAIL:  Key (ml_inventory_id)=(NQUG25995) already exists.


In [74]:
# Show the rows sharing the inventory id reported by the UniqueViolation.
# Named `inventory_id` rather than `id` so the builtin id() is not shadowed
# for the rest of the kernel session.
inventory_id = "NQUG25995"
x = df_ml_codes[df_ml_codes["ml_inventory_id"] == inventory_id]
x

Unnamed: 0,ml_code,ml_sku,ml_inventory_id
86,MLB1407949289,FULLMAPEXH600,NQUG25995
162,MLB1459771919,FULLMOOERM018,NQUG25995


In [78]:
# Look up all rows of df_ml_codes that carry this inventory id.
# The variable is not called `id`, which would shadow the builtin id().
inventory_id = "NQUG25995"
x = df_ml_codes[df_ml_codes["ml_inventory_id"] == inventory_id]
x

Unnamed: 0,ml_code,ml_sku,ml_inventory_id
86,MLB1407949289,FULLMAPEXH600,NQUG25995
162,MLB1459771919,FULLMOOERM018,NQUG25995


In [79]:
# Flag rows whose 'ml_inventory_id' already appeared in an earlier row
# (the first occurrence of each id is not flagged).
repeated_id_mask = df_ml_codes["ml_inventory_id"].duplicated(keep="first")
dup = df_ml_codes.loc[repeated_id_mask]

# Print a header, then export the flagged rows to a spreadsheet in the
# working directory (nothing is displayed in the notebook).
print("Valores Duplicados na coluna 'ml_inventory_id':")
dup.to_excel("dados_duplicados.xlsx")

Valores Duplicados na coluna 'ml_inventory_id':


In [52]:
# Among the rows in `dup`, keep the ids that occur more than once there,
# then reduce them to one row per id.
id_key = ["ml_inventory_id"]
repeated_within_dup = dup.duplicated(subset=id_key, keep=False)
valores_unicos_duplicatas = dup.loc[repeated_within_dup].drop_duplicates(
    subset=id_key
)
valores_unicos_duplicatas

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
442,FULLKIT01,JDTL81315,MLB1813530858,Palheta,565665071
446,FULLKIT01,FTGG16520,MLB1814087324,Palheta,565665071
450,FULLGIANNINICS14EPNS,ZDLT16176,MLB1814290902,AFINADORKIT,597415799
466,FULLKIT02,KKOU90189,MLB1937452277,RH-01GD,746248995
474,FULLKITPPPCA,WQLG77218,MLB1942106803,Palheta,565665071
478,FULLKIT1,DMTU81740,MLB1943181299,Palheta,565665071
483,FULLKIT3,KMCE82136,MLB1943185413,Palheta,565665071
488,FULLKIT2,FSNB76403,MLB1943185424,Palheta,565665071
493,FULLKITCACPPP,ITDN80392,MLB1945629366,Palheta,565665071
509,FULLKIT03,ZEWV91930,MLB1976555976,Palheta,565665071


In [53]:
# Collect the 'ml_inventory_id' values of `dup` as a plain Python list.
lista_coluna1 = dup["ml_inventory_id"].to_list()

# Print a header followed by the list itself.
print("Lista da coluna 'coluna1':", lista_coluna1, sep="\n")

Lista da coluna 'coluna1':
['NQUG25995', 'FFIA05428', 'DZVJ05686', 'OVRP11252', 'DGLC87134', 'YNOM43193', 'JDTL81315', 'JDTL81315', 'JDTL81315', 'FTGG16520', 'FTGG16520', 'FTGG16520', 'ZDLT16176', 'ZDLT16176', 'ZDLT16176', 'UCDR32919', 'KKOU90189', 'KKOU90189', 'KKOU90189', 'BOZK72331', 'WQLG77218', 'WQLG77218', 'WQLG77218', 'DMTU81740', 'DMTU81740', 'DMTU81740', 'DMTU81740', 'KMCE82136', 'KMCE82136', 'KMCE82136', 'KMCE82136', 'FSNB76403', 'FSNB76403', 'FSNB76403', 'FSNB76403', 'ITDN80392', 'ITDN80392', 'ITDN80392', 'ITDN80392', 'ZEWV91930', 'ZEWV91930', 'ZEWV91930', 'NKRN92702', 'NKRN92702', 'NKRN92702', 'JAOF95919', 'JAOF95919', 'JAOF95919', 'AOLT03050', 'AOLT03050', 'AOLT03050', 'TNXB03680', 'TNXB03680', 'JKBQ98755', 'JKBQ98755', 'JKBQ98755', 'IYUQ03362', 'IYUQ03362', 'IYUQ03362', 'QRSL03944', 'QRSL03944', 'QRSL03944', 'WEPI04061', 'WEPI04061', 'WEPI04061', 'PNZZ04589', 'PNZZ04589', 'PNZZ04589', 'YZGH03888', 'YZGH03888', 'YZGH03888', 'THQG04019', 'THQG04019', 'THQG04019', 'HKMH99637

In [54]:
# Number of entries in the list of repeated inventory ids.
len(lista_coluna1)

164

In [48]:
# Count how many times each 'ml_inventory_id' appears in `dup`.
dup["ml_inventory_id"].value_counts()

ml_inventory_id
KMCE82136    4
DMTU81740    4
FSNB76403    4
ITDN80392    4
EIPZ51427    4
            ..
YNOM43193    1
DGLC87134    1
OVRP11252    1
DZVJ05686    1
POTW78210    1
Name: count, Length: 62, dtype: int64