## Populando a tabela de códigos

In [2]:
import os
import time
from pandas import json_normalize
import psycopg2
from psycopg2 import sql

import pandas as pd
from dotenv import load_dotenv

# Load the variables declared in the project's .env file into the environment.
load_dotenv()

# API token and database settings are all taken from environment variables.
access_token = os.getenv("ACCESS_TOKEN")
HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD = (
    os.getenv(env_name)
    for env_name in ("HOST", "POSTGRES_DB", "POSTGRES_USER", "POSTGRES_PASSWORD")
)

# Database connection settings, later unpacked as keyword arguments
# into psycopg2.connect.
# NOTE(review): HOST is read above but the host below stays hard-coded to
# "localhost" -- confirm which one is intended.
db_config = dict(
    host="localhost",
    database=POSTGRES_DB,
    user=POSTGRES_USER,
    password=POSTGRES_PASSWORD,
)


In [3]:
# Load the code-mapping data from the Excel workbook.
caminho_arquivo_excel = r'../Data/Base/Envios Full.xlsx'

# Worksheet that holds the mapping
nome_planilha = 'Relação Full x Tiny'

# Column headers to read from the worksheet
col1, col2, col3, col4, col5 = 'Código ML', 'ID do anúncio', 'ID Tiny', 'SKU', 'SKU Tiny'

# Worksheet header -> column name used in the rest of the notebook
col = dict(zip(
    (col1, col2, col3, col4, col5),
    ('ml_inventory_id', 'ml_code', 'tiny_id', 'ml_sku', 'tiny_sku'),
))

# Read only the mapped columns and rename them in a single chained expression.
df_codes = (
    pd.read_excel(caminho_arquivo_excel, sheet_name=nome_planilha, usecols=list(col))
    .rename(columns=col)
)

df_codes.shape

(885, 5)

In [4]:
# Preview the first three rows to check the renamed columns.
df_codes.head(3)

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
0,FULLP10NINJA10FTC,JFGN34621,924922735,7899028808537,509517168
1,P10LNINJA10FT,ERIM51807,949788598,7899028824407,598796832
2,FULLLWNINJA10FT,HVCS52763,950214971,7899028808773,565287467


### Adicionando prefixo MLB na coluna 'ml_code'

In [13]:
# Inspect column dtypes and non-null counts before touching 'ml_code'.
df_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 885 entries, 0 to 884
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   ml_sku           883 non-null    object
 1   ml_inventory_id  767 non-null    object
 2   ml_code          885 non-null    int64 
 3   tiny_sku         884 non-null    object
 4   tiny_id          885 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 34.7+ KB


In [14]:
# Cast 'ml_code' from integer to string so a text prefix can be added to it,
# then re-check the dtypes.
df_codes['ml_code'] = df_codes['ml_code'].astype(str)
df_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 885 entries, 0 to 884
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   ml_sku           883 non-null    object
 1   ml_inventory_id  767 non-null    object
 2   ml_code          885 non-null    object
 3   tiny_sku         884 non-null    object
 4   tiny_id          885 non-null    int64 
dtypes: int64(1), object(4)
memory usage: 34.7+ KB


In [15]:
# Prefix every listing ID in 'ml_code' with 'MLB'.
# The cast to str and the startswith guard make this cell safe to re-run:
# values that already carry the prefix are kept as they are (no 'MLBMLB...'),
# and the cell no longer fails if 'ml_code' is still an integer column.
ml_code_text = df_codes['ml_code'].astype(str)
df_codes['ml_code'] = ml_code_text.where(
    ml_code_text.str.startswith('MLB'),  # keep values that are already prefixed
    'MLB' + ml_code_text,                # otherwise add the prefix
)
df_codes.head(1)

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
0,FULLP10NINJA10FTC,JFGN34621,MLB924922735,7899028808537,509517168


In [17]:
# Current (rows, columns) of the frame, for reference.
df_codes.shape

(885, 5)

### Tratando dados duplicados

In [21]:
# Count rows that repeat an earlier row across all columns.
dup_mask = df_codes.duplicated()
n_dup = dup_mask.sum()

print(f'Número de duplicatas: {n_dup}')

# Show every member of each duplicated group (keep=False marks all copies,
# not only the repeats) so they can be inspected side by side.
duplicates = df_codes.loc[df_codes.duplicated(keep=False)]

duplicates

Número de duplicatas: 7


Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
52,CTX3000SIMPLES,KVWJ40778,MLB1233445054,4971850349099,506979980
53,CTX3000SIMPLES,KVWJ40778,MLB1233445054,4971850349099,506979980
252,FULLORIONSP18CH,OFYI79568,MLB1509021649,7898608741905,561968045
253,FULLORIONSP18CH,OFYI79568,MLB1509021649,7898608741905,561968045
269,FULLORIONSPX15HH,WXUT79339,MLB1512798100,7898608740991,681951269
270,FULLORIONSPX15HH,WXUT79339,MLB1512798100,7898608740991,681951269
272,FULLORIONSP16CHH,TDFP80328,MLB1512860650,7898608741899,561967156
273,FULLORIONSP16CHH,TDFP80328,MLB1512860650,7898608741899,561967156
285,FULLTAGIMA27KCOMCAPA,DUCB06017,MLB1557290709,7898563978156,512617044
286,FULLTAGIMA27KCOMCAPA,DUCB06017,MLB1557290709,7898563978156,512617044


In [22]:
# Keep only the first occurrence of each fully identical row
# (the original index labels are preserved, not reset).
df_codes = df_codes.loc[~df_codes.duplicated(keep='first')]
df_codes.shape

(878, 5)

In [25]:
# Preview the first three rows after removing duplicates.
df_codes.head(3)

Unnamed: 0,ml_sku,ml_inventory_id,ml_code,tiny_sku,tiny_id
0,FULLP10NINJA10FTC,JFGN34621,MLB924922735,7899028808537,509517168
1,P10LNINJA10FT,ERIM51807,MLB949788598,7899028824407,598796832
2,FULLLWNINJA10FT,HVCS52763,MLB950214971,7899028808773,565287467


## Populando tabela tiny_ml_codes no DB

In [28]:

# tiny_ml_codes holds the relation between tiny_id and the ML inventory_id
# and does not allow duplicated (ml_inventory_id, tiny_id) pairs.

# The statement is the same for every row, so it is built once.
insert_query = sql.SQL("INSERT INTO tiny_ml_codes (ml_inventory_id, ml_code, ml_sku, tiny_id, tiny_sku) VALUES (%s, %s, %s, %s, %s)")

# One parameter tuple per row, in the column order of the statement above.
# astype(object) boxes the integer column into plain Python ints, matching
# what row-wise iteration over the mixed-dtype frame produced.
# NOTE(review): missing values (df_codes.info() shows nulls in ml_inventory_id,
# ml_sku and tiny_sku) are passed through unchanged as NaN -- confirm whether
# the table should receive NULL for them instead.
insert_columns = ['ml_inventory_id', 'ml_code', 'ml_sku', 'tiny_id', 'tiny_sku']
records = list(
    df_codes[insert_columns].astype(object).itertuples(index=False, name=None)
)

conn = psycopg2.connect(**db_config)
try:
    # `with conn` commits if the block succeeds and rolls back if it raises;
    # `with conn.cursor()` closes the cursor on exit.
    with conn, conn.cursor() as cursor:
        cursor.executemany(insert_query, records)
finally:
    # The connection context manager does not close the connection,
    # so close it explicitly even when an insert fails.
    conn.close()

# Only reached when the transaction was committed.
print('Dados inseridos com sucesso!')

Dados inseridos com sucesso!
