### Conexão com o banco de dados

In [132]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# PostgreSQL connection settings. Each value can be overridden through the
# standard libpq environment variables so credentials do not have to live in
# the notebook itself.
# NOTE(review): the fallbacks keep the notebook runnable exactly as before,
# but the password fallback is a secret stored in plain text -- rotate it and
# remove the fallback once PGPASSWORD is configured.
usuario = os.environ.get('PGUSER', 'postgres')
# quote_plus percent-encodes characters such as '@' so the password can be
# embedded safely in a user:password@host connection URL.
senha = quote_plus(os.environ.get('PGPASSWORD', '@manaus'))
host = os.environ.get('PGHOST', 'localhost')
porta = os.environ.get('PGPORT', '5432')
nome_do_banco = os.environ.get('PGDATABASE', 'db_mrp')

# Preparação de Dados para Banco de Dados

Este notebook demonstra o processo de preparação de uma planilha para inserção em um banco de dados. Os passos incluem:
- Carregar o arquivo Excel
- Limpar e ajustar os dados
- Tratar valores nulos
- Exportar os dados para um banco de dados SQL

## 1. Carregar o arquivo Excel

In [133]:
import pandas as pd

# Location of the workbook to load.
# NOTE(review): this is an absolute, machine-specific path -- consider moving
# it to a configurable data directory.
file_path = r'C:\Users\Paulo\Documents\project01\data\PPH Daily 12.04.2024.xlsx'

# Open the workbook through a context manager so the underlying file handle
# is closed as soon as the sheet has been read, instead of staying open for
# the lifetime of the kernel.
with pd.ExcelFile(file_path) as xls:
    # Read the 'PPH' sheet, using its first row as the column header
    df_pph = xls.parse('PPH', header=0)

# Show the first rows
df_pph.head()

Unnamed: 0,Org.,Device Type,Model,Suffix,UPH,Buyer,PST,PET,Total,Result,...,D+174 27-May,D+175 28-May,D+176 29-May,D+177 30-May,D+178 31-May,D+179 01-Jun,D+180 02-Jun,D+181 03-Jun,D+182 04-Jun,Space
0,NW7,SRAC (INVERTER),S3NQ09AAQAL,EB2GAM1,330,LGESP,2024-12-11,2025-05-14 07:46:21,34097,0,...,0,0,0,0,0,0,0,0,0,
1,NW7,SRAC (INVERTER),S3NQ09AA31A,EB2GAM1,330,LGESP,2024-12-06,2025-05-23 07:30:00,77904,0,...,0,0,0,0,0,0,0,0,0,
2,NW7,SRAC (INVERTER),S3NQ09AA31B,EB1GAM1,330,LGESP,2024-12-03,2025-05-23 07:30:00,6577,1362,...,0,0,0,0,0,0,0,0,0,
3,NW7,SRAC (INVERTER),S3NQ09AA33A,EB2GAM1,330,LGESP(2),2024-12-06,2025-05-09 07:30:00,13783,0,...,0,0,0,0,0,0,0,0,0,
4,NW7,AIR CONDTIONER,S3NQ09AM31A,EB2GAM1,330,LGESP,2025-01-23,2025-05-23 07:30:00,6055,0,...,0,0,0,0,0,0,0,0,0,


## Remover colunas não utilizadas

In [134]:
# Discard the columns that are not needed for the database load
df_pph = df_pph.drop(
    labels=[
        'Plan',
        'Device  Type',
        'UPH',
        'Buyer',
        'PST',
        'PET',
        'Total',
        'Result',
        'Space',
    ],
    axis='columns',
)

In [135]:
# Display the frame as the cell output to inspect the remaining columns
df_pph

Unnamed: 0,Org.,Model,Suffix,D-3 01-Dec,D-2 02-Dec,D-1 03-Dec,D+0 04-Dec,D+1 05-Dec,D+2 06-Dec,D+3 07-Dec,...,D+173 26-May,D+174 27-May,D+175 28-May,D+176 29-May,D+177 30-May,D+178 31-May,D+179 01-Jun,D+180 02-Jun,D+181 03-Jun,D+182 04-Jun
0,NW7,S3NQ09AAQAL,EB2GAM1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,NW7,S3NQ09AA31A,EB2GAM1,0,0,0,0,0,252,0,...,0,0,0,0,0,0,0,0,0,0
2,NW7,S3NQ09AA31B,EB1GAM1,0,0,1362,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,NW7,S3NQ09AA33A,EB2GAM1,0,0,0,0,0,2613,0,...,0,0,0,0,0,0,0,0,0,0
4,NW7,S3NQ09AM31A,EB2GAM1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,NWH,75UH5J-MP,AWZJLRZ,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
332,NWH,75UH5N-MP,AWZGLRZ,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
333,NWH,75UL3J-EP,AWZFLRZ,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
334,NWH,86UH5J-HP,AWZJLRZ,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [136]:
# Reshape from wide to long: every column other than the three identifier
# columns is turned into a (Date, Quantity) pair, one row per identifier and
# original column. Quantity is then cast to the integer type.
df_pph = (
    df_pph
    .melt(
        id_vars=['Org.', 'Model', 'Suffix'],
        var_name='Date',
        value_name='Quantity',
    )
    .astype({'Quantity': int})
)

In [137]:
# Display the long-format frame as the cell output
df_pph


Unnamed: 0,Org.,Model,Suffix,Date,Quantity
0,NW7,S3NQ09AAQAL,EB2GAM1,D-3 01-Dec,0
1,NW7,S3NQ09AA31A,EB2GAM1,D-3 01-Dec,0
2,NW7,S3NQ09AA31B,EB1GAM1,D-3 01-Dec,0
3,NW7,S3NQ09AA33A,EB2GAM1,D-3 01-Dec,0
4,NW7,S3NQ09AM31A,EB2GAM1,D-3 01-Dec,0
...,...,...,...,...,...
62491,NWH,75UH5J-MP,AWZJLRZ,D+182 04-Jun,0
62492,NWH,75UH5N-MP,AWZGLRZ,D+182 04-Jun,0
62493,NWH,75UL3J-EP,AWZFLRZ,D+182 04-Jun,0
62494,NWH,86UH5J-HP,AWZJLRZ,D+182 04-Jun,0


In [138]:
# Join 'Model' and 'Suffix' with a dot into a single 'Model.Suffix' column,
# then keep only the four columns used from here on, with the new key first.
df_pph = df_pph.assign(
    **{'Model.Suffix': df_pph['Model'] + '.' + df_pph['Suffix']}
)[['Model.Suffix', 'Org.', 'Date', 'Quantity']]

In [139]:
import pandas as pd
from datetime import datetime

# Remove the leading relative-day marker ('D-3 ', 'D+0 ', 'D+182 ', ...) so
# that only the 'dd-Mon' part of each label is left, then parse that text.
# Labels that do not match the format become NaT (errors='coerce'). The
# labels carry no year, so the parsed year is only a placeholder here.
day_month_text = df_pph['Date'].str.replace(r'^D[+-]\d+\s', '', regex=True).str.strip()
df_pph.loc[:, 'Date'] = pd.to_datetime(day_month_text, format='%d-%b', errors='coerce')

# Calendar year at the moment this cell runs
current_year = datetime.now().year

# Definindo uma função para adicionar o ano correto
def add_year(row_date, reference=None, lookback_days=31):
    """Attach a calendar year to a date parsed from a year-less 'dd-Mon' label.

    The result is the first occurrence of the given day/month that falls on
    or after ``reference - lookback_days``. Comparing full dates instead of
    month numbers keeps look-back columns in the right year: with a plan
    anchored on 2 June, '30-May' stays in the same year, and with a plan
    anchored on 2 January, '30-Dec' resolves to the previous year.

    Args:
        row_date: Timestamp whose year is a placeholder, or a missing value
            (NaT / None / NaN).
        reference: Date the plan is anchored on (timezone-naive). Defaults to
            the moment of the call, read once so that year and month always
            come from the same instant.
        lookback_days: How many days before ``reference`` a label may lie and
            still be treated as a past date rather than a date in the
            following year.

    Returns:
        ``row_date`` with its year replaced; missing values are returned
        unchanged.
    """
    # Missing dates (e.g. labels that failed to parse) pass through untouched
    if pd.isna(row_date):
        return row_date

    if reference is None:
        reference = datetime.now()

    # Earliest date that is still accepted as belonging to the plan window
    window_start = pd.Timestamp(reference).normalize() - pd.Timedelta(days=lookback_days)

    # Try the window's starting year first; if that lands before the window,
    # the label must refer to the following year.
    candidate = row_date.replace(year=window_start.year)
    if candidate < window_start:
        candidate = row_date.replace(year=window_start.year + 1)
    return candidate

# Give every parsed date its year by running add_year on each value
dates_with_year = df_pph['Date'].apply(add_year)
df_pph.loc[:, 'Date'] = dates_with_year

# Pin the time of day to 03:00:00 on every date
dates_at_three = df_pph['Date'].apply(lambda ts: ts.replace(hour=3, minute=0, second=0))
df_pph.loc[:, 'Date'] = dates_at_three

# Display the resulting frame
df_pph


Unnamed: 0,Model.Suffix,Org.,Date,Quantity
0,S3NQ09AAQAL.EB2GAM1,NW7,2024-12-01 03:00:00,0
1,S3NQ09AA31A.EB2GAM1,NW7,2024-12-01 03:00:00,0
2,S3NQ09AA31B.EB1GAM1,NW7,2024-12-01 03:00:00,0
3,S3NQ09AA33A.EB2GAM1,NW7,2024-12-01 03:00:00,0
4,S3NQ09AM31A.EB2GAM1,NW7,2024-12-01 03:00:00,0
...,...,...,...,...
62491,75UH5J-MP.AWZJLRZ,NWH,2025-06-04 03:00:00,0
62492,75UH5N-MP.AWZGLRZ,NWH,2025-06-04 03:00:00,0
62493,75UL3J-EP.AWZFLRZ,NWH,2025-06-04 03:00:00,0
62494,86UH5J-HP.AWZJLRZ,NWH,2025-06-04 03:00:00,0


### Salvar no banco

In [140]:
from sqlalchemy.types import Integer
# Assemble the PostgreSQL connection URL (psycopg2 driver) from the
# connection settings and create the SQLAlchemy engine from it
db_url = f'postgresql+psycopg2://{usuario}:{senha}@{host}:{porta}/{nome_do_banco}'
engine = create_engine(db_url)

# Write the frame to 'table_pph': an existing table is replaced, the frame
# index is not written, and Quantity is stored with the SQL Integer type.
df_pph.to_sql(
    name='table_pph',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'Quantity': Integer()},
)
print('Dados salvos com sucesso no banco de dados PostgreSQL.')

Dados salvos com sucesso no banco de dados PostgreSQL.
