In [1]:
import pandas as pd
from tqdm import tqdm
import numpy as np
import itertools

In [2]:
# Carregar o conjunto de dados
print("Carregando o conjunto de dados...")
df = pd.read_csv('D:/projetos/simpress/testes/arquivo_final-27-05-2024.csv')
df = df[df['BrandName'] == 'HP']
df = df.sort_values(by=['RealDateCapture','SerialNumber'], ascending=[False,True])

Carregando o conjunto de dados...


In [3]:
# Converte a coluna 'RealDateCapture' do df para datetime
df['RealDateCapture'] = pd.to_datetime(df['RealDateCapture'], format='%d-%m-%Y')

In [4]:
df_original = df.to_csv('df_original.csv')

In [5]:
print(f"Conjunto de dados carregado com {len(df)} registros.")

Conjunto de dados carregado com 281680 registros.


In [6]:
# Selecionar impressoras únicas (linhas únicas com base na coluna 'SerialNumber')
imp = df.drop_duplicates(subset='SerialNumber')

In [7]:
imp = imp[['EnterpriseName', 'PrinterDeviceID', 'BrandName', 'PrinterModelName', 'SerialNumber']]

In [8]:
imp

Unnamed: 0,EnterpriseName,PrinterDeviceID,BrandName,PrinterModelName,SerialNumber
293169,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P
293227,9853_TRE_BA,638,HP,LaserJet MFP E52645,BRBSN8RJLM
293222,9853_TRE_BA,629,HP,LaserJet MFP E52645,BRBSN8RJLT
293149,9853_TRE_BA,383,HP,LaserJet MFP E52645,BRBSN8RJMJ
293148,9853_TRE_BA,382,HP,LaserJet MFP E52645,BRBSN8RJMP
...,...,...,...,...,...
289944,9853_TRE_BA,432,HP,LaserJet MFP E52645,BRBSNDH8CT
289568,9853_TRE_BA,654,HP,Color LaserJet Flow E87760,BRBSR9N0DG
288976,9853_TRE_BA,469,HP,LaserJet MFP E52645,BRBSNCJFXH
282532,9853_TRE_BA,656,HP,LaserJet MFP E52645,BRBSN8SKGK


In [9]:
# Generate a DataFrame of dates
date_range = pd.date_range(start='01-01-2023', end='26-05-2024', freq='D')
df_dates = pd.DataFrame({'RealDateCapture': date_range})

In [10]:
# Adicionar a coluna 'key' para combinação
imp['key'] = 1
df_dates['key'] = 1

# Combinação dos DataFrames
df_full_datas = pd.merge(imp, df_dates, on='key').drop('key', axis=1)


In [11]:
df_full_datas

Unnamed: 0,EnterpriseName,PrinterDeviceID,BrandName,PrinterModelName,SerialNumber,RealDateCapture
0,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2023-01-01
1,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2023-01-02
2,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2023-01-03
3,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2023-01-04
4,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2023-01-05
...,...,...,...,...,...,...
177147,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2024-05-22
177148,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2024-05-23
177149,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2024-05-24
177150,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2024-05-25


In [12]:
df_full_datas.to_csv("df_full_datas.csv")

In [13]:
# Fazer o merge com o DataFrame original
df_merged = df_full_datas.merge(df, how='left', on=['RealDateCapture','SerialNumber','EnterpriseName','PrinterDeviceID','BrandName','PrinterModelName'])

In [14]:
# Fill in missing values with NaN
df_merged.fillna(value=np.nan, inplace=True)

In [15]:
df_merged = df_merged.sort_values(by=['RealDateCapture','SerialNumber'], ascending=False)

In [16]:
df_merged.to_csv('df_merged.csv')

In [17]:
df_merged

Unnamed: 0,EnterpriseName,PrinterDeviceID,BrandName,PrinterModelName,SerialNumber,RealDateCapture,AddressName,DateTimeRead,ReferenceMono,ReferenceColor,Engaged
165375,9853_TRE_BA,370,HP,Color LaserJet MFP E77830,CND1N49018,2024-05-26,10.5.201.248 (Network),2024-05-26T22:29:26,5571.0,25316.0,False
164863,9853_TRE_BA,358,HP,Color LaserJet MFP E77830,CND1N49013,2024-05-26,10.5.133.67 (Network),2024-05-26T22:29:27,5874.0,31918.0,False
164351,9853_TRE_BA,388,HP,Color LaserJet MFP E77830,CND1N48061,2024-05-26,10.5.207.44 (Network),2024-05-26T22:29:24,3529.0,13694.0,False
163839,9853_TRE_BA,385,HP,Color LaserJet MFP E77830,CND1N48060,2024-05-26,10.5.202.145 (Network),2024-05-26T22:29:18,5502.0,17316.0,False
163327,9853_TRE_BA,369,HP,Color LaserJet MFP E77830,CND1N470B0,2024-05-26,10.5.201.104 (Network),2024-05-26T22:29:20,6096.0,30413.0,False
...,...,...,...,...,...,...,...,...,...,...,...
2048,9853_TRE_BA,382,HP,LaserJet MFP E52645,BRBSN8RJMP,2023-01-01,10.5.203.15 (Network),2023-01-01T20:42:39,24057.0,0.0,False
1536,9853_TRE_BA,383,HP,LaserJet MFP E52645,BRBSN8RJMJ,2023-01-01,10.5.203.116 (Network),2023-01-01T20:42:39,13658.0,0.0,False
1024,9853_TRE_BA,629,HP,LaserJet MFP E52645,BRBSN8RJLT,2023-01-01,10.5.136.54 (Network),2022-12-22T15:01:47,2703.0,0.0,False
512,9853_TRE_BA,638,HP,LaserJet MFP E52645,BRBSN8RJLM,2023-01-01,,,,,


In [18]:
# impressoras = []
# for impressora in imp:
#     df_parcial = df_merged[df_merged['SerialNumber'] == impressora]
#     df_parcial.bfill()
#     impressoras.append(df_parcial)
# df_filled = pd.concat(impressoras, ignore_index = True)


impressoras = []
for index, row in imp.iterrows():
    serial_number = row['SerialNumber']
    df_parcial = df_merged[df_merged['SerialNumber'] == serial_number].bfill()
    impressoras.append(df_parcial)

df_filled = pd.concat(impressoras, ignore_index=True)

  df_parcial = df_merged[df_merged['SerialNumber'] == serial_number].bfill()


In [19]:
df_filled

Unnamed: 0,EnterpriseName,PrinterDeviceID,BrandName,PrinterModelName,SerialNumber,RealDateCapture,AddressName,DateTimeRead,ReferenceMono,ReferenceColor,Engaged
0,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2024-05-26,10.171.24.60 (Network),2024-05-26T22:29:24,52335.0,0.0,False
1,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2024-05-25,10.171.24.60 (Network),2024-05-25T22:22:31,52335.0,0.0,False
2,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2024-05-24,10.171.24.60 (Network),2024-05-24T22:17:48,52335.0,0.0,False
3,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2024-05-23,10.171.24.60 (Network),2024-05-23T22:14:21,52308.0,0.0,False
4,9853_TRE_BA,280,HP,LaserJet MFP E52645,BRBSN8FC4P,2024-05-22,10.171.24.60 (Network),2024-05-22T22:51:46,52297.0,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...
177147,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2023-01-05,,,,,
177148,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2023-01-04,,,,,
177149,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2023-01-03,,,,,
177150,9853_TRE_BA,659,HP,Laser 408dn,BRBSRC60PQ,2023-01-02,,,,,


In [20]:
df_filled['RealDateCapture'] = pd.to_datetime(df_filled['RealDateCapture'])
# Converter 'RealDateCapture' para string e filtrar por registros que começam com '2024-'
# df_filled_2024 = df_filled[df_filled['RealDateCapture'].astype(str).str.startswith('2024-')]
df_filled_2024 = df_filled[df_filled['RealDateCapture'] > '2024-01-01']
df_filled_2024 = df_filled_2024.fillna(0)

In [21]:
# Salvar o DataFrame resultante
df_filled_2024.to_csv('D:/projetos/simpress/testes/df_filled_2024.csv', index=False)