# SPRINT - 3

## Item - Desenvolvimento do processamento dos dados para geração dos indicadores no Pipeline, e gravação na "Consumer Data"

## Item - Desenvolvimento do processamento dos dados para geração da consolidação de dados da produção no Pipeline, e gravação na "Consumer Data"

<br>
<br>

Autor.: Sérgio C. Medina

#### Declaração dos Pacotes, Libs ou Classes utilizadas no processo.

In [1]:
# Declaração dos Pacotes, Libs ou Classes utilizadas no processo.
import os
import io
import math
import pandas as pd
import gcsfs
import pyarrow
import pyarrow.parquet as pq
from google.cloud import storage
from datetime import datetime, timedelta

import sys
sys.path.append('../../pods')

# Name of the Cloud Storage bucket that holds the data zones read below.
bucketName = 'edc-pa-i4-data'

# Point the Google client libraries at the service-account key file.
# The path is assembled with os.path.join so that the literal carries no
# invalid backslash escapes and resolves on Windows, Linux and macOS alike
# (on Windows it yields the same "..\..\secrets\..." value as before).
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(
    '..', '..', 'secrets', 'edc-igti-smedina-335018-21855d6bb4b1.json'
)
#os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "/Users/sergiomedina/Downloads/edc-igti-smedina-4920e12ac565.json"

# Cloud Storage integration helpers (project-local module;
# presumably resolved through the sys.path entry appended above).
from utilGCS import utilGCS
gcs = utilGCS(projectid='edc-igti-smedina-335018', bucketName=bucketName)

# Miscellaneous data-manipulation helpers (project-local module).
from utilFuncs import utilFuncs
func = utilFuncs()




#### Dados na "processing-zone" pasta "DATAOP"

In [3]:
# Production day to process; the cells that follow reuse this value.
dtexec = "2021-11-08"
source = "dataop"
# Built from bucketName instead of repeating the bucket literal, in line
# with the consumer-zone verification cells at the end of the notebook.
folder = f"{bucketName}/processing-zone/{source}"

# Read only the rows of the requested production day.
# (Unfiltered alternative: gcs.read_parquet_to_pandas(path=folder).)
df_dataop = gcs.read_parquet_to_pandas(path=folder, filters=[('DTPROD', '=', dtexec)])

# DTPROD is used as a text merge key further down, so normalise it to str.
df_dataop['DTPROD'] = df_dataop['DTPROD'].astype(str)

In [4]:
# Display the DATAOP rows loaded for the execution date.
df_dataop

Unnamed: 0,OP,DTINI,DTFIM,CODMAT,LOTEFAB,QTDPLAN,DTPROD
0,211108L101,2021-11-08 06:00:00,2021-11-09 05:59:59,TB70PVC,TB70394,1320,2021-11-08


#### Dados na "processing-zone" pasta "DATACONFIRM"

In [None]:
# dtexec is reused from the DATAOP cell above.
source = "dataconfirm"
# Built from bucketName instead of repeating the bucket literal, in line
# with the consumer-zone verification cells at the end of the notebook.
folder = f"{bucketName}/processing-zone/{source}"

# Read only the rows of the requested production day.
# (Unfiltered alternative: gcs.read_parquet_to_pandas(path=folder).)
df_dataconfirm = gcs.read_parquet_to_pandas(path=folder, filters=[('DTPROD', '=', dtexec)])

# DTPROD is used as a text merge key further down, so normalise it to str.
df_dataconfirm['DTPROD'] = df_dataconfirm['DTPROD'].astype(str)

In [None]:
# Display the DATACONFIRM rows loaded for the execution date.
df_dataconfirm

In [None]:
# Confirmation totals per production day, shift (IDTURNO) and batch:
# row count of PACKID, summed QTDUN and KGPACK, and mean KGUNMED.
df_dataconfirm.groupby(by=['DTPROD', 'IDTURNO', 'BATCH']).agg(
    PACKID=('PACKID', 'count'),
    QTDUN=('QTDUN', 'sum'),
    KGPACK=('KGPACK', 'sum'),
    KGUNMED=('KGUNMED', 'mean'),
)


In [None]:
# Confirmation totals per production day and batch (all shifts together):
# row count of PACKID, summed QTDUN and KGPACK, and mean KGUNMED.
df_dataconfirm.groupby(by=['DTPROD', 'BATCH']).agg(
    PACKID=('PACKID', 'count'),
    QTDUN=('QTDUN', 'sum'),
    KGPACK=('KGPACK', 'sum'),
    KGUNMED=('KGUNMED', 'mean'),
)

#### Dados na "processing-zone" pasta "DATAPROD"

In [None]:
# dtexec is reused from the DATAOP cell above; lineid selects the line to load.
lineid = "101"
source = "dataprod"
# Built from bucketName instead of repeating the bucket literal, in line
# with the consumer-zone verification cells at the end of the notebook.
folder = f"{bucketName}/processing-zone/{source}"

# Read only the rows of the requested production day and line.
# (Unfiltered alternative: gcs.read_parquet_to_pandas(path=folder).)
df_dataprod = gcs.read_parquet_to_pandas(
    path=folder,
    filters=[('DTPROD', '=', dtexec), ('LINE', '=', lineid)],
)

# Both columns are compared against text values later, so normalise them to str.
df_dataprod['DTPROD'] = df_dataprod['DTPROD'].astype(str)
df_dataprod['LINE'] = df_dataprod['LINE'].astype(str)

In [None]:
# Export the loaded DATAPROD rows as a ';'-separated CSV for offline inspection.
# The file goes to the current user's Downloads folder rather than to a
# hard-coded personal home directory, so the cell runs on any workstation.
df_dataprod.to_csv(
    path_or_buf=os.path.join(
        os.path.expanduser('~'), 'Downloads', f'{dtexec}-df_dataprod.csv'
    ),
    sep=';',
    index=False,
)

In [None]:
# The parquet read above already filters by DTPROD and LINE, so the
# in-memory .loc filter that used to sit here is not applied.
# Display the loaded DATAPROD rows.
df_dataprod

In [None]:
# Inspect the column dtypes of the loaded DATAPROD frame.
df_dataprod.dtypes

In [None]:
# Piece counters per production day and batch, considering only rows whose
# STSID is 4 or above. where() blanks the other rows to NaN and groupby
# then discards them because their keys are missing.
df_dataprod.where(df_dataprod['STSID'].ge(4)).groupby(by=['DTPROD', 'BATCH']).agg(
    QTDPCS=('QTDPCS', 'sum'),
    QTDGOOD=('QTDGOOD', 'sum'),
    QTDREJECT=('QTDREJECT', 'sum'),
)

### Consolidando dados de Produção


In [None]:
# Start from the production order(s) of the execution date.
df_prod = df_dataop.copy()

# The DATAOP extract displayed earlier in this notebook names the batch
# column LOTEFAB, while the merges below key on BATCH. Align the name when
# needed so the merge does not fail with a missing-key error.
if 'BATCH' not in df_prod.columns and 'LOTEFAB' in df_prod.columns:
    df_prod = df_prod.rename(columns={'LOTEFAB': 'BATCH'})

# Attach the confirmation totals per production day and batch.
# agg() already returns a DataFrame, so no extra pd.DataFrame wrapper is needed.
df_prod = df_prod.merge(
    df_dataconfirm.groupby(
        ['DTPROD', 'BATCH']
    ).agg(
        {
            'PACKID': 'count',
            'QTDUN': 'sum',
            'KGPACK': 'sum',
            'KGUNMED': 'mean'
        }
    ).reset_index(),
    how='inner',
    on=['DTPROD', 'BATCH']
)

# Attach the piece counters and the latest event timestamp, using only
# rows whose STSID is 4 or above.
# NOTE(review): where() masks instead of filtering, which turns integer
# columns into floats; kept as-is because the stored consumer data shown
# at the end of the notebook carries these columns as floats.
df_prod = df_prod.merge(
    df_dataprod.where((df_dataprod.STSID >= 4)).groupby(
        ['DTPROD', 'BATCH']
    ).agg(
        {
            'QTDPCS': 'sum',
            'QTDGOOD': 'sum',
            'QTDREJECT': 'sum',
            'TIMESTAMP': 'max'
        }
    ).reset_index(),
    how='inner',
    on=['DTPROD', 'BATCH']
)

# Minutes between the last recorded event and the limit returned by
# func.end_time for shift 3 (presumably the end of the production day).
# total_seconds() is used because .dt.seconds only exposes the 0..86399
# seconds component: it drops whole days and wraps negative differences.
df_prod['TMAXLIMIT'] = df_prod.apply(lambda row: func.end_time(row['DTPROD'], 3), axis=1)
df_prod['TMAXLIMIT'] = pd.to_datetime(df_prod['TMAXLIMIT'])
df_prod['TMAXDIFF'] = (df_prod['TMAXLIMIT'] - df_prod['TIMESTAMP']).dt.total_seconds() / 60
df_prod['QTDWIP'] = df_prod.apply(lambda row: func.round_down(row['TMAXDIFF'], 0), axis=1)

# Give the aggregated confirmation columns their final names.
df_prod.rename(columns={'PACKID': 'QTDPACKS', 'QTDUN': 'QTDCONFIRM', 'KGPACK': 'TOTKGPACK'}, inplace=True)

# Difference between confirmed units and good pieces.
df_prod['QTDIFF'] = df_prod['QTDCONFIRM'] - df_prod['QTDGOOD']

# Keep only the output columns, in their final order.
df_prod = df_prod[['DTPROD', 'OP', 'DTINI', 'DTFIM', 'CODMAT', 'BATCH', 'QTDPLAN', 'QTDCONFIRM', 'QTDIFF', 'QTDPCS', 'QTDGOOD', 'QTDREJECT', 'QTDWIP', 'QTDPACKS', 'TOTKGPACK', 'KGUNMED']]

df_prod

In [None]:
# Export the consolidated production frame as a ';'-separated CSV with ','
# as the decimal mark. The file goes to the current user's Downloads folder
# rather than to a hard-coded personal home directory.
df_prod.to_csv(
    path_or_buf=os.path.join(
        os.path.expanduser('~'), 'Downloads', f'{dtexec}-df_prod.csv'
    ),
    sep=';',
    index=False,
    decimal=',',
)

In [None]:
# Ratio of confirmed quantity to planned quantity for each consolidated row.
(df_prod['QTDCONFIRM']/df_prod['QTDPLAN'])

#### Dados na "processing-zone" pasta "DATAPROD" - CALC OEE

In [None]:
# Minutes and piece counters summed per production day, shift, line and
# status. (OP and BATCH are deliberately left out of the grouping keys.)
df_sts = (
    df_dataprod
    .groupby(by=['DTPROD', 'IDTURNO', 'LINE', 'STSID', 'STSDS'])
    .agg(
        TOTMIN=('TOTMIN', 'sum'),
        QTDPCS=('QTDPCS', 'sum'),
        QTDGOOD=('QTDGOOD', 'sum'),
        QTDREJECT=('QTDREJECT', 'sum'),
    )
    .reset_index()
)


# Work on a copy so the DATAPROD frame loaded earlier stays untouched.
df = df_dataprod.copy()

# Duplicate the event timestamp under two names so that its earliest and
# latest values can be aggregated side by side, then drop the original.
df['TMIN'] = df['TIMESTAMP']
df['TMAX'] = df['TIMESTAMP']
df = df.drop(columns=['TIMESTAMP'])

# One row per production day, shift and line, built only from rows whose
# STSID is 4 or above. (OP and BATCH are deliberately left out of the keys.)
df = (
    df.where(df['STSID'].ge(4))
    .groupby(by=['DTPROD', 'IDTURNO', 'LINE'])
    .agg(
        TOTMIN=('TOTMIN', 'sum'),
        QTDPCS=('QTDPCS', 'sum'),
        QTDGOOD=('QTDGOOD', 'sum'),
        QTDREJECT=('QTDREJECT', 'sum'),
        TMIN=('TMIN', 'min'),
        TMAX=('TMAX', 'max'),
    )
    .reset_index()
)


# Minutes between the last recorded event of each shift and the limit
# returned by func.end_time (presumably the shift end).
# total_seconds() is used because .dt.seconds only exposes the 0..86399
# seconds component: it drops whole days and wraps negative differences.
df['TMAXLIMIT'] = df.apply(lambda row: func.end_time(row['DTPROD'], row['IDTURNO']), axis=1)
df['TMAXLIMIT'] = pd.to_datetime(df['TMAXLIMIT'])
df['TMAXDIFF'] = (df['TMAXLIMIT'] - df['TMAX']).dt.total_seconds() / 60
df['QTDWIP'] = df.apply(lambda row: func.round_down(row['TMAXDIFF'], 0), axis=1)

# Minutes between the limit returned by func.start_time (presumably the
# shift start) and the first recorded event. This value is not used by any
# later calculation in this cell; the columns are dropped again below.
df['TMINLIMIT'] = df.apply(lambda row: func.start_time(row['DTPROD'], row['IDTURNO']), axis=1)
df['TMINLIMIT'] = pd.to_datetime(df['TMINLIMIT'])
df['TMINDIFF'] = (df['TMIN'] - df['TMINLIMIT']).dt.total_seconds() / 60

# Row-to-row change of TMAXDIFF (0 for the first row), added to the summed
# minutes to obtain the adjusted shift minutes, rounded to whole minutes.
df['DIFF'] = df['TMAXDIFF'].diff().fillna(0)

df['TOTMINADJUSTED'] = round(df['TOTMIN'] + df['DIFF'], 0)

# Discard the helper columns used only for the adjustment above.
df.drop(['TMAX', 'TMAXLIMIT', 'TMAXDIFF'], axis=1, inplace=True)
df.drop(['TMIN', 'TMINLIMIT', 'TMINDIFF', 'DIFF'], axis=1, inplace=True)

# Total minutes of a shift.
df['TPTOTAL'] = 480

print(
    'DEBUG -> [ANTES AJUSTE]:',
    'TOTMINADJUSTED=',
    df['TOTMINADJUSTED'].sum(),
    '\n\r',
    df['TOTMINADJUSTED'],
)

# Cap the adjusted minutes: every value that is not strictly below 480
# is replaced by 480.
df['TOTMINADJUSTED'] = df.apply(
    lambda shift: (
        480
        if not (shift['TOTMINADJUSTED'] < 480)
        else shift['TOTMINADJUSTED']
    ),
    axis=1,
)

print(
    'DEBUG -> [DEPOIS AJUSTE]:',
    'TOTMINADJUSTED=',
    df['TOTMINADJUSTED'].sum(),
    '\n\r',
    df['TOTMINADJUSTED'],
)

# Working minutes: for every shift row, sum TOTMIN over the df_sts rows of
# the same day, shift and line whose STSID is 4 or above.
# (OP and BATCH are deliberately not part of the match.)
df['TPWORKING'] = df.apply(
    lambda shift: df_sts.where(
        df_sts['STSID'].ge(4)
        & df_sts['DTPROD'].eq(shift['DTPROD'])
        & df_sts['IDTURNO'].eq(shift['IDTURNO'])
        & df_sts['LINE'].eq(shift['LINE'])
    ).agg({'TOTMIN': 'sum'}),
    axis=1,
)

print(
    'DEBUG -> [ANTES AJUSTE]:',
    'TPWORKING=',
    df['TPWORKING'].sum(),
    '\n\r',
    df['TPWORKING'],
)

# Working minutes may not exceed the adjusted shift minutes.
df['TPWORKING'] = df.apply(
    lambda shift: (
        shift['TOTMINADJUSTED']
        if not (shift['TPWORKING'] <= shift['TOTMINADJUSTED'])
        else shift['TPWORKING']
    ),
    axis=1,
)

print(
    'DEBUG -> [DEPOIS AJUSTE]:',
    'TPWORKING=',
    df['TPWORKING'].sum(),
    '\n\r',
    df['TPWORKING'],
)

# Planned-stop minutes: for every shift row, sum TOTMIN over the df_sts
# rows of the same day, shift and line whose STSID equals 3.
# (OP and BATCH are deliberately not part of the match.)
df['TPSTOPPLAN'] = df.apply(
    lambda shift: df_sts.where(
        df_sts['STSID'].eq(3)
        & df_sts['DTPROD'].eq(shift['DTPROD'])
        & df_sts['IDTURNO'].eq(shift['IDTURNO'])
        & df_sts['LINE'].eq(shift['LINE'])
    ).agg({'TOTMIN': 'sum'}),
    axis=1,
)

# Non-allocated minutes: for every shift row, sum TOTMIN over the df_sts
# rows of the same day, shift and line whose STSID equals 0.
# (OP and BATCH are deliberately not part of the match.)
df['TPNOALLOC'] = df.apply(
    lambda shift: df_sts.where(
        df_sts['STSID'].eq(0)
        & df_sts['DTPROD'].eq(shift['DTPROD'])
        & df_sts['IDTURNO'].eq(shift['IDTURNO'])
        & df_sts['LINE'].eq(shift['LINE'])
    ).agg({'TOTMIN': 'sum'}),
    axis=1,
)


# Scheduled production minutes: shift total minus non-allocated and
# planned-stop minutes.
df['TPPROG'] = df['TPTOTAL'].sub(df['TPNOALLOC'].add(df['TPSTOPPLAN']))


# Idle minutes recorded in df_sts: for every shift row, sum TOTMIN over the
# rows of the same day, shift and line whose STSID is 1 or 2.
# (OP and BATCH are deliberately not part of the match.)
df['TPIDLE'] = df.apply(
    lambda shift: df_sts.where(
        df_sts['STSID'].ge(1)
        & df_sts['STSID'].lt(3)
        & df_sts['DTPROD'].eq(shift['DTPROD'])
        & df_sts['IDTURNO'].eq(shift['IDTURNO'])
        & df_sts['LINE'].eq(shift['LINE'])
    ).agg({'TOTMIN': 'sum'}),
    axis=1,
)

# Scheduled minutes not covered by working or recorded idle time are added
# on top of the recorded idle time.
# NOTE(review): algebraically this reduces to TPPROG - TPWORKING, so the
# recorded idle minutes cancel out - confirm that this is intended.
df['TPIDLE'] = df['TPPROG'].sub(df['TPWORKING'].add(df['TPIDLE'])).add(df['TPIDLE'])

# Theoretical output in units: working minutes multiplied by a rate of 1.
df['QTDPCSTHEOR'] = df['TPWORKING'].mul(1)

# Units lost to performance: theoretical output minus pieces produced.
df['QTDPCSLOSS'] = df['QTDPCSTHEOR'].sub(df['QTDPCS'])

# OEE factors per shift row, each rounded to three decimals:
# availability, performance, quality and their product.
df['OEEDISP'] = df['TPWORKING'].div(df['TPPROG']).round(3)
df['OEEPERF'] = df['QTDPCS'].div(df['QTDPCSTHEOR']).round(3)
df['OEEQUAL'] = df['QTDGOOD'].div(df['QTDPCS']).round(3)
df['OEE'] = df['OEEDISP'].mul(df['OEEPERF']).mul(df['OEEQUAL']).round(3)

In [None]:
# Inspect the column dtypes of the per-shift OEE frame.
df.dtypes

In [None]:
# Display the time-related columns of the per-shift OEE frame.
df[['DTPROD','IDTURNO','LINE','TPTOTAL','TPPROG','TPWORKING','TOTMINADJUSTED','TPSTOPPLAN','TPNOALLOC','TPIDLE']]


# TOTMIN            float64


In [None]:
# Display the quantity-related columns of the per-shift OEE frame.
df[['DTPROD','IDTURNO','QTDPCS','QTDGOOD','QTDREJECT','QTDWIP','QTDPCSTHEOR','QTDPCSLOSS']]

In [None]:
# Display the OEE factors of the per-shift OEE frame.
df[['DTPROD','IDTURNO','LINE','OEEDISP', 'OEEPERF', 'OEEQUAL', 'OEE']]

In [None]:

# Daily totals: every time and quantity column summed over the shift rows
# of each production day.
df_oee = (
    df.groupby(by=['DTPROD'])
    .agg(
        {
            total: 'sum'
            for total in (
                'TPTOTAL',
                'TPPROG',
                'TPWORKING',
                'TPSTOPPLAN',
                'TPNOALLOC',
                'TPIDLE',
                'QTDPCS',
                'QTDGOOD',
                'QTDREJECT',
                'QTDPCSTHEOR',
                'QTDPCSLOSS',
            )
        }
    )
    .reset_index()
)

# Daily OEE factors recomputed from the summed totals (not averaged from
# the per-shift factors), each rounded to three decimals.
df_oee['OEEDISP'] = df_oee['TPWORKING'].div(df_oee['TPPROG']).round(3)
df_oee['OEEPERF'] = df_oee['QTDPCS'].div(df_oee['QTDPCSTHEOR']).round(3)
df_oee['OEEQUAL'] = df_oee['QTDGOOD'].div(df_oee['QTDPCS']).round(3)
df_oee['OEE'] = df_oee['OEEDISP'].mul(df_oee['OEEPERF']).mul(df_oee['OEEQUAL']).round(3)


df_oee


In [None]:
# Mean of the per-shift OEE factors for each production day, for comparison
# with the factors recomputed from daily totals. (An ungrouped df.agg over
# the same four columns was tried here before and is no longer used.)
df.groupby(by=['DTPROD']).agg(
    OEEDISP=('OEEDISP', 'mean'),
    OEEPERF=('OEEPERF', 'mean'),
    OEEQUAL=('OEEQUAL', 'mean'),
    OEE=('OEE', 'mean'),
)




## Pós Desenvolvimento - Verificação dos dados gravados

In [2]:
# Production day used by the verification reads in the cells that follow.
dtexec = "2021-11-12"

In [3]:
# MES-PROD: read the "mesprod" rows written to the consumer zone for dtexec.
source = "mesprod"
folder = f"{bucketName}/consumer-zone/{source}"
df_mesprod = gcs.read_parquet_to_pandas(
    path=folder,
    filters=[('DTPROD', '=', dtexec)],
)

df_mesprod

Unnamed: 0,OP,DTINI,DTFIM,CODMAT,BATCH,QTDPLAN,QTDCONFIRM,QTDIFF,QTDPCS,QTDGOOD,QTDREJECT,QTDWIP,QTDPACKS,TOTKGPACK,KGUNMED,DTPROD
0,211112L101,2021-11-12 06:00:00,2021-11-13 05:59:59,TB70PVC,TB70407,1320,1257,0.0,1270.0,1257.0,13.0,38.0,24,7284.848,5.795417,2021-11-12


In [4]:
# MES-OEE-LINE: read the "mesoeeline" rows written to the consumer zone for dtexec.
source = "mesoeeline"
folder = f"{bucketName}/consumer-zone/{source}"
df_mesoeeline = gcs.read_parquet_to_pandas(
    path=folder,
    filters=[('DTPROD', '=', dtexec)],
)

df_mesoeeline

Unnamed: 0,IDTURNO,LINE,TOTMIN,QTDPCS,QTDGOOD,QTDREJECT,QTDWIP,TOTMINADJUSTED,TPTOTAL,TPWORKING,...,TPNOALLOC,TPPROG,TPIDLE,QTDPCSTHEOR,QTDPCSLOSS,OEEDISP,OEEPERF,OEEQUAL,OEE,DTPROD
0,1.0,101,438.64,408.0,405.0,3.0,2.0,439.0,480,438.64,...,0.0,480.0,41.36,438.64,30.64,0.914,0.93,0.993,0.844,2021-11-12
1,2.0,101,481.59,456.0,451.0,5.0,0.0,480.0,480,480.0,...,0.0,480.0,0.0,480.0,24.0,1.0,0.95,0.989,0.94,2021-11-12
2,3.0,101,441.77,406.0,401.0,5.0,38.0,480.0,480,441.77,...,27.46,452.54,10.77,441.77,35.77,0.976,0.919,0.988,0.886,2021-11-12


In [8]:
# Display the time and quantity columns of the stored per-shift OEE rows.
df_mesoeeline[['DTPROD','IDTURNO','LINE','TOTMIN','QTDPCS','QTDGOOD','QTDREJECT','QTDWIP',
'TOTMINADJUSTED','TPTOTAL','TPWORKING','TPSTOPPLAN','TPNOALLOC','TPPROG','TPIDLE','QTDPCSTHEOR','QTDPCSLOSS']]

Unnamed: 0,DTPROD,IDTURNO,LINE,TOTMIN,QTDPCS,QTDGOOD,QTDREJECT,QTDWIP,TOTMINADJUSTED,TPTOTAL,TPWORKING,TPSTOPPLAN,TPNOALLOC,TPPROG,TPIDLE,QTDPCSTHEOR,QTDPCSLOSS
0,2021-11-12,1.0,101,438.64,408.0,405.0,3.0,2.0,439.0,480,438.64,0.0,0.0,480.0,41.36,438.64,30.64
1,2021-11-12,2.0,101,481.59,456.0,451.0,5.0,0.0,480.0,480,480.0,0.0,0.0,480.0,0.0,480.0,24.0
2,2021-11-12,3.0,101,441.77,406.0,401.0,5.0,38.0,480.0,480,441.77,0.0,27.46,452.54,10.77,441.77,35.77


In [10]:
# Display the OEE factors of the stored per-shift OEE rows.
df_mesoeeline[['DTPROD','OEEDISP','OEEPERF','OEEQUAL','OEE']]

Unnamed: 0,DTPROD,OEEDISP,OEEPERF,OEEQUAL,OEE
0,2021-11-12,0.914,0.93,0.993,0.844
1,2021-11-12,1.0,0.95,0.989,0.94
2,2021-11-12,0.976,0.919,0.988,0.886


In [5]:
# MES-OEE-DT: read the "mesoeedt" rows written to the consumer zone for dtexec.
source = "mesoeedt"
folder = f"{bucketName}/consumer-zone/{source}"
df_mesoeedt = gcs.read_parquet_to_pandas(
    path=folder,
    filters=[('DTPROD', '=', dtexec)],
)

df_mesoeedt

Unnamed: 0,TPTOTAL,TPPROG,TPWORKING,TPSTOPPLAN,TPNOALLOC,TPIDLE,QTDPCS,QTDGOOD,QTDREJECT,QTDPCSTHEOR,QTDPCSLOSS,OEEDISP,OEEPERF,OEEQUAL,OEE,DTPROD
0,1440,1412.54,1360.41,0.0,27.46,52.13,1270.0,1257.0,13.0,1360.41,90.41,0.963,0.934,0.99,0.89,2021-11-12
