In [23]:
import datetime
import subprocess
import numpy as np
import pandas as pd
import seaborn as sns 
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import feature_column
from sklearn.model_selection import train_test_split

In [24]:
# Load the raw database from the Excel workbook, resolved relative to the current
# working directory (header=0: the first row supplies the column names), then print
# the per-column non-null counts and dtypes.
data = pd.read_excel('BLACK_BELT_DATABASE_CASE_COMPLETE.xlsx', header=0)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6182 entries, 0 to 6181
Data columns (total 36 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Unnamed: 0            0 non-null      float64       
 1   PART_NUMBER           6182 non-null   object        
 2   REV                   6182 non-null   object        
 3   DESCRIPTION           6182 non-null   object        
 4   CONFIGURATION         6182 non-null   object        
 5   RELEASED_DATE         6182 non-null   datetime64[ns]
 6   OBJECT_ID_3D          6182 non-null   int64         
 7   FILE_NAME_3D          6182 non-null   object        
 8   CLASS_3D              6182 non-null   object        
 9   DRAWING_CODE_3D       6180 non-null   object        
 10  ATP_3D                6182 non-null   object        
 11  QTN_REV_3D            6182 non-null   int64         
 12  MEAN_SIZE_3D          5559 non-null   float64       
 13  OBJECT_ID_2D      

In [26]:
# Pre-processing of the raw database (helpers first, entry point last).
def _min_max_scale(series):
    """Rescale a numeric Series to the [0, 1] range using its own min and max.

    A constant (or empty) series has no spread, so the plain formula would give
    0/0 = NaN; in that case every non-null value is mapped to 0.0 instead.
    """
    lowest = series.min()
    spread = series.max() - lowest
    if not spread > 0:  # also true when spread is NaN (empty / all-null series)
        return series * 0.0
    return (series - lowest) / spread


def _drop_missing(frame, column):
    """Return an independent copy of `frame` without the rows where `column` is null."""
    # .copy() makes later column assignments write to this frame, not to a view
    # of the previous one (avoids SettingWithCopyWarning).
    return frame[frame[column].notna()].copy()


def preProcessingDataBase(data):
    """Select, clean and rescale the columns used by the model.

    The steps run in a fixed order, because each min-max scaling uses the rows
    that are still present at that point:

    1. Drop the columns listed in ``to_drop``.
    2. ``QTN_REV_3D``: drop null rows, min-max scale.
    3. ``DRAWING_CODE_2D`` and ``ATP_2D``: drop null rows (values are left as-is).
    4. ``QTN_REV_2D``: drop null rows, min-max scale.
    5. ``QTY_ECN_2D``: turn into a binary label, 0 when the value is null or 0,
       1 otherwise.
    6. ``LEAD_TIME``, ``MEAN_SIZE_3D``, ``MEAN_SIZE_2D``: drop null rows, min-max scale.
    7. ``QTY_SHEETS``, ``QTY_DIMENSIONS``, ``QTY_VIEWS``, ``QTY_TEXT_INFORMATION``
       (nulls become 0) and ``QTY_PART_LIST`` (nulls become 1): min-max scale.

    Args:
        data: DataFrame containing every column named above and in ``to_drop``.

    Returns:
        A new DataFrame with the processed columns. The input frame is not modified.

    Raises:
        KeyError: if any of the expected columns is missing from ``data``.
    """
    to_drop = ['NEW_DEV',
               'HOV',
               'RTF',
               'COMPLEXITY',
               'CREATED_ON',
               'COMPLETED_ON',
               'CLASS_2D',
               'PART_NUMBER',
               'REV',
               'DESCRIPTION',
               'CONFIGURATION',
               'RELEASED_DATE',
               'OBJECT_ID_3D',
               'FILE_NAME_3D',
               'CLASS_3D',
               'DRAWING_CODE_3D',
               'ATP_3D',
               'OBJECT_ID_2D',
               'FILE_NAME_2D',
               'TRIM_AND_FINISH',
               'NEW_DESIGN',
               'WORKFLOW',
               'Unnamed: 0']

    # Non-mutating drop: the caller's frame keeps all of its columns.
    data = data.drop(columns=to_drop)

    # QTN_REV_3D
    data = _drop_missing(data, 'QTN_REV_3D')
    data['QTN_REV_3D'] = _min_max_scale(data['QTN_REV_3D'])

    # DRAWING_CODE_2D / ATP_2D: only rows without a value are discarded.
    data = _drop_missing(data, 'DRAWING_CODE_2D')
    data = _drop_missing(data, 'ATP_2D')

    # QTN_REV_2D
    data = _drop_missing(data, 'QTN_REV_2D')
    data['QTN_REV_2D'] = _min_max_scale(data['QTN_REV_2D'])

    # QTY_ECN_2D: binary label, 0 = no ECN recorded ("BOM"), 1 = at least one ("RUIM").
    data['QTY_ECN_2D'] = (data['QTY_ECN_2D'].fillna(0) != 0).astype('int64')

    # LEAD_TIME, MEAN_SIZE_3D, MEAN_SIZE_2D: filter then scale, one column at a time.
    # Scaling happens before the later filters, so the surviving rows of an earlier
    # column do not necessarily span the whole [0, 1] range.
    for column in ('LEAD_TIME', 'MEAN_SIZE_3D', 'MEAN_SIZE_2D'):
        data = _drop_missing(data, column)
        data[column] = _min_max_scale(data[column])

    # Count columns: a missing value is replaced by the given default before scaling.
    fill_defaults = {'QTY_SHEETS': 0,
                     'QTY_DIMENSIONS': 0,
                     'QTY_VIEWS': 0,
                     'QTY_PART_LIST': 1,
                     'QTY_TEXT_INFORMATION': 0}
    for column, default in fill_defaults.items():
        data[column] = _min_max_scale(data[column].fillna(default))

    return data

In [27]:
# Replace the raw frame with its pre-processed version, then inspect the result.
# NOTE(review): `data` is rebound here, so running this cell a second time feeds the
# already-processed frame back in and the column drop inside preProcessingDataBase
# raises KeyError. Re-run the loading cell first.
data = preProcessingDataBase(data)
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5555 entries, 0 to 6181
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   QTN_REV_3D            5555 non-null   float64
 1   MEAN_SIZE_3D          5555 non-null   float64
 2   DRAWING_CODE_2D       5555 non-null   object 
 3   ATP_2D                5555 non-null   object 
 4   QTN_REV_2D            5555 non-null   float64
 5   QTY_ECN_2D            5555 non-null   int64  
 6   MEAN_SIZE_2D          5555 non-null   float64
 7   QTY_SHEETS            5555 non-null   float64
 8   QTY_DIMENSIONS        5555 non-null   float64
 9   QTY_VIEWS             5555 non-null   float64
 10  QTY_PART_LIST         5555 non-null   float64
 11  QTY_TEXT_INFORMATION  5555 non-null   float64
 12  LEAD_TIME             5555 non-null   float64
dtypes: float64(10), int64(1), object(2)
memory usage: 607.6+ KB


Unnamed: 0,QTN_REV_3D,MEAN_SIZE_3D,DRAWING_CODE_2D,ATP_2D,QTN_REV_2D,QTY_ECN_2D,MEAN_SIZE_2D,QTY_SHEETS,QTY_DIMENSIONS,QTY_VIEWS,QTY_PART_LIST,QTY_TEXT_INFORMATION,LEAD_TIME
0,0.0,0.014674,PL,F49-1-AFR1,0.0,0,0.010192,0.2,0.357143,0.236364,0.037125,0.268595,0.980932
1,0.0,0.005768,DA,F49-1-AFR1,0.0,0,0.003197,0.1,0.057143,0.109091,0.006044,0.102617,0.980602
2,0.0,0.014559,NM,F49-1-AFR1,0.0,0,0.001954,0.1,0.078571,0.090909,0.000216,0.085399,0.980602
3,0.125,0.026743,IN,F49-1-AFR1,0.125,1,0.034812,0.3,0.0,0.290909,0.061731,0.124656,0.980525
4,0.0,0.041557,TA,F49-1-AFR1,0.0,0,0.138805,0.2,0.078571,0.4,0.197928,0.247245,0.98083


In [28]:
# Post pre-processing: 70% train / 15% validation / 15% test.
# A fixed random_state keeps the partition identical across kernel restarts, so the
# class counts and metrics computed from these splits are reproducible.
data, validation_data = train_test_split(data, test_size=0.3, random_state=42)
validation_data, test_data = train_test_split(validation_data, test_size=0.5, random_state=42)

In [29]:
## Count how many rows carry each QTY_ECN_2D label value
data["QTY_ECN_2D"].value_counts()

0    2880
1    1008
Name: QTY_ECN_2D, dtype: int64

In [None]:
## Balance the label classes
# Bring every class up to the size of the largest one. Sampling with replacement is
# used only for classes that are smaller than the target: GroupBy.sample with a fixed
# n larger than a group (the previous n=4110) raises ValueError when replace=False.
# The seed makes the resampled training set reproducible.
largest_class = data['QTY_ECN_2D'].value_counts().max()
data = pd.concat([
    rows.sample(n=largest_class, replace=len(rows) < largest_class, random_state=42)
    for _, rows in data.groupby('QTY_ECN_2D')
])

In [None]:
## Visualize
# Pair-plot a random 30% sample of `data`, coloring the points by the QTY_ECN_2D label.
sample_data = data.sample(frac=0.3)
sns.pairplot(sample_data, hue="QTY_ECN_2D", palette="tab10")

In [30]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Turn a DataFrame into a batched tf.data.Dataset of (features, label) pairs.

  The 'QTY_ECN_2D' column is split off as the label; all remaining columns are
  passed as a dict keyed by column name. The input frame is copied first, so the
  caller's DataFrame keeps its label column.

  Args:
    dataframe: DataFrame that contains a 'QTY_ECN_2D' column.
    shuffle: when True, shuffle with a buffer as large as the frame before batching.
    batch_size: number of rows per batch.

  Returns:
    The batched dataset.
  """
  features = dataframe.copy()
  target = features.pop('QTY_ECN_2D')
  dataset = tf.data.Dataset.from_tensor_slices((dict(features), target))
  if not shuffle:
    return dataset.batch(batch_size)
  return dataset.shuffle(buffer_size=len(features)).batch(batch_size)

In [31]:
# Columns fed to the network as plain numeric inputs.
to_feature = ['QTN_REV_3D',
              'MEAN_SIZE_3D',
              'QTN_REV_2D',
              'MEAN_SIZE_2D',
              'QTY_SHEETS',
              'QTY_DIMENSIONS',
              'QTY_VIEWS',
              'QTY_PART_LIST',
              'QTY_TEXT_INFORMATION',
              'LEAD_TIME']

feature_columns = [feature_column.numeric_column(header) for header in to_feature]

# String columns: the vocabulary is the set of values present in `data`, and each
# value is mapped to a learned 8-dimensional embedding.
for categorical_name in ('ATP_2D', 'DRAWING_CODE_2D'):
  vocabulary = data[categorical_name].unique()
  categorical = feature_column.categorical_column_with_vocabulary_list(categorical_name, vocabulary)
  feature_columns.append(feature_column.embedding_column(categorical, dimension=8))

# Input layer that assembles all feature columns into one dense tensor.
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [32]:
# Wrap each split in a batched tf.data.Dataset (8 rows per batch).
# Train and validation are shuffled; the test set keeps its row order.
batch_size = 8
train_ds = df_to_dataset(data, shuffle=True, batch_size=batch_size)
validation_ds = df_to_dataset(validation_data, shuffle=True, batch_size=batch_size)
test_ds = df_to_dataset(test_data, shuffle=False, batch_size=batch_size)

In [33]:
# Binary classifier: feature layer -> 16 -> 8 -> 4 hidden units (ReLU, with dropout) -> 1 logit.
model = tf.keras.Sequential([
  feature_layer,
  layers.Dense(16, activation='relu'),
  layers.Dropout(.75),
  layers.Dense(8, activation='relu'),
  layers.Dropout(.50),
  layers.Dense(4, activation='relu'),
  layers.Dropout(.25),
  # Linear output with nothing after it: the loss is configured with from_logits=True,
  # so this layer must emit an unbounded logit. A ReLU here clamps the logit to >= 0
  # (predicted probability never below 0.5), and a Dropout after it would randomly
  # zero the prediction during training.
  layers.Dense(1),
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.16e-3),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              # The model outputs logits, so the decision boundary is 0 (probability 0.5).
              # The plain 'accuracy' string would threshold the raw logits at 0.5.
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])


# One TensorBoard run directory per execution, named by the start timestamp.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Start TensorBoard on the same relative directory the callback writes to, without a
# shell and without a machine-specific absolute path.
# NOTE: every execution of this cell starts another TensorBoard process.
subprocess.Popen(["tensorboard", "--logdir", "logs/fit"])

# Validate on the held-out validation split; validating on the training data would
# hide overfitting.
model.fit(train_ds,
          validation_data=validation_ds,
          epochs=1000,
          callbacks=[tensorboard_callback])



Epoch 1/10000

2022-10-18 23:35:33.718989: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: system has unsupported display driver / cuda driver combination
2022-10-18 23:35:33.719063: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: matheus-notebook
2022-10-18 23:35:33.719082: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: matheus-notebook
2022-10-18 23:35:33.719320: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.85.2
2022-10-18 23:35:33.719377: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 515.65.1
2022-10-18 23:35:33.719391: E tensorflow/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 515.65.1 does not match DSO version 510.85.2 -- cannot find working devices in this configuration

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_



Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.9.1 at http://localhost:6006/ (Press CTRL+C to quit)


Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoch 68/10000
Epo

KeyboardInterrupt: 

In [35]:
# Save the model into the same timestamped directory that holds this run's TensorBoard logs.
model.save(log_dir)













INFO:tensorflow:Assets written to: logs/fit/20221018-233529/assets


INFO:tensorflow:Assets written to: logs/fit/20221018-233529/assets


In [None]:
# Evaluate on the test dataset; evaluate() returns the loss followed by the compiled
# metric (accuracy), which is then printed.
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)