In [None]:
# Inclusão das bibliotecas e módulos utilizados
import numpy as np  # Numpy: biblioteca para manipular vetores e matrizes
import pandas as pd # Pandas: biblioteca para manipular tabelas
import matplotlib.pyplot as plt # matplotlib: biblioteca para gráficos
import seaborn as sns # seaborn: biblioteca para gráficos

# Pre-processamento
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.utils import resample
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif, RFECV
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE

# Métricas de desempenho
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, make_scorer, roc_curve, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix, plot_roc_curve, auc

# Classificadores
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, SVR
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

# Pipeline
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_validate
from sklearn.compose import ColumnTransformer
from sklearn.base import TransformerMixin, BaseEstimator, clone
from sklearn.model_selection import cross_val_predict, cross_val_score

# Árvore
from sklearn.tree import export_graphviz
import graphviz

import sklearn as skl
from sklearn import svm
from sklearn import datasets
from sklearn.cluster import KMeans

from statistics import mode
import jellyfish

In [None]:
# IF DATA IS IN YOUR DRIVE
data = pd.read_excel('BLACK_BELT_DATABASE_CASE_FISRT_ANALISYS.xlsx', header=0)
data.info()
data.head()

In [None]:
# Semente aleatória para reproducibilidade dos experimentos
seed = 123

In [None]:
# Função para pré-processar os dados
def preProcessingDataBase(data):

    # PART_NUMBER
    data = data.drop('PART_NUMBER',axis=1)

    # REV
    data = data.drop('REV',axis=1)

    # DESCRIPTION
    data = data.drop('DESCRIPTION',axis=1) #MODIFY TO GROUP BY SIMILARITY

    # RELEASED_DATE_1
    data = data.drop('RELEASED_DATE_1',axis=1)

    # RELEASED_DATE
    data = data.drop('RELEASED_DATE',axis=1)

    # QTN_REV_3D
    data['QTN_REV_3D'] = data['QTN_REV_3D'].dropna()
    data['QTN_REV_3D'] = (data['QTN_REV_3D']-data['QTN_REV_3D'].min())/(data['QTN_REV_3D'].max()-data['QTN_REV_3D'].min())

    # MEAN_SIZE_3D
    data['MEAN_SIZE_3D'] = data['MEAN_SIZE_3D'].dropna()
    data['MEAN_SIZE_3D'] = (data['MEAN_SIZE_3D']-data['MEAN_SIZE_3D'].min())/(data['MEAN_SIZE_3D'].max()-data['MEAN_SIZE_3D'].min())

    # DRAWING_CODE
    data['DRAWING_CODE'] = data['DRAWING_CODE'].dropna()
    data['DRAWING_CODE'] = data['DRAWING_CODE'][data['DRAWING_CODE'] != "EL"]
    data['DRAWING_CODE'] = data['DRAWING_CODE'][data['DRAWING_CODE'] != ""]
    data = data.join(pd.get_dummies(data.pop('DRAWING_CODE')))

    # ATP
    data['ATP'] = data['ATP'].dropna()
    data['ATP'] = data['ATP'][data['ATP'] != ""]
    data = data.join(pd.get_dummies(data.pop('ATP')))


    # QTN_REV_2D
    data['QTN_REV_2D'] = data['QTN_REV_2D'].dropna()
    data['QTN_REV_2D'] = (data['QTN_REV_2D']-data['QTN_REV_2D'].min())/(data['QTN_REV_2D'].max()-data['QTN_REV_2D'].min())


    # QTY_ECN_2D
    data['QTY_ECN_2D'] = data['QTY_ECN_2D'].dropna()
    data['QTY_ECN_2D'] = (data['QTY_ECN_2D']-data['QTY_ECN_2D'].min())/(data['QTY_ECN_2D'].max()-data['QTY_ECN_2D'].min())
	

    # MEAN_SIZE_2D
    data['MEAN_SIZE_2D'] = data['MEAN_SIZE_2D'].dropna()
    data['MEAN_SIZE_2D'] = (data['MEAN_SIZE_2D']-data['MEAN_SIZE_2D'].min())/(data['MEAN_SIZE_2D'].max()-data['MEAN_SIZE_2D'].min())


    # QTY_SHEETS
    data['QTY_SHEETS'] = data['QTY_SHEETS'].dropna()
    data['QTY_SHEETS'] = (data['QTY_SHEETS']-data['QTY_SHEETS'].min())/(data['QTY_SHEETS'].max()-data['QTY_SHEETS'].min())

    # QTY_DIMENSIONS
    data['QTY_DIMENSIONS'] = data['QTY_DIMENSIONS'].dropna()
    data['QTY_DIMENSIONS'] = (data['QTY_DIMENSIONS']-data['QTY_DIMENSIONS'].min())/(data['QTY_DIMENSIONS'].max()-data['QTY_DIMENSIONS'].min())

    # QTY_VIEWS
    data['QTY_VIEWS'] = data['QTY_VIEWS'].dropna()
    data['QTY_VIEWS'] = (data['QTY_VIEWS']-data['QTY_VIEWS'].min())/(data['QTY_VIEWS'].max()-data['QTY_VIEWS'].min())

    # QTY_PART_LIST
    data['QTY_PART_LIST'] = data['QTY_PART_LIST'].dropna()
    data['QTY_PART_LIST'] = (data['QTY_PART_LIST']-data['QTY_PART_LIST'].min())/(data['QTY_PART_LIST'].max()-data['QTY_PART_LIST'].min())

    # QTY_TEXT_INFORMATION
    data['QTY_TEXT_INFORMATION'] = data['QTY_TEXT_INFORMATION'].dropna()
    data['QTY_TEXT_INFORMATION'] = (data['QTY_TEXT_INFORMATION']-data['QTY_TEXT_INFORMATION'].min())/(data['QTY_TEXT_INFORMATION'].max()-data['QTY_TEXT_INFORMATION'].min())

    # CREATED_ON
    data = data.drop('CREATED_ON',axis=1)

    # CREATED_ON_1
    data = data.drop('CREATED_ON_1',axis=1)

    # LEAD_TIME_1
    data = data.drop('LEAD_TIME_1',axis=1)

    # LEAD_TIME
    data['LEAD_TIME'] = data['LEAD_TIME'].dropna()
    data['LEAD_TIME'] = data['LEAD_TIME'][data['LEAD_TIME'] > 0]
    data['LEAD_TIME'] = (data['LEAD_TIME']-data['LEAD_TIME'].min())/(data['LEAD_TIME'].max()-data['LEAD_TIME'].min())

    # TRIM_AND_FINISH
    data['TRIM_AND_FINISH'] = data['TRIM_AND_FINISH'].dropna()
    data['TRIM_AND_FINISH'] = (data['TRIM_AND_FINISH']*-1)

    # LEAD_TIME
    data['LEAD_TO_RELEASE'] = data['LEAD_TO_RELEASE'].dropna()
    data['LEAD_TO_RELEASE'] = data['LEAD_TO_RELEASE'][data['LEAD_TO_RELEASE'] > 0]
    data['LEAD_TO_RELEASE'] = (data['LEAD_TO_RELEASE']-data['LEAD_TO_RELEASE'].min())/(data['LEAD_TO_RELEASE'].max()-data['LEAD_TO_RELEASE'].min())

    # DROP ANY ROW NULL
    data = data.dropna()

    return data

In [None]:
# Pré-processing
data = preProcessingDataBase(data)
# sample_data = data.sample(frac=1)
data.info()

In [None]:
data.head(5)

Visualizar

In [None]:
## Data Without Pre-Processing
# =============================================================================
sns.pairplot(data, hue="QTY_SHEETS", palette="tab10")
# =============================================================================

In [None]:
## Data Without Pre-Processing
# =============================================================================
sns.pairplot(data, hue="QTN_REV_3D", palette="tab10")
# =============================================================================

In [None]:
## Data Without Pre-Processing
# =============================================================================
sns.pairplot(data, hue="QTN_REV_2D", palette="tab10")
# =============================================================================

In [None]:
## Data Without Pre-Processing
# =============================================================================
sns.pairplot(data, hue="DRAWING_CODE (3D)", palette="tab10")
# =============================================================================

In [None]:
## Data Without Pre-Processing
# =============================================================================
sns.pairplot(data, hue="QTY_ECN_2D", palette="tab10")
# =============================================================================

In [None]:
# =============================================================================
sns.relplot(x='LEAD_TIME', y='QTY_ECN_2D', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='QTY_PART_LIST', y='QTY_VIEWS', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='LEAD_TIME', y='QTY_ECN_2D', hue='QTY_SHEETS', palette="tab10", data=data)

sns.relplot(x='QTN_REV_3D', y='QTY_ECN_2D', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='QTN_REV_3D', y='QTY_ECN_2D', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='QTY_ECN_2D', y='QTY_ECN_2D', hue='QTY_SHEETS', palette="tab10", data=data)

sns.relplot(x='QTY_VIEWS', y='QTY_ECN_2D', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='QTY_DIMENSIONS', y='QTY_VIEWS', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='QTY_TEXT_INFORMATION', y='QTY_VIEWS', hue='QTY_SHEETS', palette="tab10", data=data)

sns.relplot(x='MEAN_SIZE_2D', y='MEAN_SIZE_2D', hue='QTY_SHEETS', palette="tab10", data=data)
sns.relplot(x='MEAN_SIZE_3D', y='QTY_VIEWS', hue='QTY_SHEETS', palette="tab10", data=data)
# =============================================================================

In [None]:
# =============================================================================
g = sns.PairGrid(data, diag_sharey=False)
g.map_upper(sns.scatterplot, s=15)
g.map_lower(sns.kdeplot)
g.map_diag(sns.kdeplot, lw=2)
# =============================================================================

Trainning

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
data.head(5)
data.info()

In [None]:
data_new = data

X = data_new.drop(columns="QTY_ECN_2D")
y = data_new["QTY_ECN_2D"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [None]:

X.info()

In [None]:
y.info()

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from keras import layers
from sklearn.model_selection import train_test_split

In [None]:
# IF DATA IS IN YOUR DRIVE
data = pd.read_excel('BLACK_BELT_DATABASE_CASE_FISRT_ANALISYS.xlsx', header=0)
data.info()
data.head()

In [None]:
# Função para pré-processar os dados
def preProcessingDataBase2(data):

    # QTN_REV_3D
    data['QTN_REV_3D'] = data['QTN_REV_3D'].dropna()
    data['QTN_REV_3D'] = (data['QTN_REV_3D']-data['QTN_REV_3D'].min())/(data['QTN_REV_3D'].max()-data['QTN_REV_3D'].min())

    # MEAN_SIZE_3D
    data['MEAN_SIZE_3D'] = data['MEAN_SIZE_3D'].dropna()
    data['MEAN_SIZE_3D'] = (data['MEAN_SIZE_3D']-data['MEAN_SIZE_3D'].min())/(data['MEAN_SIZE_3D'].max()-data['MEAN_SIZE_3D'].min())

    # DRAWING_CODE
    data['DRAWING_CODE'] = data['DRAWING_CODE'].dropna()
    data['DRAWING_CODE'] = data['DRAWING_CODE'][data['DRAWING_CODE'] != "EL"]
    data['DRAWING_CODE'] = data['DRAWING_CODE'][data['DRAWING_CODE'] != ""]
    #data = data.join(pd.get_dummies(data.pop('DRAWING_CODE')))

    # ATP
    data['ATP'] = data['ATP'].dropna()
    data['ATP'] = data['ATP'][data['ATP'] != ""]
    #data = data.join(pd.get_dummies(data.pop('ATP')))


    # QTN_REV_2D
    data['QTN_REV_2D'] = data['QTN_REV_2D'].dropna()
    data['QTN_REV_2D'] = (data['QTN_REV_2D']-data['QTN_REV_2D'].min())/(data['QTN_REV_2D'].max()-data['QTN_REV_2D'].min())


    # QTY_ECN_2D
    data['QTY_ECN_2D'] = data['QTY_ECN_2D'].dropna()
    data['QTY_ECN_2D'] = (data['QTY_ECN_2D']-data['QTY_ECN_2D'].min())/(data['QTY_ECN_2D'].max()-data['QTY_ECN_2D'].min())
	

    # MEAN_SIZE_2D
    data['MEAN_SIZE_2D'] = data['MEAN_SIZE_2D'].dropna()
    data['MEAN_SIZE_2D'] = (data['MEAN_SIZE_2D']-data['MEAN_SIZE_2D'].min())/(data['MEAN_SIZE_2D'].max()-data['MEAN_SIZE_2D'].min())


    # QTY_SHEETS
    data['QTY_SHEETS'] = data['QTY_SHEETS'].dropna()
    data['QTY_SHEETS'] = (data['QTY_SHEETS']-data['QTY_SHEETS'].min())/(data['QTY_SHEETS'].max()-data['QTY_SHEETS'].min())

    # QTY_DIMENSIONS
    data['QTY_DIMENSIONS'] = data['QTY_DIMENSIONS'].dropna()
    data['QTY_DIMENSIONS'] = (data['QTY_DIMENSIONS']-data['QTY_DIMENSIONS'].min())/(data['QTY_DIMENSIONS'].max()-data['QTY_DIMENSIONS'].min())

    # QTY_VIEWS
    data['QTY_VIEWS'] = data['QTY_VIEWS'].dropna()
    data['QTY_VIEWS'] = (data['QTY_VIEWS']-data['QTY_VIEWS'].min())/(data['QTY_VIEWS'].max()-data['QTY_VIEWS'].min())

    # QTY_PART_LIST
    data['QTY_PART_LIST'] = data['QTY_PART_LIST'].dropna()
    data['QTY_PART_LIST'] = (data['QTY_PART_LIST']-data['QTY_PART_LIST'].min())/(data['QTY_PART_LIST'].max()-data['QTY_PART_LIST'].min())

    # QTY_TEXT_INFORMATION
    data['QTY_TEXT_INFORMATION'] = data['QTY_TEXT_INFORMATION'].dropna()
    data['QTY_TEXT_INFORMATION'] = (data['QTY_TEXT_INFORMATION']-data['QTY_TEXT_INFORMATION'].min())/(data['QTY_TEXT_INFORMATION'].max()-data['QTY_TEXT_INFORMATION'].min())

    # LEAD_TIME
    data['LEAD_TIME'] = data['LEAD_TIME'].dropna()
    data['LEAD_TIME'] = data['LEAD_TIME'][data['LEAD_TIME'] > 0]
    data['LEAD_TIME'] = (data['LEAD_TIME']-data['LEAD_TIME'].min())/(data['LEAD_TIME'].max()-data['LEAD_TIME'].min())

    # TRIM_AND_FINISH
    data['TRIM_AND_FINISH'] = data['TRIM_AND_FINISH'].dropna()
    data['TRIM_AND_FINISH'] = (data['TRIM_AND_FINISH']*-1)

    # LEAD_TIME
    data['LEAD_TO_RELEASE'] = data['LEAD_TO_RELEASE'].dropna()
    data['LEAD_TO_RELEASE'] = data['LEAD_TO_RELEASE'][data['LEAD_TO_RELEASE'] > 0]
    data['LEAD_TO_RELEASE'] = (data['LEAD_TO_RELEASE']-data['LEAD_TO_RELEASE'].min())/(data['LEAD_TO_RELEASE'].max()-data['LEAD_TO_RELEASE'].min())

    # DROP ANY ROW NULL
    data = data.dropna()

    return data

In [120]:

FEATURES = [  "QTN_REV_3D",
              "MEAN_SIZE_3D",
              "DRAWING_CODE",
              "ATP",
              "QTN_REV_2D",
              "MEAN_SIZE_2D",
              "QTY_SHEETS",
              "QTY_DIMENSIONS",
              "QTY_VIEWS",
              "QTY_PART_LIST",
              "QTY_TEXT_INFORMATION",
              "LEAD_TIME_1",
              "TRIM_AND_FINISH",
              "LEAD_TO_RELEASE",
              "QTY_ECN_2D" ]

QTN_REV_3D = tf.feature_column.numeric_column("QTN_REV_3D")
MEAN_SIZE_3D = tf.feature_column.numeric_column("MEAN_SIZE_3D")
DRAWING_CODE = tf.feature_column.categorical_column_with_vocabulary_list("DRAWING_CODE", ["True", "False"])
ATP = tf.feature_column.categorical_column_with_vocabulary_list("ATP", ["True", "False"])
QTN_REV_2D = tf.feature_column.numeric_column("QTN_REV_2D")
MEAN_SIZE_2D = tf.feature_column.numeric_column("MEAN_SIZE_2D")
QTY_SHEETS = tf.feature_column.numeric_column("QTY_SHEETS")
QTY_DIMENSIONS = tf.feature_column.numeric_column("QTY_DIMENSIONS")
QTY_VIEWS = tf.feature_column.numeric_column("QTY_VIEWS")
QTY_PART_LIST = tf.feature_column.numeric_column("QTY_PART_LIST")
QTY_TEXT_INFORMATION = tf.feature_column.numeric_column("QTY_TEXT_INFORMATION")
LEAD_TIME_1 = tf.feature_column.numeric_column("LEAD_TIME_1")
TRIM_AND_FINISH = tf.feature_column.numeric_column("TRIM_AND_FINISH")
LEAD_TO_RELEASE = tf.feature_column.numeric_column("LEAD_TO_RELEASE")

feature_columns = [ QTN_REV_3D, 
                    MEAN_SIZE_3D,
                    DRAWING_CODE,
                    ATP, 
                    QTN_REV_2D,
                    MEAN_SIZE_2D, 
                    QTY_SHEETS, 
                    QTY_DIMENSIONS, 
                    QTY_VIEWS, 
                    QTY_PART_LIST, 
                    QTY_TEXT_INFORMATION, 
                    LEAD_TIME_1, 
                    TRIM_AND_FINISH, 
                    LEAD_TO_RELEASE ]

def input_fn(num_epochs=None, shuffle=True, batch_size=100):
  df = pd.read_csv('BLACK_BELT_DATABASE_CASE_FISRT_ANALISYS.csv',
                    names=FEATURES,
                    dtype={'ATP': str},
                    skipinitialspace=True,
                    header=0)
  labels = df["QTY_ECN_2D"]
  df = df.drop(columns="QTY_ECN_2D", axis=0)
  return tf.compat.v1.estimator.inputs.pandas_input_fn( x=df,
                                                        y=labels,
                                                        batch_size=100,
                                                        num_epochs=3,
                                                        shuffle=True,
                                                        num_threads=5)

In [121]:
model = tf.estimator.LinearClassifier(model_dir=None,
                                      feature_columns=feature_columns)
model.train(input_fn=input_fn(), steps=10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmplxguyscd', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:ten

2022-10-04 23:29:56.227566: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-04 23:29:56.228425: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InternalError'>, Graph execution error:

Unsupported object type float
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmplxguyscd/model.ckpt.
INFO:tensorflow:/tmp/tmplxguyscd/model.ckpt-0.index
INFO:tensorflow:0
INFO:tensorflow:/tmp/tmplxguyscd/model.ckpt-0.meta
INFO:tensorflow:500
INFO:tensorflow:/tmp/tmplxguyscd/model.ckpt-0.data-00000-of-00001
INFO:tensorflow:500
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...


InternalError: Graph execution error:

Unsupported object type float