# 분석목표

CNC 설비와의 네트워크 연결을 통해 실제 가공 생산 데이터를 수집하고,
다양한 기계학습 알고리즘을 활용하여 공구 수명에 따른 가공 불량을 예측하고자 한다.

# 데이터 불러오기

In [1]:
import glob
import os

import numpy as np
import pandas as pd

# Per-experiment summary table (one row per machining run).
train_sample = pd.read_csv("dataset/train.csv", header=0, encoding='utf-8')

# Directory holding the sensor-log CSV files.
path = r'dataset/CNC Virtual Data set _v2'
# Build the pattern with os.path.join so it works on every OS (the previous
# "\*.csv" suffix only matched on Windows), and sort the result: glob returns
# names in arbitrary order, while a later cell pairs the i-th file with the
# i-th row of train_sample.
# NOTE(review): lexicographic order equals experiment order only if the file
# names are zero-padded -- confirm against the dataset.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))

# NumPy copy of the summary table, addressed by column position below.
train_sample_np = np.array(train_sample.copy())

# Load every sensor log; `df` keeps the last frame read for inspection.
li_df = []
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li_df.append(df)

# 데이터 확인

In [2]:
train_sample

# material : workpiece material
# feedrate : travel speed of the tool (mm/s)
# clamp_pressure : clamping pressure applied to the workpiece

# tool_condition : tool used for some time (worn) or new tool (unworn)
# machining_finalized : whether machining was completed
# passed_visual_inspection : result of the visual inspection

Unnamed: 0,No,material,feedrate,clamp_pressure,tool_condition,machining_finalized,passed_visual_inspection
0,1,aluminum,6,4.0,unworn,yes,yes
1,2,aluminum,20,4.0,unworn,yes,yes
2,3,aluminum,6,3.0,unworn,yes,yes
3,4,aluminum,6,2.5,unworn,no,
4,5,aluminum,20,3.0,unworn,no,
5,6,aluminum,6,4.0,worn,yes,no
6,7,aluminum,20,4.0,worn,no,
7,8,aluminum,20,4.0,worn,yes,no
8,9,aluminum,15,4.0,worn,yes,no
9,10,aluminum,12,4.0,worn,yes,no


In [3]:
# Show the frame left in `df` by the loading loop (the last CSV that was read).
df

Unnamed: 0,X_ActualPosition,X_ActualVelocity,X_ActualAcceleration,X_SetPosition,X_SetVelocity,X_SetAcceleration,X_CurrentFeedback,X_DCBusVoltage,X_OutputCurrent,X_OutputVoltage,...,S_CurrentFeedback,S_DCBusVoltage,S_OutputCurrent,S_OutputVoltage,S_OutputPower,S_SystemInertia,M_CURRENT_PROGRAM_NUMBER,M_sequence_number,M_CURRENT_FEEDRATE,Machining_Process
0,176.0,4.9750,-1.250,176.0,5.0,5.0,-1.420,0.0227,327,0.355,...,0.499,2.710000e-19,327,0.0,0.000003,17,1,2,50,Prep
1,176.0,5.0250,23.775,176.0,5.0,5.0,-1.740,0.0224,327,0.589,...,0.790,2.710000e-19,327,0.0,-0.000003,17,1,0,50,Prep
2,176.0,4.9750,-1.250,176.0,5.0,5.0,0.180,0.0329,327,2.190,...,-1.300,2.710000e-19,327,0.0,-0.000006,17,1,0,50,Prep
3,176.0,4.9625,-13.750,176.0,5.0,5.0,-0.619,0.0198,327,1.010,...,-3.810,2.710000e-19,327,0.0,0.000002,17,1,0,50,Prep
4,176.0,4.9750,-4.400,176.0,5.0,5.0,-0.779,0.0311,327,0.372,...,0.790,2.710000e-19,327,0.0,0.000000,17,1,0,50,Prep
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,179.0,3.5625,8.150,179.0,3.5,5.0,-0.941,0.0215,328,1.790,...,0.244,2.770000e-19,328,0.0,-0.000003,17,1,0,20,End
561,178.5,3.5350,30.000,178.5,3.5,5.0,-0.780,0.0202,328,1.070,...,0.244,2.770000e-19,328,0.0,0.000000,17,1,0,20,End
562,178.5,3.5475,17.500,178.5,3.5,5.0,0.501,0.0190,328,0.990,...,0.128,2.770000e-19,328,0.0,0.000000,17,1,0,20,End
563,178.5,3.4475,11.250,178.5,3.5,5.0,-0.298,0.0193,328,1.370,...,0.657,2.770000e-19,328,0.0,-0.000008,17,1,0,20,End


|특성이름|설명|dtype|단위|
|------|------|------|------|
|X_ActualPosition|부품의 실제 x 위치|float64| mm|
|X_ActualVelocity|부품의 실제 x 속도|float64| mm/s|
|X_ActualAcceleration|부품의 실제 x 가속도| float64| mm/s/s|
|X_SetPosition|부품의 참조 x 위치| float64| mm|
|X_SetVelocity|부품의 참조 x 속도| float64| mm/s|
|X_SetAcceleration|부품의 참조 x 가속도| float64| mm/s/s|
|X_CurrentFeedback|전류| float64| A|
|X_DCBusVoltage|전압| float64| V|
|X_OutputCurrent|아웃풋 전류| float64| A|
|X_OutputVoltage|아웃풋 전압| float64| V|
|X_OutputPower|아웃풋 전원| float64| kw|
|Y_OutputPower| y 아웃풋 전원| float64| kw|
|S_SystemInertia| 토크 관성| int64| kg*m^2|
|M_CURRENT_PROGRAM_NUMBER| 프로그램이 CNC에 나열된 번호| int64||
|M_sequence_number| 실행 중인 G-code 라인| int64||
|M_CURRENT_FEEDRATE| 스핀들의 순간 공급 속도| int64||
|Machining_Process| 현재 수행 중인 가공 단계 (문자열 범주형)| object||

# train_sample 데이터 개수 확인

In [4]:
# Display the experiment summary table before tallying the outcomes.
train_sample

Unnamed: 0,No,material,feedrate,clamp_pressure,tool_condition,machining_finalized,passed_visual_inspection
0,1,aluminum,6,4.0,unworn,yes,yes
1,2,aluminum,20,4.0,unworn,yes,yes
2,3,aluminum,6,3.0,unworn,yes,yes
3,4,aluminum,6,2.5,unworn,no,
4,5,aluminum,20,3.0,unworn,no,
5,6,aluminum,6,4.0,worn,yes,no
6,7,aluminum,20,4.0,worn,no,
7,8,aluminum,20,4.0,worn,yes,no
8,9,aluminum,15,4.0,worn,yes,no
9,10,aluminum,12,4.0,worn,yes,no


In [5]:
# Tally the outcome of every experiment in the summary table.
# Column 5 is machining_finalized, column 6 is passed_visual_inspection.
c_pass = 0  # finalized and passed the visual inspection
c_pass_half = 0  # finalized but failed the visual inspection
c_defective = 0  # machining was not finalized

for row in train_sample_np:
    finalized = row[5]
    inspected = row[6]
    if finalized == 'no':
        c_defective += 1
    elif finalized == 'yes':
        # Rows with any other inspection value are not counted at all.
        if inspected == 'yes':
            c_pass += 1
        elif inspected == 'no':
            c_pass_half += 1

print('양품수 : ', c_pass)
print('육안검사 실패수 : ', c_pass_half)
print('공정완료 실패수 : ', c_defective)
print('전체 샘플수 : ', c_pass + c_pass_half + c_defective)

양품수 :  13
육안검사 실패수 :  6
공정완료 실패수 :  6
전체 샘플수 :  25


# Preprocessing  사용자 함수 정의

In [6]:
def tool_condition(input):
    """Encode the tool-condition column (index 4) as 0/1, in place.

    Each row's entry at column 4 is overwritten with 0 when it equals
    'unworn' and with 1 for any other value.

    Args:
        input: 2-D array of experiment rows; it is modified in place.

    Returns:
        The same ``input`` object after encoding.
    """
    for row in input:
        row[4] = 0 if row[4] == 'unworn' else 1
    return input

def item_inspection(input):
    """Write a 3-class outcome label into column 6 of every row, in place.

    The label is derived from column 5 (machining finalized) and the
    current value of column 6 (visual inspection):

    * 2 -- column 5 is 'no'
    * 1 -- column 5 is 'yes' and column 6 is 'no'
    * 0 -- column 5 is 'yes' and column 6 is 'yes'

    Rows matching none of these combinations are left untouched.

    Args:
        input: 2-D array of experiment rows; it is modified in place.

    Returns:
        The same ``input`` object after labelling.
    """
    for row in input:
        finalized = row[5]
        inspected = row[6]
        if finalized == 'no':
            row[6] = 2
        elif finalized == 'yes':
            if inspected == 'no':
                row[6] = 1
            elif inspected == 'yes':
                row[6] = 0
    return input

def machining_process(input, column=47):
    """Encode machining-stage labels as integer codes, in place.

    The previous ``== 'End' or 'end'`` test was always true, so 'Starting'
    never received its own code (9) and every unrecognised value was
    silently turned into 8. A lookup table makes each label map to exactly
    one code.

    Args:
        input: 2-D array of sensor rows; it is modified in place.
        column: index of the column holding the stage label. Defaults to
            47, the position used throughout this notebook.

    Returns:
        The same ``input`` object. Entries that are not one of the known
        labels (including values that are already numeric) are left as
        they are.
    """
    stage_codes = {
        'Prep': 0,
        'Layer 1 Up': 1,
        'Layer 1 Down': 2,
        'Layer 2 Up': 3,
        'Layer 2 Down': 4,
        'Layer 3 Up': 5,
        'Layer 3 Down': 6,
        'Repositioning': 7,
        'End': 8,
        'end': 8,  # both spellings are accepted, as the original intended
        'Starting': 9,
    }
    for row in input:
        stage = row[column]
        # Only string labels are looked up; anything else passes through.
        if isinstance(stage, str) and stage in stage_codes:
            row[column] = stage_codes[stage]
    return input

# PreProcessing

## 데이터 전처리 1

In [7]:
# Encode the categorical columns on a separate copy so that
# train_sample_np itself keeps its original string values.
train_sample_info = train_sample_np.copy()
train_sample_info = item_inspection(tool_condition(train_sample_info))
print(train_sample_info)

[[1 'aluminum' 6 4.0 0 'yes' 0]
 [2 'aluminum' 20 4.0 0 'yes' 0]
 [3 'aluminum' 6 3.0 0 'yes' 0]
 [4 'aluminum' 6 2.5 0 'no' 2]
 [5 'aluminum' 20 3.0 0 'no' 2]
 [6 'aluminum' 6 4.0 1 'yes' 1]
 [7 'aluminum' 20 4.0 1 'no' 2]
 [8 'aluminum' 20 4.0 1 'yes' 1]
 [9 'aluminum' 15 4.0 1 'yes' 1]
 [10 'aluminum' 12 4.0 1 'yes' 1]
 [11 'aluminum' 3 4.0 0 'yes' 0]
 [12 'aluminum' 3 3.0 0 'yes' 0]
 [13 'aluminum' 3 4.0 1 'yes' 0]
 [14 'aluminum' 3 3.0 1 'yes' 0]
 [15 'aluminum' 6 3.0 1 'yes' 0]
 [16 'aluminum' 20 3.0 1 'no' 2]
 [17 'aluminum' 3 2.5 0 'yes' 0]
 [18 'aluminum' 3 2.5 1 'yes' 0]
 [19 'aluminum' 15 4.0 1 'yes' 1]
 [20 'aluminum' 12 4.0 0 'no' 2]
 [21 'aluminum' 3 4.0 0 'yes' 1]
 [22 'aluminum' 20 3.0 1 'yes' 0]
 [23 'aluminum' 3 4.0 1 'no' 2]
 [24 'aluminum' 3 3.0 0 'yes' 0]
 [25 'aluminum' 6 2.5 1 'yes' 0]]


In [8]:
# Drop columns 0 (No), 1 (material) and 5 (machining_finalized) in one call,
# leaving feedrate, clamp_pressure, tool_condition and the outcome label.
train_sample_info = np.delete(train_sample_info, [0, 1, 5], axis=1)
print(train_sample_info)

[[6 4.0 0 0]
 [20 4.0 0 0]
 [6 3.0 0 0]
 [6 2.5 0 2]
 [20 3.0 0 2]
 [6 4.0 1 1]
 [20 4.0 1 2]
 [20 4.0 1 1]
 [15 4.0 1 1]
 [12 4.0 1 1]
 [3 4.0 0 0]
 [3 3.0 0 0]
 [3 4.0 1 0]
 [3 3.0 1 0]
 [6 3.0 1 0]
 [20 3.0 1 2]
 [3 2.5 0 0]
 [3 2.5 1 0]
 [15 4.0 1 1]
 [12 4.0 0 2]
 [3 4.0 0 1]
 [20 3.0 1 0]
 [3 4.0 1 2]
 [3 3.0 0 0]
 [6 2.5 1 0]]


In [9]:
# Group the per-experiment sensor logs by the outcome label in column 3 of
# train_sample_info: 0 = passed, 1 = failed visual inspection, anything
# else = machining not finalized.
labels = train_sample_info[:, 3]

# Files and labels are paired purely by position, so a count mismatch would
# attach labels to the wrong experiments; fail loudly instead.
if len(all_files) != len(labels):
    raise ValueError(
        "found {} CSV files but {} labelled experiments; "
        "files and labels are paired by position".format(
            len(all_files), len(labels)
        )
    )

li_pass = []
li_pass_half = []
li_fail = []

for filename, label in zip(all_files, labels):
    df = pd.read_csv(filename, index_col=None, header=0)

    if label == 0:
        li_pass.append(df)
    elif label == 1:
        li_pass_half.append(df)
    else:
        li_fail.append(df)

# Stack the rows of every experiment that shares a label.
frame01 = pd.concat(li_pass, axis=0, ignore_index=True)
frame02 = pd.concat(li_pass_half, axis=0, ignore_index=True)
frame03 = pd.concat(li_fail, axis=0, ignore_index=True)

# np.array already copies, so no extra DataFrame.copy() is needed.
data_pass = np.array(frame01)
data_pass_half = np.array(frame02)
data_fail = np.array(frame03)


print('공정완료 및 육안검사 합격한 전체 데이터 수 : ',len(data_pass))
print('공정완료 및 육안검사 불합격한 전체 데이터 수 : ',len(data_pass_half))
print('공정 미완료한 전체 데이터 수 : ',len(data_fail))

공정완료 및 육안검사 합격한 전체 데이터 수 :  22645
공정완료 및 육안검사 불합격한 전체 데이터 수 :  6175
공정 미완료한 전체 데이터 수 :  3228


In [10]:
# Report the shape of each outcome group, in the order pass / half / fail.
for group in (data_pass, data_pass_half, data_fail):
    print(group.shape)

(22645, 48)
(6175, 48)
(3228, 48)


In [11]:
# Convert the categorical stage label of each group to integer codes.

data_pass, data_pass_half, data_fail = map(
    machining_process, (data_pass, data_pass_half, data_fail)
)

In [12]:
# Limit the number of rows per label

# Derive the class sizes from the arrays instead of hard-coding
# 3228 / 6175 / 22645, so the slices stay correct if the dataset changes.
n_defect = len(data_pass_half) + len(data_fail)

# Good parts, capped at the combined number of defective rows.
data_1 = data_pass[:n_defect, :]
# Defective: finalized but failed the visual inspection.
data_2 = data_pass_half[:, :]
# Defective: machining not finalized.
data_3 = data_fail[:, :]

train_input = np.concatenate((data_1, data_2, data_3), axis=0)

# Good-part rows beyond the cap.
data_left = data_pass[n_defect:, :]

In [13]:
# Shapes of the stacked training matrix and of the leftover good-part rows.
print(train_input.shape)
print(data_left.shape)

# Notebook display of the assembled training matrix.
train_input

(18806, 48)
(13242, 48)


array([[202.0, 4.0, 4.0, ..., 0, 50, 8],
       [202.0, -6.8, -346.0, ..., 4, 50, 0],
       [200.0, -13.8, -2.25, ..., 7, 50, 0],
       ...,
       [155.0, 9.875, -64.6, ..., 0, 50, 8],
       [155.5, 9.95, -52.125, ..., 0, 50, 8],
       [156.0, 10.1, 76.125, ..., 0, 50, 8]], dtype=object)

## 신경망 분석을 위한 데이터 스케일링
* sklearn.preprocessing.MinMaxScaler
    * X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    * X_scaled = X_std * (max - min) + min

* sklearn.preprocessing.StandardScaler
    * z = (x - u) / s

In [14]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-column min/max on the training matrix and rescale it.
# NOTE(review): the scaler sees every row of train_input here, before the
# train/test split performed in a later cell -- confirm this is intended.
scaler = MinMaxScaler()
train_input = scaler.fit_transform(train_input)

# Rescale the leftover good-part rows with the ranges learned above.
test_input = scaler.transform(data_left)

train_input.shape

(18806, 48)

## 레이블 데이터 만들기

In [15]:
# One integer label per training row, in the order the groups were stacked
# into train_input: 0 = good, 1 = failed visual inspection, 2 = machining
# not finalized. The sizes come from the groups themselves rather than the
# hard-coded 3228 / 6175, so labels cannot drift out of step with the rows.
target_0 = np.full(len(data_1), 0)
target_1 = np.full(len(data_2), 1)
target_2 = np.full(len(data_3), 2)

train_target = np.concatenate([target_0, target_1, target_2], axis=0)
train_target.shape

(18806,)

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the scaled rows for evaluation.
# random_state makes the split reproducible between runs, and stratify keeps
# the 0/1/2 label proportions the same in both parts.
# NOTE(review): this rebinds test_input, discarding the scaled data_left rows
# assigned to it in the scaling cell -- confirm that is intended.
train_input, test_input, train_target, test_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
    stratify=train_target,
)