In [1]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

### 엑셀 파일 csv 전환 함수

In [None]:
def read_excel_file(file_path: str, header: int = None) -> pd.DataFrame:
    """Load an Excel workbook, caching it as a CSV file next to the original.

    The cache path is ``file_path`` with its final extension replaced by
    ``.csv``. Only the extension is swapped, so a ``.xlsx`` substring in a
    directory name is left alone and any extension (``.xls``, ``.XLSX``, ...)
    maps to a distinct cache file.

    Args:
        file_path: Path to the Excel workbook.
        header: Row number to use for the column names when the workbook is
            parsed. ``None`` falls back to pandas' default (row 0).

    Returns:
        The DataFrame parsed from the workbook on the first call, or the
        DataFrame read back from the cached CSV on later calls.

    Note:
        On a cache hit ``header`` is not used: the CSV is read as it was
        written by the first call.
    """
    base_path, _ = os.path.splitext(file_path)
    csv_file = base_path + ".csv"

    if os.path.exists(csv_file):
        print(f"  Reading {csv_file}")
        return pd.read_csv(csv_file, low_memory=False)

    print("Converting excel to csv...")
    # Explicit None check: header=0 is a valid row number, not "unset".
    df = pd.read_excel(file_path, header=0 if header is None else header)

    df.to_csv(csv_file, index=False)
    print(f"  {file_path} -> {csv_file}")
    return df

In [None]:
ROOT_DIR = "data"
RANDOM_STATE = 110

# Load the four process workbooks through read_excel_file, in the order
# Dam, Auto clave, Fill1, Fill2.
# Original author's note on the header argument, translated literally:
# "1: no header, 0: no N values" -- TODO confirm what "N values" refers to.
X_Dam, X_AutoClave, X_Fill1, X_Fill2 = (
    read_excel_file(os.path.join(ROOT_DIR, workbook), header=0)
    for workbook in (
        "Dam dispensing.xlsx",
        "Auto clave.xlsx",
        "Fill1 dispensing.xlsx",
        "Fill2 dispensing.xlsx",
    )
)

# Read train_y.csv from the same data directory.
y = pd.read_csv(os.path.join(ROOT_DIR, "train_y.csv"))

### Dam dispensing

In [74]:
# Load the Dam dispensing CSV; the cell's last expression displays its shape.
# The path is a raw string so the Windows backslashes stay literal: in a normal
# string "\L", "\d" and "\D" are invalid escape sequences, which newer Python
# versions warn about.
# header=1 takes the column names from the second row of the file.
# NOTE(review): absolute, machine-specific path -- consider building it from ROOT_DIR.
dam = pd.read_csv(
    r"C:\박소현\LG 해커톤\data\Dam dispensing(헤더,N).csv",
    encoding="cp949",
    low_memory=False,
    header=1,
)
dam.shape

(62479, 222)

In [75]:
# Count the missing values in every column.
missing_dam = dam.isna().sum()

# Keep only the columns with more than 60,000 missing values.
missing_columns_dam = missing_dam.loc[missing_dam > 60000].index
print(len(missing_columns_dam))
missing_columns_dam

140


Index(['Box ID', 'Unit Time', 'Judge Value', 'Unit Time.1', 'Judge Value.1',
       'Unit Time.2', 'Judge Value.2', 'Unit Time.3', 'Judge Value.3',
       'Unit Time.4',
       ...
       'Unit Time.65', 'Judge Value.65', 'Unit Time.66', 'Judge Value.66',
       'Unit Time.67', 'Judge Value.67', 'Unit Time.68', 'Judge Value.68',
       'Unit Time.69', 'Judge Value.69'],
      dtype='object', length=140)

In [76]:
# Drop the columns listed in missing_columns_dam (more than 60,000 missing values).
dam = dam.drop(labels=missing_columns_dam, axis="columns")
dam

Unnamed: 0,Wip Line,Process Desc.,Equipment,Model.Suffix,Workorder,LOT ID,Set ID,Collect Date,Insp. Seq No.,Insp Judge Code,...,Collect Result.61,Collect Result.62,Collect Result.63,Collect Result.64,Collect Result.65,Collect Result.66,Collect Result.67,Collect Result.68,Collect Result.69,Unnamed: 221
0,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000002,OP753345013050000002,2023-05-04 8:57,1,OK,...,9000,7000,9000,7000,9000,0,0.0,0.0,7.0,
1,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:11,1,OK,...,9000,7000,9000,7000,9000,0,0.0,0.0,7.0,
2,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:13,1,OK,...,9000,7000,9000,7000,9000,0,0.0,0.0,7.0,
3,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000005,OP753345013050000005,2023-05-04 9:15,1,OK,...,9000,7000,9000,7000,9000,0,0.0,0.0,7.0,
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000006,OP753345013050000006,2023-05-04 9:17,1,OK,...,9000,7000,9000,7000,9000,0,0.0,0.0,7.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62474,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002685,OP753345054040002685,2024-04-28 18:30,1,OK,...,5800,5800,5800,5800,5800,5800,0.0,0.0,0.0,7.0
62475,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002686,OP753345054040002686,2024-04-28 18:31,1,OK,...,5800,5800,5800,5800,5800,5800,0.0,0.0,0.0,7.0
62476,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002687,OP753345054040002687,2024-04-28 18:32,1,OK,...,5800,5800,5800,5800,5800,5800,0.0,0.0,0.0,7.0
62477,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002688,OP753345054040002688,2024-04-28 18:33,1,OK,...,5800,5800,5800,5800,5800,5800,0.0,0.0,0.0,7.0


In [77]:
# Recount the missing values per column on the current `dam` frame.
missing_dam = dam.isna().sum()

# Show only the columns that still contain missing values.
missing_dam.loc[missing_dam.gt(0)]

Collect Result.17    19445
Judge Value.17       45329
Unnamed: 221         25884
dtype: int64

### Fill1 dispensing

In [52]:
# Load the Fill1 dispensing CSV; the cell's last expression displays its shape.
# The path is a raw string so the Windows backslashes stay literal: in a normal
# string "\L", "\d" and "\F" are invalid escape sequences, which newer Python
# versions warn about.
# header=1 takes the column names from the second row of the file.
# NOTE(review): absolute, machine-specific path -- consider building it from ROOT_DIR.
fill1 = pd.read_csv(
    r"C:\박소현\LG 해커톤\data\Fill1 dispensing(헤더,N).csv",
    encoding="cp949",
    low_memory=False,
    header=1,
)
fill1.shape

(61928, 102)

In [80]:
# Count the missing values in every column.
missing_fill1 = fill1.isna().sum()

# Keep only the columns with more than 60,000 missing values.
missing_columns_fill1 = missing_fill1.loc[missing_fill1 > 60000].index
print(len(missing_columns_fill1))
missing_columns_fill1

60


Index(['Box ID', 'Unit Time', 'Judge Value', 'Unit Time.1', 'Judge Value.1',
       'Unit Time.2', 'Judge Value.2', 'Unit Time.3', 'Judge Value.3',
       'Unit Time.4', 'Judge Value.4', 'Unit Time.5', 'Judge Value.5',
       'Unit Time.6', 'Judge Value.6', 'Unit Time.7', 'Unit Time.8',
       'Judge Value.8', 'Unit Time.9', 'Judge Value.9', 'Unit Time.10',
       'Judge Value.10', 'Unit Time.11', 'Judge Value.11', 'Unit Time.12',
       'Judge Value.12', 'Unit Time.13', 'Judge Value.13', 'Unit Time.14',
       'Judge Value.14', 'Unit Time.15', 'Judge Value.15', 'Unit Time.16',
       'Judge Value.16', 'Unit Time.17', 'Judge Value.17', 'Unit Time.18',
       'Judge Value.18', 'Unit Time.19', 'Judge Value.19', 'Unit Time.20',
       'Judge Value.20', 'Unit Time.21', 'Judge Value.21', 'Unit Time.22',
       'Judge Value.22', 'Unit Time.23', 'Judge Value.23', 'Unit Time.24',
       'Judge Value.24', 'Unit Time.25', 'Judge Value.25', 'Unit Time.26',
       'Judge Value.26', 'Unit Time.27',

In [82]:
# Drop the columns listed in missing_columns_fill1 (more than 60,000 missing values).
fill1 = fill1.drop(labels=missing_columns_fill1, axis="columns")
fill1

Unnamed: 0,Wip Line,Process Desc.,Equipment,Model.Suffix,Workorder,LOT ID,Set ID,Collect Date,Insp. Seq No.,Insp Judge Code,...,Collect Result.21,Collect Result.22,Collect Result.23,Collect Result.24,Collect Result.25,Collect Result.26,Collect Result.27,Collect Result.28,Collect Result.29,Unnamed: 101
0,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000002,OP753345013050000002,2023-05-04 8:59,1,OK,...,92.2,289.0,50,85,111.0,7.0,1,1,7,
1,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:13,1,OK,...,92.2,289.0,50,85,110.9,5.0,2,1,7,
2,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:15,1,OK,...,92.2,289.0,50,85,111.0,6.0,3,1,7,
3,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000005,OP753345013050000005,2023-05-04 9:17,1,OK,...,92.2,289.0,50,85,110.9,7.0,4,1,7,
4,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000006,OP753345013050000006,2023-05-04 9:19,1,OK,...,92.2,289.0,50,85,111.0,8.0,5,1,7,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61923,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002685,OP753345054040002685,2024-04-28 18:32,1,OK,...,50.0,92.2,289,50,128.0,57.5,11,435,1,7.0
61924,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002686,OP753345054040002686,2024-04-28 18:33,1,OK,...,50.0,92.2,289,50,128.0,57.4,5,436,1,7.0
61925,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002687,OP753345054040002687,2024-04-28 18:34,1,OK,...,50.0,92.2,289,50,128.0,57.7,12,437,1,7.0
61926,IVI-OB6,Fill1 Dispenser,Fill1 dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002688,OP753345054040002688,2024-04-28 18:35,1,OK,...,50.0,92.2,289,50,128.0,52.8,6,438,1,7.0


In [83]:
# Recount the missing values per column on the current `fill1` frame.
missing_fill1 = fill1.isna().sum()

# Show only the columns that still contain missing values.
missing_fill1.loc[missing_fill1.gt(0)]

Collect Result.7    19126
Judge Value.7       44962
Unnamed: 101        50663
dtype: int64

### Fill2 dispensing

In [91]:
# Load the Fill2 dispensing CSV; the cell's last expression displays its shape.
# The path is a raw string so the Windows backslashes stay literal: in a normal
# string "\L", "\d" and "\F" are invalid escape sequences, which newer Python
# versions warn about.
# header=1 takes the column names from the second row of the file.
# NOTE(review): absolute, machine-specific path -- consider building it from ROOT_DIR.
fill2 = pd.read_csv(
    r"C:\박소현\LG 해커톤\data\Fill2 dispensing(헤더,N).csv",
    encoding="cp949",
    low_memory=False,
    header=1,
)
fill2.shape

(62318, 132)

In [92]:
# Count the missing values in every column.
missing_fill2 = fill2.isna().sum()

# Keep only the columns with more than 60,000 missing values.
missing_columns_fill2 = missing_fill2.loc[missing_fill2 > 60000].index
print(len(missing_columns_fill2))
missing_columns_fill2

80


Index(['Box ID', 'Unit Time', 'Judge Value', 'Unit Time.1', 'Judge Value.1',
       'Unit Time.2', 'Judge Value.2', 'Unit Time.3', 'Judge Value.3',
       'Unit Time.4', 'Judge Value.4', 'Unit Time.5', 'Judge Value.5',
       'Unit Time.6', 'Judge Value.6', 'Unit Time.7', 'Judge Value.7',
       'Unit Time.8', 'Judge Value.8', 'Unit Time.9', 'Judge Value.9',
       'Unit Time.10', 'Judge Value.10', 'Unit Time.11', 'Judge Value.11',
       'Unit Time.12', 'Judge Value.12', 'Unit Time.13', 'Judge Value.13',
       'Unit Time.14', 'Judge Value.14', 'Unit Time.15', 'Judge Value.15',
       'Unit Time.16', 'Judge Value.16', 'Unit Time.17', 'Unit Time.18',
       'Judge Value.18', 'Unit Time.19', 'Judge Value.19', 'Unit Time.20',
       'Judge Value.20', 'Unit Time.21', 'Judge Value.21', 'Unit Time.22',
       'Judge Value.22', 'Unit Time.23', 'Judge Value.23', 'Unit Time.24',
       'Judge Value.24', 'Unit Time.25', 'Judge Value.25', 'Unit Time.26',
       'Judge Value.26', 'Unit Time.27', 

In [93]:
# Drop the columns listed in missing_columns_fill2 (more than 60,000 missing values).
fill2 = fill2.drop(labels=missing_columns_fill2, axis="columns")
fill2

Unnamed: 0,Wip Line,Process Desc.,Equipment,Model.Suffix,Workorder,LOT ID,Set ID,Collect Date,Insp. Seq No.,Insp Judge Code,...,Collect Result.31,Collect Result.32,Collect Result.33,Collect Result.34,Collect Result.35,Collect Result.36,Collect Result.37,Collect Result.38,Collect Result.39,Unnamed: 131
0,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000002,OP753345013050000002,2023-05-04 9:00,1,OK,...,91.8,270.0,50,85,17.800,7.0,1,1,0,
1,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:15,1,OK,...,91.8,270.0,50,85,18.200,5.0,2,1,0,
2,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:17,1,OK,...,91.8,270.0,50,85,18.400,6.0,3,1,0,
3,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000005,OP753345013050000005,2023-05-04 9:19,1,OK,...,91.8,270.0,50,85,18.600,7.0,4,1,0,
4,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000006,OP753345013050000006,2023-05-04 9:21,1,OK,...,91.8,270.0,50,85,18.100,8.0,5,1,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62313,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002685,OP753345054040002685,2024-04-28 18:33,1,OK,...,50.0,91.8,270,50,114.612,19.4,11,435,1,0.0
62314,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002686,OP753345054040002686,2024-04-28 18:34,1,OK,...,50.0,91.8,270,50,114.612,19.4,5,436,1,0.0
62315,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002687,OP753345054040002687,2024-04-28 18:35,1,OK,...,50.0,91.8,270,50,114.612,19.0,12,437,1,0.0
62316,IVI-OB6,Fill2 Dispenser,Fill2 dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002688,OP753345054040002688,2024-04-28 18:36,1,OK,...,50.0,91.8,270,50,114.612,18.8,6,438,1,0.0


In [94]:
# Recount the missing values per column on the current `fill2` frame.
missing_fill2 = fill2.isna().sum()

# Show only the columns that still contain missing values.
missing_fill2.loc[missing_fill2.gt(0)]

Collect Result.17    19393
Judge Value.17       45198
Unnamed: 131         25805
dtype: int64

### Auto clave

In [95]:
# Load the Auto clave CSV; the cell's last expression displays its shape.
# The path is a raw string so the Windows backslashes stay literal: in a normal
# string "\L", "\d" and "\A" are invalid escape sequences, which newer Python
# versions warn about.
# header=1 takes the column names from the second row of the file.
# NOTE(review): absolute, machine-specific path -- consider building it from ROOT_DIR.
auto = pd.read_csv(
    r"C:\박소현\LG 해커톤\data\Auto clave(헤더,N).csv",
    encoding="cp949",
    low_memory=False,
    header=1,
)
auto.shape

(61052, 26)

In [96]:
# Count the missing values in every column.
missing_auto = auto.isna().sum()

# Keep only the columns with more than 60,000 missing values.
missing_columns_auto = missing_auto.loc[missing_auto > 60000].index
print(len(missing_columns_auto))
missing_columns_auto

2


Index(['Box ID', 'Unit Time.4'], dtype='object')

In [99]:
# Drop the columns listed in missing_columns_auto (more than 60,000 missing values).
auto = auto.drop(labels=missing_columns_auto, axis="columns")
auto

Unnamed: 0,Wip Line,Process Desc.,Equipment,Model.Suffix,Workorder,LOT ID,Set ID,Collect Date,Insp. Seq No.,Insp Judge Code,...,Unit Time.1,Judge Value.1,Collect Result.2,Unit Time.2,Judge Value.2,Collect Result.3,Unit Time.3,Judge Value.3,Collect Result.4,Judge Value.4
0,IVI-OB6,Auto Clave Out,Auto Clave Out,AJX75334501,3F1X5847-2,OP753345013050000002,OP753345013050000002,2023-05-04 9:29,1,OK,...,120,OK,0.503,90,OK,56,510,OK,,
1,IVI-OB6,Auto Clave Out,Auto Clave Out,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:49,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
2,IVI-OB6,Auto Clave Out,Auto Clave Out,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:49,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
3,IVI-OB6,Auto Clave Out,Auto Clave Out,AJX75334501,3F1X5847-2,OP753345013050000005,OP753345013050000005,2023-05-04 9:49,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
4,IVI-OB6,Auto Clave Out,Auto Clave Out,AJX75334501,3F1X5847-2,OP753345013050000006,OP753345013050000006,2023-05-04 9:49,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61047,IVI-OB6,Auto Clave Out,Auto Clave Out,EAT65200901,4EPM0222-2,OP652009014040000066,OP652009014040000066,2024-04-20 12:03,1,OK,...,120,OK,0.499,120,OK,35,360,OK,,
61048,IVI-OB6,Auto Clave Out,Auto Clave Out,EAT65200901,4EPM0222-2,OP652009014040000067,OP652009014040000067,2024-04-20 12:03,1,OK,...,120,OK,0.499,120,OK,35,360,OK,,
61049,IVI-OB6,Auto Clave Out,Auto Clave Out,EAT65200901,4EPM0222-2,OP652009014040000068,OP652009014040000068,2024-04-20 11:52,1,OK,...,120,OK,0.500,120,OK,36,360,OK,,
61050,IVI-OB6,Auto Clave Out,Auto Clave Out,EAT65200901,4EPM0222-2,OP652009014040000069,OP652009014040000069,2024-04-20 11:52,1,OK,...,120,OK,0.500,120,OK,36,360,OK,,


In [101]:
# Recount the missing values per column on the current `auto` frame.
missing_auto = auto.isna().sum()

# Show only the columns that still contain missing values.
missing_auto.loc[missing_auto.gt(0)]

Collect Result.4    44110
Judge Value.4       44110
dtype: int64

### 데이터 병합

In [None]:
def _with_stage_suffix(frame, stage):
    """Return `frame` with " - <stage>" appended to every column name except "Set ID"."""
    suffix = " - " + stage
    return frame.rename(
        columns={col: col + suffix for col in frame.columns if col != "Set ID"}
    )


# Tag each table's columns with its process stage; "Set ID" keeps its plain
# name so it can serve as the shared merge key.
dam = _with_stage_suffix(dam, "Dam")
fill1 = _with_stage_suffix(fill1, "Fill1")
fill2 = _with_stage_suffix(fill2, "Fill2")
auto = _with_stage_suffix(auto, "AutoClave")

In [112]:
# Join the four process tables on "Set ID" (merge defaults to an inner join),
# then keep the first row for each Set ID and renumber the index from 0.
X = (
    dam.merge(fill1, on="Set ID")
    .merge(fill2, on="Set ID")
    .merge(auto, on="Set ID")
    .drop_duplicates(subset="Set ID")
    .reset_index(drop=True)
)
X

Unnamed: 0,Wip Line - Dam,Process Desc. - Dam,Equipment - Dam,Model.Suffix - Dam,Workorder - Dam,LOT ID - Dam,Set ID,Collect Date - Dam,Insp. Seq No. - Dam,Insp Judge Code - Dam,...,Unit Time.1 - AutoClave,Judge Value.1 - AutoClave,Collect Result.2 - AutoClave,Unit Time.2 - AutoClave,Judge Value.2 - AutoClave,Collect Result.3 - AutoClave,Unit Time.3 - AutoClave,Judge Value.3 - AutoClave,Collect Result.4 - AutoClave,Judge Value.4 - AutoClave
0,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000002,OP753345013050000002,2023-05-04 8:57,1,OK,...,120,OK,0.503,90,OK,56,510,OK,,
1,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:11,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
2,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:13,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
3,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000005,OP753345013050000005,2023-05-04 9:15,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000006,OP753345013050000006,2023-05-04 9:17,1,OK,...,120,OK,0.501,90,OK,50,510,OK,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57862,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002685,OP753345054040002685,2024-04-28 18:30,1,OK,...,1,OK,0.499,120,OK,55,361,OK,,
57863,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002686,OP753345054040002686,2024-04-28 18:31,1,OK,...,1,OK,0.499,120,OK,55,361,OK,,
57864,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002687,OP753345054040002687,2024-04-28 18:32,1,OK,...,1,OK,0.499,120,OK,55,361,OK,,
57865,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002688,OP753345054040002688,2024-04-28 18:33,1,OK,...,1,OK,0.499,120,OK,55,361,OK,,


In [122]:
# Write the merged feature table to X.csv in the working directory, without the index column.
X.to_csv("X.csv", index=False)

In [115]:
# Read train_y.csv into `y` (raw string keeps the Windows backslashes literal).
# NOTE(review): absolute, machine-specific path -- consider building it from ROOT_DIR.
y = pd.read_csv(r"C:\박소현\LG 해커톤\data\train_y.csv")

In [116]:
# Inner-join the features with y on "Set ID"; rows present in only one of the
# two frames are dropped.
df_merged = X.merge(y, how="inner", on="Set ID")
df_merged

Unnamed: 0,Wip Line - Dam,Process Desc. - Dam,Equipment - Dam,Model.Suffix - Dam,Workorder - Dam,LOT ID - Dam,Set ID,Collect Date - Dam,Insp. Seq No. - Dam,Insp Judge Code - Dam,...,Judge Value.1 - AutoClave,Collect Result.2 - AutoClave,Unit Time.2 - AutoClave,Judge Value.2 - AutoClave,Collect Result.3 - AutoClave,Unit Time.3 - AutoClave,Judge Value.3 - AutoClave,Collect Result.4 - AutoClave,Judge Value.4 - AutoClave,target
0,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:11,1,OK,...,OK,0.501,90,OK,50,510,OK,,,AbNormal
1,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:13,1,OK,...,OK,0.501,90,OK,50,510,OK,,,AbNormal
2,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000007,OP753345013050000007,2023-05-04 9:19,1,OK,...,OK,0.502,90,OK,54,510,OK,,,AbNormal
3,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000011,OP753345013050000011,2023-05-04 11:19,1,OK,...,OK,0.500,90,OK,49,510,NG,,,AbNormal
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000024,OP753345013050000024,2023-05-05 9:35,1,OK,...,OK,0.501,90,OK,50,510,OK,,,AbNormal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002680,OP753345054040002680,2024-04-28 18:25,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal
40502,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002681,OP753345054040002681,2024-04-28 18:26,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal
40503,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002682,OP753345054040002682,2024-04-28 18:27,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal
40504,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002684,OP753345054040002684,2024-04-28 18:29,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal


In [123]:
# Write the joined feature/label table to merge.csv in the working directory, without the index column.
df_merged.to_csv('merge.csv', index=False)

In [4]:
# Read merge.csv from the working directory into `df` and display it.
# low_memory=False makes pandas parse the file in one pass rather than in chunks.
df = pd.read_csv('merge.csv', low_memory=False)
df

Unnamed: 0,Wip Line - Dam,Process Desc. - Dam,Equipment - Dam,Model.Suffix - Dam,Workorder - Dam,LOT ID - Dam,Set ID,Collect Date - Dam,Insp. Seq No. - Dam,Insp Judge Code - Dam,...,Judge Value.1 - AutoClave,Collect Result.2 - AutoClave,Unit Time.2 - AutoClave,Judge Value.2 - AutoClave,Collect Result.3 - AutoClave,Unit Time.3 - AutoClave,Judge Value.3 - AutoClave,Collect Result.4 - AutoClave,Judge Value.4 - AutoClave,target
0,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000003,OP753345013050000003,2023-05-04 9:11,1,OK,...,OK,0.501,90,OK,50,510,OK,,,AbNormal
1,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000004,OP753345013050000004,2023-05-04 9:13,1,OK,...,OK,0.501,90,OK,50,510,OK,,,AbNormal
2,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000007,OP753345013050000007,2023-05-04 9:19,1,OK,...,OK,0.502,90,OK,54,510,OK,,,AbNormal
3,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000011,OP753345013050000011,2023-05-04 11:19,1,OK,...,OK,0.500,90,OK,49,510,NG,,,AbNormal
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3F1X5847-2,OP753345013050000024,OP753345013050000024,2023-05-05 9:35,1,OK,...,OK,0.501,90,OK,50,510,OK,,,AbNormal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002680,OP753345054040002680,2024-04-28 18:25,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal
40502,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334505,4F1XB738-1,OP753345054040002681,OP753345054040002681,2024-04-28 18:26,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal
40503,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002682,OP753345054040002682,2024-04-28 18:27,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal
40504,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XB738-1,OP753345054040002684,OP753345054040002684,2024-04-28 18:29,1,OK,...,OK,0.499,120,OK,55,361,OK,,,Normal


In [21]:
# Show the "Wip Line" column of each process stage side by side.
wip_line_columns = [
    "Wip Line - Dam",
    "Wip Line - Fill1",
    "Wip Line - Fill2",
    "Wip Line - AutoClave",
]
df[wip_line_columns]

Unnamed: 0,Wip Line - Dam,Wip Line - Fill1,Wip Line - Fill2,Wip Line - AutoClave
0,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
1,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
2,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
3,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
4,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
...,...,...,...,...
40501,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
40502,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
40503,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6
40504,IVI-OB6,IVI-OB6,IVI-OB6,IVI-OB6


In [22]:
# Show the "Process Desc." column of each process stage side by side.
process_desc_columns = [
    "Process Desc. - Dam",
    "Process Desc. - Fill1",
    "Process Desc. - Fill2",
    "Process Desc. - AutoClave",
]
df[process_desc_columns]

Unnamed: 0,Process Desc. - Dam,Process Desc. - Fill1,Process Desc. - Fill2,Process Desc. - AutoClave
0,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
1,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
2,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
3,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
4,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
...,...,...,...,...
40501,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
40502,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
40503,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out
40504,Dam Dispenser,Fill1 Dispenser,Fill2 Dispenser,Auto Clave Out


In [20]:
# Show the trailing "Unnamed" columns of the three dispensing tables side by side.
unnamed_columns = [
    "Unnamed: 221 - Dam",
    "Unnamed: 101 - Fill1",
    "Unnamed: 131 - Fill2",
]
df[unnamed_columns]

Unnamed: 0,Unnamed: 221 - Dam,Unnamed: 101 - Fill1,Unnamed: 131 - Fill2
0,,,
1,,,
2,,,
3,,,
4,,,
...,...,...,...
40501,7.0,7.0,0.0
40502,7.0,7.0,0.0
40503,7.0,7.0,0.0
40504,7.0,7.0,0.0
