In [1]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [14]:
pd.set_option('display.max_columns', None) # 전체 열 보기
pd.set_option('display.max_rows', None) # 전체 행 보기

In [3]:
def read_excel_file(file_path: str, header: int = None) -> pd.DataFrame:
    csv_file = file_path.replace(".xlsx", ".csv")

    if not os.path.exists(csv_file):
        print("Converting excel to csv...")
        if header:
            df = pd.read_excel(file_path, header=header)
        else:
            df = pd.read_excel(file_path)

        df.to_csv(csv_file, index=False)
        print(f"  {file_path} -> {csv_file}")
        return df
    else:
        print(f"  Reading {csv_file}")
        return pd.read_csv(csv_file, low_memory=False)

In [4]:
ROOT_DIR = "data"
RANDOM_STATE = 110

X_Dam = read_excel_file(os.path.join(ROOT_DIR, "Dam dispensing2.xlsx"), header=0)

X_AutoClave = read_excel_file(
    os.path.join(ROOT_DIR, "Auto clave2.xlsx"), header=0
)

X_Fill1 = read_excel_file(
    os.path.join(ROOT_DIR, "Fill1 dispensing2.xlsx"), header=0)

X_Fill2 = read_excel_file(
    os.path.join(ROOT_DIR, "Fill2 dispensing2.xlsx"), header=0
)

y = pd.read_csv(os.path.join(ROOT_DIR, "train_y.csv"))

Converting excel to csv...
  data\Dam dispensing2.xlsx -> data\Dam dispensing2.csv
Converting excel to csv...
  data\Auto clave2.xlsx -> data\Auto clave2.csv
Converting excel to csv...
  data\Fill1 dispensing2.xlsx -> data\Fill1 dispensing2.csv
Converting excel to csv...
  data\Fill2 dispensing2.xlsx -> data\Fill2 dispensing2.csv


In [7]:
X_Dam.describe()

Unnamed: 0,Box ID,Insp. Seq No.,CURE END POSITION X,CURE END POSITION X.1,CURE END POSITION X.2,CURE END POSITION Z,CURE END POSITION Z.1,CURE END POSITION Z.2,CURE END POSITION Θ,CURE END POSITION Θ.1,CURE END POSITION Θ.2,CURE SPEED,CURE SPEED.1,CURE SPEED.2,CURE STANDBY POSITION X,CURE STANDBY POSITION X.1,CURE STANDBY POSITION X.2,CURE STANDBY POSITION Z,CURE STANDBY POSITION Z.1,CURE STANDBY POSITION Z.2,CURE STANDBY POSITION Θ,CURE STANDBY POSITION Θ.1,CURE STANDBY POSITION Θ.2,CURE START POSITION X,CURE START POSITION X.1,CURE START POSITION X.2,CURE START POSITION Z,CURE START POSITION Z.1,CURE START POSITION Z.2,CURE START POSITION Θ,CURE START POSITION Θ.1,CURE START POSITION Θ.2,DISCHARGED SPEED OF RESIN,DISCHARGED SPEED OF RESIN.1,DISCHARGED SPEED OF RESIN.2,DISCHARGED TIME OF RESIN(Stage1),DISCHARGED TIME OF RESIN(Stage1).1,DISCHARGED TIME OF RESIN(Stage1).2,DISCHARGED TIME OF RESIN(Stage2),DISCHARGED TIME OF RESIN(Stage2).1,DISCHARGED TIME OF RESIN(Stage2).2,DISCHARGED TIME OF RESIN(Stage3),DISCHARGED TIME OF RESIN(Stage3).1,DISCHARGED TIME OF RESIN(Stage3).2,Dispense Volume(Stage1),Dispense Volume(Stage1).1,Dispense Volume(Stage1).2,Dispense Volume(Stage2),Dispense Volume(Stage2).1,Dispense Volume(Stage2).2,Dispense Volume(Stage3),Dispense Volume(Stage3).1,Dispense Volume(Stage3).2,HEAD NORMAL COORDINATE X AXIS(Stage1).1,HEAD NORMAL COORDINATE X AXIS(Stage2),HEAD NORMAL COORDINATE X AXIS(Stage2).1,HEAD NORMAL COORDINATE X AXIS(Stage2).2,HEAD NORMAL COORDINATE X AXIS(Stage3),HEAD NORMAL COORDINATE X AXIS(Stage3).1,HEAD NORMAL COORDINATE X AXIS(Stage3).2,HEAD NORMAL COORDINATE Y AXIS(Stage1),HEAD NORMAL COORDINATE Y AXIS(Stage1).1,HEAD NORMAL COORDINATE Y AXIS(Stage1).2,HEAD NORMAL COORDINATE Y AXIS(Stage2),HEAD NORMAL COORDINATE Y AXIS(Stage2).1,HEAD NORMAL COORDINATE Y AXIS(Stage2).2,HEAD NORMAL COORDINATE Y AXIS(Stage3),HEAD NORMAL COORDINATE Y AXIS(Stage3).1,HEAD NORMAL COORDINATE Y AXIS(Stage3).2,HEAD NORMAL COORDINATE Z AXIS(Stage1),HEAD NORMAL COORDINATE Z AXIS(Stage1).1,HEAD NORMAL COORDINATE Z AXIS(Stage1).2,HEAD NORMAL COORDINATE Z AXIS(Stage2),HEAD NORMAL COORDINATE Z AXIS(Stage2).1,HEAD NORMAL COORDINATE Z AXIS(Stage2).2,HEAD NORMAL COORDINATE Z AXIS(Stage3),HEAD NORMAL COORDINATE Z AXIS(Stage3).1,HEAD NORMAL COORDINATE Z AXIS(Stage3).2,HEAD Standby Position X,HEAD Standby Position X.1,HEAD Standby Position X.2,HEAD Standby Position Y,HEAD Standby Position Y.1,HEAD Standby Position Y.2,HEAD Standby Position Z,HEAD Standby Position Z.1,HEAD Standby Position Z.2,Head Clean Position X,Head Clean Position X.1,Head Clean Position X.2,Head Clean Position Y,Head Clean Position Y.1,Head Clean Position Y.2,Head Clean Position Z,Head Clean Position Z.1,Head Clean Position Z.2,Head Purge Position X,Head Purge Position X.1,Head Purge Position X.2,Head Purge Position Y,Head Purge Position Y.1,Head Purge Position Y.2,Head Purge Position Z,Head Purge Position Z.1,Head Purge Position Z.2,Head Zero Position X,Head Zero Position X.1,Head Zero Position X.2,Head Zero Position Y,Head Zero Position Y.1,Head Zero Position Y.2,Head Zero Position Z,Head Zero Position Z.1,Head Zero Position Z.2,Machine Tact time,Machine Tact time.1,Machine Tact time.2,PalletID,PalletID.1,PalletID.2,Production Qty,Production Qty.1,Production Qty.2,Receip No,Receip No.1,Receip No.2,Stage1 Circle1 Distance Speed,Stage1 Circle1 Distance Speed.1,Stage1 Circle1 Distance Speed.2,Stage1 Circle2 Distance Speed,Stage1 Circle2 Distance Speed.1,Stage1 Circle2 Distance Speed.2,Stage1 Circle3 Distance Speed,Stage1 Circle3 Distance Speed.1,Stage1 Circle3 Distance Speed.2,Stage1 Circle4 Distance Speed,Stage1 Circle4 Distance Speed.1,Stage1 Circle4 Distance Speed.2,Stage1 Line1 Distance Speed,Stage1 Line1 Distance Speed.1,Stage1 Line1 Distance Speed.2,Stage1 Line2 Distance Speed,Stage1 Line2 Distance Speed.1,Stage1 Line2 Distance Speed.2,Stage1 Line3 Distance Speed,Stage1 Line3 Distance Speed.1,Stage1 Line3 Distance Speed.2,Stage1 Line4 Distance Speed,Stage1 Line4 Distance Speed.1,Stage1 Line4 Distance Speed.2,Stage2 Circle1 Distance Speed,Stage2 Circle1 Distance Speed.1,Stage2 Circle1 Distance Speed.2,Stage2 Circle2 Distance Speed,Stage2 Circle2 Distance Speed.1,Stage2 Circle2 Distance Speed.2,Stage2 Circle3 Distance Speed,Stage2 Circle3 Distance Speed.1,Stage2 Circle3 Distance Speed.2,Stage2 Circle4 Distance Speed,Stage2 Circle4 Distance Speed.1,Stage2 Circle4 Distance Speed.2,Stage2 Line1 Distance Speed,Stage2 Line1 Distance Speed.1,Stage2 Line1 Distance Speed.2,Stage2 Line2 Distance Speed,Stage2 Line2 Distance Speed.1,Stage2 Line2 Distance Speed.2,Stage2 Line3 Distance Speed,Stage2 Line3 Distance Speed.1,Stage2 Line3 Distance Speed.2,Stage2 Line4 Distance Speed,Stage2 Line4 Distance Speed.1,Stage2 Line4 Distance Speed.2,Stage3 Circle1 Distance Speed,Stage3 Circle1 Distance Speed.1,Stage3 Circle1 Distance Speed.2,Stage3 Circle2 Distance Speed,Stage3 Circle2 Distance Speed.1,Stage3 Circle2 Distance Speed.2,Stage3 Circle3 Distance Speed,Stage3 Circle3 Distance Speed.1,Stage3 Circle3 Distance Speed.2,Stage3 Circle4 Distance Speed,Stage3 Circle4 Distance Speed.1,Stage3 Circle4 Distance Speed.2,Stage3 Line1 Distance Speed,Stage3 Line1 Distance Speed.1,Stage3 Line1 Distance Speed.2,Stage3 Line2 Distance Speed,Stage3 Line2 Distance Speed.1,Stage3 Line2 Distance Speed.2,Stage3 Line3 Distance Speed,Stage3 Line3 Distance Speed.1,Stage3 Line3 Distance Speed.2,Stage3 Line4 Distance Speed,Stage3 Line4 Distance Speed.1,Stage3 Line4 Distance Speed.2,THICKNESS 1,THICKNESS 1.1,THICKNESS 1.2,THICKNESS 2,THICKNESS 2.1,THICKNESS 2.2,THICKNESS 3,THICKNESS 3.1,THICKNESS 3.2,WorkMode,WorkMode.1,WorkMode.2,Unnamed: 221
count,0.0,62479.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,62479.0,0.0,0.0,36595.0
mean,,1.055491,526.257695,,,6.328646,,,-21.216889,,,73.616559,,,1150.0,,,33.5,,,0.0,,,747.526689,,,33.5,,,-21.216889,,,12.819203,,,14.207225,,,6.857402,,,14.198214,,,1.075449,,,0.520774,,,1.073448,,,,402.351206,,,372.787289,,,443.618786,,,720.135393,,,742.169202,,,601.425279,,,281.010138,,,281.010277,,,270.01945,,,177.871909,,,38.657309,,,52.821108,,,102.021583,,,93.363854,,,181.662219,,,177.871909,,,93.364618,,,285.854059,,,420.494078,,,285.943653,,,187.353794,,,39.832473,,,75.97151,,,106.973367,,,3056.069239,,,6523.09416,,,6523.09416,,,6523.09416,,,6517.012116,,,6520.725364,,,6517.012116,,,6500.011204,,,7346.298756,,,7176.084764,,,7176.084764,,,7176.084764,,,7176.084764,,,7114.448055,,,7175.763056,,,7111.69513,,,6352.55846,,,6522.093824,,,6522.093824,,,6522.093824,,,6516.01178,,,6519.620993,,,6516.01178,,,6518.476608,,,3466.795243,,,-0.001662,,,-0.008205,,,2.894915,,,6.937696
std,,0.247805,367.155839,,,4.869219,,,87.464084,,,9.945053,,,0.0,,,0.0,,,0.0,,,362.329333,,,0.0,,,87.464084,,,2.994478,,,3.729297,,,2.075467,,,3.717956,,,0.326006,,,0.192497,,,0.324374,,,,156.204299,,,149.8322,,,299.145193,,,435.67177,,,465.029955,,,435.21243,,,3.620787,,,3.620536,,,11.30486,,,94.08694,,,32.511717,,,62.806727,,,30.295009,,,32.55556,,,63.411497,,,94.08694,,,32.55647,,,184.306956,,,100.48772,,,18.336184,,,93.038366,,,29.402665,,,106.949151,,,131.164023,,,3869.791265,,,1752.046118,,,1752.046118,,,1752.046118,,,1746.901002,,,1752.621487,,,1746.901002,,,1761.576927,,,2196.248433,,,2265.211167,,,2265.211167,,,2265.211167,,,2265.211167,,,2297.826688,,,2265.035722,,,2297.749705,,,1759.479812,,,1752.201752,,,1752.201752,,,1752.201752,,,1747.053612,,,1752.58397,,,1747.053612,,,1752.759645,,,3045.446957,,,0.009546,,,0.033493,,,3.452136,,,0.554657
min,,1.0,240.0,,,2.5,,,-90.0,,,32.0,,,1150.0,,,33.5,,,0.0,,,280.0,,,33.5,,,-90.0,,,10.0,,,9.6,,,3.8,,,9.6,,,0.67,,,0.26,,,0.67,,,,161.2,,,159.5,,,159.8,,,377.0,,,377.1,,,282.15,,,273.8,,,273.8,,,257.0,,,66.0,,,0.0,,,0.0,,,66.0,,,66.0,,,118.85,,,66.0,,,66.0,,,130.85,,,300.0,,,265.0,,,26.3,,,1.0,,,0.0,,,0.0,,,1.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,4000.0,,,0.0,,,-0.054,,,-0.219,,,-0.118,,,2.0
25%,,1.0,240.0,,,2.5,,,-90.0,,,70.0,,,1150.0,,,33.5,,,0.0,,,280.0,,,33.5,,,-90.0,,,10.0,,,9.7,,,4.9,,,9.7,,,0.67,,,0.34,,,0.67,,,,164.2,,,160.5,,,162.6,,,377.3,,,377.1,,,284.8,,,281.095,,,281.095,,,257.0,,,66.0,,,0.0,,,0.0,,,66.0,,,66.0,,,130.85,,,66.0,,,66.0,,,130.85,,,300.0,,,265.0,,,88.4,,,7.0,,,7.0,,,1.0,,,1.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5800.0,,,5300.0,,,5300.0,,,5300.0,,,5300.0,,,5300.0,,,5300.0,,,5300.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,5000.0,,,0.0,,,0.0,,,0.0,,,0.0,,,7.0
50%,,1.0,240.0,,,2.5,,,-90.0,,,70.0,,,1150.0,,,33.5,,,0.0,,,1030.0,,,33.5,,,-90.0,,,10.0,,,13.2,,,6.6,,,13.2,,,0.99,,,0.53,,,0.99,,,,463.0,,,463.9,,,377.5,,,377.6,,,377.5,,,377.6,,,282.15,,,282.15,,,274.33,,,257.0,,,66.0,,,0.0,,,127.5,,,66.0,,,130.85,,,257.0,,,66.0,,,130.85,,,505.0,,,300.0,,,265.0,,,54.3,,,14.0,,,43.0,,,1.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,5500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,6500.0,,,4000.0,,,0.0,,,0.0,,,0.0,,,7.0
75%,,1.0,1000.0,,,12.5,,,90.0,,,70.0,,,1150.0,,,33.5,,,0.0,,,1030.0,,,33.5,,,90.0,,,16.0,,,17.0,,,8.4,,,17.0,,,1.45,,,0.71,,,1.45,,,,550.3,,,465.7,,,552.0,,,1271.8,,,1271.3,,,1271.8,,,282.5,,,282.5,,,282.15,,,257.0,,,66.0,,,127.5,,,127.5,,,130.85,,,257.0,,,257.0,,,130.85,,,505.0,,,505.0,,,300.0,,,265.02,,,64.4,,,127.0,,,193.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,9000.0,,,6500.0,,,0.0,,,0.0,,,7.0,,,7.0
max,,4.0,1000.0,,,13.5,,,90.0,,,105.0,,,1150.0,,,33.5,,,0.0,,,1030.0,,,33.5,,,90.0,,,16.0,,,21.3,,,19.7,,,21.4,,,2.34,,,1.37,,,1.62,,,,552.1,,,552.0,,,1377.9,,,1380.3,,,1394.2,,,1380.3,,,284.8,,,284.8,,,282.5,,,257.0,,,66.0,,,127.5,,,127.5,,,133.5,,,257.0,,,257.0,,,133.5,,,505.0,,,505.0,,,360.0,,,999.9,,,410.8,,,489.0,,,608.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,13000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,13000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,12000.0,,,0.037,,,0.007,,,7.0,,,7.0


In [12]:
X_Dam1 = X_Dam.dropna(axis=1)

In [16]:
X_Dam1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62479 entries, 0 to 62478
Data columns (total 79 columns):
 #   Column                                 Non-Null Count  Dtype         
---  ------                                 --------------  -----         
 0   Wip Line                               62479 non-null  object        
 1   Process Desc.                          62479 non-null  object        
 2   Equipment                              62479 non-null  object        
 3   Model.Suffix                           62479 non-null  object        
 4   Workorder                              62479 non-null  object        
 5   LOT ID                                 62479 non-null  object        
 6   Set ID                                 62479 non-null  object        
 7   Collect Date                           62479 non-null  datetime64[ns]
 8   Insp. Seq No.                          62479 non-null  int64         
 9   Insp Judge Code                        62479 non-null  object

In [19]:
X_AutoClave1 = X_AutoClave.dropna(axis=1)

In [20]:
X_AutoClave1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61052 entries, 0 to 61051
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Wip Line         61052 non-null  object        
 1   Process Desc.    61052 non-null  object        
 2   Equipment        61052 non-null  object        
 3   Model.Suffix     61052 non-null  object        
 4   Workorder        61052 non-null  object        
 5   LOT ID           61052 non-null  object        
 6   Set ID           61052 non-null  object        
 7   Collect Date     61052 non-null  datetime64[ns]
 8   Insp. Seq No.    61052 non-null  int64         
 9   Insp Judge Code  61052 non-null  object        
 10  1st Pressure     61052 non-null  float64       
 11  1st Pressure.1   61052 non-null  int64         
 12  1st Pressure.2   61052 non-null  object        
 13  2nd Pressure     61052 non-null  float64       
 14  2nd Pressure.1   61052 non-null  int64

In [None]:
X_Fill1= 