# Preprocessing functions

In [1]:
# Remove the outer ring
def remove_outerring(image_path, threshold=20, output_path=None):
    """Black out outer-ring pixels of an image and save the result.

    A pixel counts as outer ring when the Euclidean distance between its
    three channel values (in the channel order ``cv2.imread`` returns) and any
    of the reference colours listed below is smaller than ``threshold``.
    Matching pixels are set to ``[0, 0, 0]``.

    Args:
        image_path: Path of the image to read.
        threshold: Exclusive upper bound on the colour distance for a pixel
            to be treated as outer ring. Defaults to 20.
        output_path: Where the modified image is written. ``None`` (the
            default) overwrites ``image_path``.

    Returns:
        The image array loaded by ``cv2.imread`` with ring pixels zeroed.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    # Read the image; cv2.imread signals failure by returning None
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"could not read image: {image_path}")

    # Reference colours of the outer ring
    outerring_list = [
        [100, 101, 91],
        [105, 106, 96],
        [76, 77, 67],
        [115, 116, 114],
        [136, 129, 132],
        [128, 121, 124],
        [166, 164, 164],
        [151, 146, 147],
        [90, 87, 79],
    ]
    ring_colors = np.array(outerring_list, dtype=np.int32)

    # Widen once so the per-channel differences cannot wrap around in uint8
    pixels = img.astype(np.int32)

    # Collect every pixel that is close to at least one ring colour; the mask
    # is built from the untouched pixels and applied in a single assignment
    ring_mask = np.zeros(img.shape[:2], dtype=bool)
    for color in ring_colors:
        distance = np.sqrt(np.sum((pixels - color) ** 2, axis=-1))
        ring_mask |= distance < threshold
    img[ring_mask] = 0

    # Save the modified image
    cv2.imwrite(image_path if output_path is None else output_path, img)
    return img

In [1]:
# Binarisation
def process_image(image_path, grid=6, threshold=30):
    """Count white pixels per tile after binarising an image.

    The image is converted to grayscale, thresholded with
    ``cv2.THRESH_BINARY`` (max value 255), and divided into ``grid * grid``
    tiles of ``height // grid`` by ``width // grid`` pixels. Rows and columns
    left over by the integer division are ignored.

    Args:
        image_path: Path of the image to read.
        grid: Number of tiles along each axis. Defaults to 6 (36 tiles).
        threshold: Threshold value passed to ``cv2.threshold``. Defaults to 30.

    Returns:
        A list of ``grid * grid`` counts of pixels equal to 255, ordered row
        by row (top-left tile first).

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
        ValueError: If ``grid`` is smaller than 1 or the image has fewer than
            ``grid`` pixels along either axis.
    """
    if grid < 1:
        raise ValueError(f"grid must be at least 1, got {grid}")

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"could not read image: {image_path}")

    height, width = img.shape[:2]
    tile_height, tile_width = height // grid, width // grid
    if tile_height == 0 or tile_width == 0:
        raise ValueError(
            f"image {image_path} ({width}x{height}) is too small for a {grid}x{grid} grid"
        )

    # Grayscale conversion and thresholding are per-pixel operations, so doing
    # them once on the whole image gives the same tiles as doing it per tile
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    white = binary == 255

    white_counts = [
        np.sum(white[row * tile_height:(row + 1) * tile_height,
                     col * tile_width:(col + 1) * tile_width])
        for row in range(grid)
        for col in range(grid)
    ]
    return white_counts

In [2]:
# GLCM features, binary version
def GLCM_process_binary(image_path, threshold=30):
    """Compute GLCM texture features for every image in a folder.

    Each file in ``image_path`` whose name ends with ``.jpg`` or ``.png`` is
    read as grayscale, binarised with ``cv2.THRESH_BINARY`` (max value 255),
    and turned into a grey-level co-occurrence matrix (distance 1, angle 0,
    symmetric, normalised). Six properties are read from that matrix.

    Args:
        image_path: Path of the folder that contains the images (despite the
            name, this is a directory, not a single file).
        threshold: Threshold value passed to ``cv2.threshold``. Defaults to 30.

    Returns:
        A DataFrame with one row per image, in ``os.listdir`` order, and six
        unnamed columns: energy, dissimilarity, contrast, homogeneity, ASM,
        correlation. File names are not included, so callers that pair these
        rows with names must list the folder with the same rule.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load one of the images.
    """
    # Order matters: it defines the column order of the returned frame
    property_names = ('energy', 'dissimilarity', 'contrast', 'homogeneity', 'ASM', 'correlation')

    features = []
    for file_name in os.listdir(image_path):
        if not file_name.endswith(('.jpg', '.png')):
            continue

        # Read the image; cv2.imread signals failure by returning None
        file_path = os.path.join(image_path, file_name)
        image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise FileNotFoundError(f"could not read image: {file_path}")
        _, binary = cv2.threshold(image, threshold, 255, cv2.THRESH_BINARY)

        # Compute the GLCM and extract one scalar per property
        glcm = graycomatrix(binary, [1], [0], symmetric=True, normed=True)
        features.append([graycoprops(glcm, name)[0, 0] for name in property_names])

    GLCM_df = pd.DataFrame(features)
    return GLCM_df

# 模組化

In [3]:
### main function
def process_images_and_predict(test_folder, model_path, output_csv, remove_ring=False, binarize=True, glcm=True, json=False, threshold=False):
    """Extract features for every image in a folder, predict, and write a CSV.

    Args:
        test_folder: Folder containing the images. Files whose names end with
            ``.jpg`` or ``.png`` are processed, in ``os.listdir`` order.
        model_path: Path of the trained model file.
        output_csv: Path of the CSV to write; it has the columns
            ``image_name`` and ``result``.
        remove_ring: If true, run ``remove_outerring`` on every image first
            (that function rewrites the image files).
        binarize: If true, add the 36 per-tile white-pixel counts from
            ``process_image`` as features ``Tile_0`` .. ``Tile_35``.
        glcm: If true, add the six features from ``GLCM_process_binary``.
        json: If true, ``model_path`` is loaded with
            ``XGBClassifier.load_model``; otherwise with joblib ``load``.
        threshold: If falsy (``False``, ``None``, ``0``) the model's
            ``predict`` is used. Otherwise the class-1 probability from
            ``predict_proba`` is compared with this value and rows above it
            are labelled 1, the rest 0.

    Raises:
        ValueError: If the GLCM stage returns a different number of rows than
            the number of listed images, which would misalign features and
            image names.
    """
    # One listing drives every stage. It uses the same rule and order as
    # GLCM_process_binary (os.listdir, '.jpg' / '.png' suffix) because the GLCM
    # rows carry no file names and are joined to the name column by position.
    image_names = [name for name in os.listdir(test_folder) if name.endswith(('.jpg', '.png'))]

    if remove_ring:
        # Remove the outer ring
        for file_name in image_names:
            remove_outerring(os.path.join(test_folder, file_name))
        print(f'{test_folder}: outer ring removed!')

    # Binarised tile features for the test images
    tiles_amount = 36  # 6 * 6 tiles per image
    if binarize:
        data = [
            [file_name] + list(process_image(os.path.join(test_folder, file_name)))
            for file_name in image_names
        ]
        columns = ['image_name'] + [f'Tile_{i}' for i in range(tiles_amount)]
    else:
        data = [[file_name] for file_name in image_names]
        columns = ['image_name']
    dfAll = pd.DataFrame(data, columns=columns)

    # GLCM features
    if glcm:
        glcm_df = pd.DataFrame(GLCM_process_binary(test_folder))
        if len(glcm_df) != len(dfAll):
            raise ValueError(
                f"GLCM stage produced {len(glcm_df)} rows for {len(dfAll)} images in {test_folder}"
            )
        dfAll = pd.concat([dfAll, glcm_df], axis=1)
    print('image processed!')
    print(dfAll)

    # Load the trained model
    if json:
        model = XGBClassifier(n_estimators=500, n_jobs=-1, max_depth=4, eta=0.1, colsample_bytree=0.67, early_stopping_rounds=300,)
        model.load_model(model_path)
    else:
        # NOTE: joblib.load unpickles the file, so only load trusted model files
        model = load(model_path)
    X_test = dfAll.drop(columns=['image_name'])
    # GLCM columns are integer-named; make all feature names strings
    X_test.columns = X_test.columns.astype(str)

    # With a threshold, classify from the predicted class-1 probability
    if threshold:
        y_pred_proba = model.predict_proba(X_test)
        y_pred_proba_class_1 = y_pred_proba[:, 1]
        # Convert the probability into a binary class
        y_pred = (y_pred_proba_class_1 > threshold).astype(int)
    else:
        y_pred = model.predict(X_test)

    pred_df = pd.DataFrame({
        'image_name': dfAll['image_name'].values,
        'result': y_pred,
    })

    # Save as CSV
    pred_df.to_csv(output_csv, index=False)
    print("Finished！")

# 呼叫Main function

In [4]:
import cv2
import numpy as np
import pandas as pd
import os
import glob
from joblib import dump, load
from xgboost import XGBClassifier
from skimage.feature import graycomatrix, graycoprops

# Usage: folder of test images, trained model file, and the CSV to write
test_folder = r"C:\Users\user\Desktop\wafer\240708 AMAT test"
model_path = r"C:\Users\user\Desktop\wafer\Model\AMAT_Model8_XGBoost20240704.joblib"
output_csv = 'AMAT_Model8-1_20240709.csv'

# Run the main function; switch the optional stages on or off as needed
process_images_and_predict(
    test_folder=test_folder,
    model_path=model_path,
    output_csv=output_csv,
    remove_ring=False,
    binarize=True,
    glcm=True,
    json=False,
)

image processed!
               image_name  Tile_0  Tile_1  Tile_2  Tile_3  Tile_4  Tile_5  \
0      WCWFC0001-05-A.jpg      18     157     184     152     154      27   
1      WCWFC0001-12-A.jpg      29     230     440     244     310      50   
2     WCWFC0001-13-S0.jpg      18     266     565     337     256      39   
3     WCWFC0001-24-S0.jpg      18     157     183     152     172      27   
4      WCWFC0002-15-A.jpg      18     251     259     216     181      27   
...                   ...     ...     ...     ...     ...     ...     ...   
1162   WCWFC0295-25-A.jpg      18     221     219     188     188      27   
1163  WCWFC0296-03-S0.jpg      18     243     151     152     154      27   
1164  WCWFC0296-10-S0.jpg      18     156     357     198     246      26   
1165  WCWFC0296-16-S0.jpg      18     190     161     206     218      27   
1166  WCWFC0296-21-S0.jpg      18     187     184     152     154      27   

      Tile_6  Tile_7  Tile_8  ...  Tile_32  Tile_33  Tile_