In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage import median_filter, rotate, shift
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [2]:
# Location of the LSWMD pickle. The default is the path this notebook was
# originally written against; set the LSWMD_PATH environment variable to load
# the file from somewhere else without editing the notebook.
LSWMD_PATH = os.environ.get("LSWMD_PATH", "/Users/unsi/Downloads/LSWMD.pkl")
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
df = pd.read_pickle(LSWMD_PATH)


In [None]:
# Maps each failure-pattern name to its integer class index; the index is the
# position of the name in the list below.
# NOTE(review): the 'none' label that appears in df["failureType"] has no entry here.
class2idx = {
    pattern: index
    for index, pattern in enumerate([
        'Loc',
        'Edge-Loc',
        'Center',
        'Edge-Ring',
        'Scratch',
        'Random',
        'Near-full',
        'Donut',
    ])
}

In [5]:
# Preview the raw failure-type column: entries are nested arrays such as
# [[none]] or [[Edge-Ring]], and some rows hold an empty [].
df.loc[:, "failureType"]

0              [[none]]
1              [[none]]
2              [[none]]
3              [[none]]
4              [[none]]
              ...      
811452    [[Edge-Ring]]
811453     [[Edge-Loc]]
811454    [[Edge-Ring]]
811455               []
811456               []
Name: failureType, Length: 811457, dtype: object

In [None]:

# Generate sample data
def generate_sample_data(n_samples=1000, size=(26, 26), seed=None):
    """Generate random synthetic wafer maps with binary defect labels.

    Every cell is drawn independently: 0 with probability 0.7, 1 (normal die)
    with probability 0.2 and 2 (defective die) with probability 0.1. A map is
    labelled 1 when it contains at least one cell equal to 2, otherwise 0.

    NOTE: with the default 26x26 size the chance of a map containing no 2 at
    all is 0.9 ** 676, so in practice every label comes out as 1.

    Parameters
    ----------
    n_samples : int
        Number of maps to generate.
    size : int or tuple of int
        Shape of a single map.
    seed : int, optional
        When given, a dedicated ``np.random.RandomState(seed)`` is used so the
        result is reproducible. When ``None`` (default) the global NumPy random
        state is used.

    Returns
    -------
    data : np.ndarray
        Integer array of shape ``(n_samples, *size)`` with values in {0, 1, 2}.
    labels : np.ndarray
        Integer array of shape ``(n_samples,)`` with values in {0, 1}.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    map_shape = (size,) if np.isscalar(size) else tuple(size)

    # Draw every map in a single call instead of one call per sample.
    data = rng.choice([0, 1, 2], size=(n_samples,) + map_shape, p=[0.7, 0.2, 0.1])

    # A map is "defective" as soon as any of its cells equals 2.
    per_map_axes = tuple(range(1, data.ndim))
    labels = (data == 2).any(axis=per_map_axes).astype(int)
    return data, labels

# 1. Filtering - apply a median filter
def apply_median_filter(wafer_maps):
    """Denoise wafer maps with a 3x3 median filter without eroding the wafer.

    Cells equal to 0 are treated as background and always stay 0. For die cells
    (non-zero) the 3x3 median replaces the original value, except when that
    median is 0: the window is then dominated by background, and writing the
    median back would turn a die into background. Such cells keep their
    original value, so the set of non-zero cells is identical before and after
    filtering.

    Parameters
    ----------
    wafer_maps : iterable of 2-D array-like
        Wafer maps to filter.

    Returns
    -------
    np.ndarray
        The filtered maps stacked along a new first axis.
    """
    filtered_maps = []
    for wafer_map in wafer_maps:
        wafer_map = np.asarray(wafer_map)
        smoothed = median_filter(wafer_map, size=3)
        # Keep the input on background cells and wherever the median collapsed
        # to background; only genuine die-to-die smoothing is applied.
        keep_original = (wafer_map == 0) | (smoothed == 0)
        filtered_maps.append(np.where(keep_original, wafer_map, smoothed))
    return np.array(filtered_maps)

# 2. Data augmentation - rotation and flips
def augment_data(wafer_maps):
    """Expand every wafer map into four variants.

    For each input map the output contains, consecutively:
    the original, a 90-degree rotation, a vertical flip (axis 0) and a
    horizontal flip (axis 1). Output index ``4 * i + k`` is therefore variant
    ``k`` of input map ``i``; per-map labels must be expanded with
    ``np.repeat(labels, 4)`` to stay aligned.

    The rotation uses ``np.rot90``, which only permutes cells, so the
    categorical cell values are preserved exactly (no interpolation).

    Parameters
    ----------
    wafer_maps : iterable of 2-D array-like
        Wafer maps to augment. Maps should be square if a regular stacked
        array is expected, because rotating swaps the two dimensions.

    Returns
    -------
    np.ndarray
        Array holding ``4 * len(wafer_maps)`` maps.
    """
    augmented_maps = []
    for wafer_map in wafer_maps:
        wafer_map = np.asarray(wafer_map)
        augmented_maps.extend([
            wafer_map,                   # original
            np.rot90(wafer_map),         # exact 90-degree rotation
            np.flip(wafer_map, axis=0),  # vertical flip
            np.flip(wafer_map, axis=1),  # horizontal flip
        ])
    return np.array(augmented_maps)

# 3. Feature extraction - defect ratio and defect distribution
def extract_features(wafer_maps, center_margin=0.3):
    """Compute two summary features per wafer map.

    Features, in column order:

    1. ``defect_ratio`` - cells equal to 2 divided by cells greater than 0
       (0 when the map has no non-zero cell).
    2. ``center_defect_ratio`` - share of the cells equal to 2 that lie inside
       the central window (0 when the map has no cell equal to 2).

    The central window is derived from each map's own shape: on every side a
    margin of ``round(center_margin * length)`` cells is excluded. For a 26x26
    map with the default margin this is exactly the ``[8:18, 8:18]`` window.

    Parameters
    ----------
    wafer_maps : iterable of 2-D array-like
        Wafer maps with 0 = background, 1 = normal die, 2 = defective die.
    center_margin : float, optional
        Fraction of each dimension excluded on either side of the central
        window. Defaults to 0.3.

    Returns
    -------
    np.ndarray
        Float array of shape ``(n_maps, 2)``; ``(0, 2)`` for empty input.
    """
    features = []
    for wafer_map in wafer_maps:
        wafer_map = np.asarray(wafer_map)
        total_die = np.sum(wafer_map > 0)
        defect_die = np.sum(wafer_map == 2)
        defect_ratio = defect_die / total_die if total_die > 0 else 0

        # Are the defects concentrated in the centre or towards the edge?
        n_rows, n_cols = wafer_map.shape[:2]
        # Cap the margin so that the window keeps at least one row/column.
        row_margin = max(0, min(int(round(center_margin * n_rows)), (n_rows - 1) // 2))
        col_margin = max(0, min(int(round(center_margin * n_cols)), (n_cols - 1) // 2))
        center_region = wafer_map[row_margin:n_rows - row_margin,
                                  col_margin:n_cols - col_margin]
        center_defect_ratio = np.sum(center_region == 2) / defect_die if defect_die > 0 else 0

        features.append([defect_ratio, center_defect_ratio])
    # Fixed dtype/shape so downstream code always receives an (n, 2) float matrix.
    return np.array(features, dtype=float).reshape(-1, 2)

In [None]:

# 4. Load and preprocess the data
np.random.seed(42)  # the synthetic maps come from the global NumPy RNG; seed it for reproducible runs
wafer_maps, labels = generate_sample_data()
filtered_wafer_maps = apply_median_filter(wafer_maps)
augmented_wafer_maps = augment_data(filtered_wafer_maps)
# augment_data emits the four variants of each map back to back
# (map0, map0', map0'', map0''', map1, ...), so every label has to be repeated
# four times in place. np.tile(labels, 4) would instead produce
# (l0, l1, ..., l0, l1, ...) and pair most maps with another wafer's label.
augmented_labels = np.repeat(labels, 4)
assert len(augmented_labels) == len(augmented_wafer_maps), "labels and augmented maps are misaligned"

# 5. Feature extraction
features = extract_features(augmented_wafer_maps)

# 6. Train/test split
# NOTE(review): augmentation happens before the split, so variants of the same
# wafer can end up in both train and test and the scores below are optimistic.
# Consider splitting the original wafers first and augmenting only the training part.
X_train, X_test, y_train, y_test = train_test_split(features, augmented_labels, test_size=0.2, random_state=42)

# 7. Machine-learning model - Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 8. Performance evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
