<a href="https://colab.research.google.com/github/juhumkwon/DataMining/blob/main/2_2_AdaBoost_%EC%98%88%EC%A0%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

# Toy 1-D training set: ten feature values with +1 / -1 class labels
x = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
y = np.array([1, 1, 1, -1, -1, -1, -1, 1, 1, 1])

# Uniform initial sample weights: every point gets 1/N
N = x.shape[0]
w = np.full(N, 1.0 / N)

def calculate_entropy(w, y):
    """Compute the weighted binary entropy, in bits, of the labels ``y``.

    Args:
        w: NumPy array of sample weights.
        y: NumPy array of labels aligned with ``w``. Only entries equal to
            +1 or -1 contribute to the per-class weight.

    Returns:
        ``0`` when either class carries no weight (this includes empty
        input); otherwise ``-(p_pos * log2(p_pos) + p_neg * log2(p_neg))``,
        where each ratio is the class weight divided by the sum of ``w``.
    """
    # Total weight carried by the +1 class and by the -1 class, in that order
    class_mass = [np.sum(w[y == label]) for label in (1, -1)]

    # A group that holds only one class (or nothing at all) is pure
    if any(mass == 0 for mass in class_mass):
        return 0

    total = np.sum(w)
    return -sum((mass / total) * np.log2(mass / total) for mass in class_mass)

def find_best_split(x, y, w):
    """Find the threshold on ``x`` that minimises the weighted entropy.

    Every candidate threshold ``k`` splits the samples into ``x <= k`` and
    ``x > k``. The score of a split is the sum, over both sides, of the
    side's total weight times its entropy from ``calculate_entropy``.

    Args:
        x: 1-D array-like of feature values, in any order.
        y: 1-D array-like of labels aligned with ``x``.
        w: 1-D array-like of sample weights aligned with ``x``.

    Returns:
        A tuple ``(best_k, min_entropy)``. If ``x`` has fewer than two
        distinct values there is no candidate threshold and the result is
        ``(None, inf)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    w = np.asarray(w)

    best_k = None
    min_entropy = float('inf')

    # Candidate thresholds are the midpoints between consecutive *distinct*
    # values taken in sorted order. Deriving them from np.unique makes the
    # result independent of the order of x and avoids duplicate candidates.
    distinct_values = np.unique(x)
    thresholds = (distinct_values[:-1] + distinct_values[1:]) / 2

    for k in thresholds:
        left_indices = x <= k
        right_indices = x > k

        left_entropy = calculate_entropy(w[left_indices], y[left_indices])
        right_entropy = calculate_entropy(w[right_indices], y[right_indices])

        # Weight each side's entropy by the total sample weight on that side
        total_entropy = (np.sum(w[left_indices]) * left_entropy +
                         np.sum(w[right_indices]) * right_entropy)

        # Strict comparison: on ties the smallest threshold is kept
        if total_entropy < min_entropy:
            min_entropy = total_entropy
            best_k = k

    return best_k, min_entropy

def adaboost(x, y, T):
    """Train an AdaBoost ensemble of one-dimensional threshold stumps.

    Each round picks a threshold ``k`` with ``find_best_split`` and uses the
    stump "predict +1 where ``x <= k``, otherwise -1" as the weak learner.
    The stump's orientation is fixed, so a round whose weighted error is 0.5
    or more ends training instead of flipping the stump.

    Args:
        x: 1-D array-like of feature values.
        y: 1-D array-like of labels aligned with ``x``; they are compared
            against the stump's +1 / -1 predictions.
        T: Maximum number of boosting rounds.

    Returns:
        A tuple ``(alphas, classifiers)`` of equal-length lists: the weight
        of each accepted weak learner and its threshold ``k``. Both lists
        are empty when no round produced a usable learner.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    N = len(x)
    w = np.ones(N) / N  # uniform initial sample weights
    alphas = []
    classifiers = []

    # Floor for the error rate so a perfect stump gets a large but finite
    # alpha instead of a division by zero (alpha = inf, then NaN weights).
    eps = 1e-10

    for _round in range(T):
        # 1. Pick the split threshold for the current sample weights
        k, _min_entropy = find_best_split(x, y, w)

        # No candidate threshold exists (fewer than two distinct points)
        if k is None:
            break

        # 2. Weak-learner predictions
        predictions = np.where(x <= k, 1, -1)

        # 3. Weighted error rate
        error = np.sum(w[predictions != y]) / np.sum(w)

        # Stop when the stump is no better than chance
        if error >= 0.5:
            break

        # A (numerically) perfect stump: clamp the error, keep the learner,
        # and stop afterwards because there is nothing left to re-weight.
        is_perfect = error < eps
        if is_perfect:
            error = eps

        # 4. Learner weight
        alpha = 0.5 * np.log((1 - error) / error)

        # 5. Store the learner
        alphas.append(alpha)
        classifiers.append(k)

        if is_perfect:
            break

        # 6. Re-weight the samples: misclassified points gain weight,
        #    correctly classified points lose weight; then renormalise.
        w = w * np.exp(-alpha * y * predictions)
        w = w / np.sum(w)

    return alphas, classifiers

# Run AdaBoost on the toy data set
T = 5  # maximum number of boosting rounds
alphas, classifiers = adaboost(x, y, T)

# Print the learner weights and the chosen split thresholds
for caption, values in (
    ("학습기 가중치 (alphas):", alphas),
    ("분할점 (classifiers):", classifiers),
):
    print(caption, values)


학습기 가중치 (alphas): [0.4236489301936017]
분할점 (classifiers): [0.35]
