In [1]:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def load_data():
    """Return a binary-labelled Iris train/test split.

    Samples whose original target is 0 are relabelled +1 and every other
    sample is relabelled -1. The relabelling is done in place on the target
    array returned by ``load_iris``.

    Returns:
        The result of ``train_test_split`` with ``test_size=0.2`` and
        ``random_state=42``, i.e. ``X_train, X_test, y_train, y_test``.
    """
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    # Capture the class-0 mask first so the two assignments cannot interfere.
    is_class_zero = labels == 0
    labels[~is_class_zero] = -1
    labels[is_class_zero] = 1

    return train_test_split(features, labels, test_size=0.2, random_state=42)

def stump_classifier(X, y, weights):
    """Find the decision stump with the lowest weighted error.

    For every feature, the candidate thresholds are the midpoints between
    consecutive distinct values of that feature. Each threshold is tried in
    both directions:

    * ``"lt"``: predict -1 where ``feature <= threshold``, +1 elsewhere.
    * ``"gt"``: predict -1 where ``feature > threshold``, +1 elsewhere.

    Args:
        X: 2-D array of shape ``(m, n)`` holding one sample per row.
        y: Length-``m`` array of labels, compared against +/-1 predictions.
        weights: Length-``m`` array of per-sample weights.

    Returns:
        A tuple ``(best_stump, min_error)``. ``best_stump`` is a dict with
        keys ``"dim"``, ``"thresh"`` and ``"ineq"``; ``min_error`` is the sum
        of the weights of the samples it misclassifies. If no feature has at
        least two distinct values, the result is ``({}, inf)``.
    """
    _, n_features = X.shape
    best_stump = {}
    min_error = float('inf')

    for dim in range(n_features):
        column = X[:, dim]
        distinct = np.unique(column)  # np.unique already returns sorted values
        midpoints = (distinct[:-1] + distinct[1:]) / 2

        for thresh in midpoints:
            # Order matters: "lt" is evaluated before "gt", and only a
            # strictly smaller error replaces the current best (first wins).
            candidates = (
                ("lt", column <= thresh),
                ("gt", column > thresh),
            )
            for ineq, negative_side in candidates:
                predictions = np.where(negative_side, -1.0, 1.0)
                error = np.sum(weights[y != predictions])
                if error < min_error:
                    min_error = error
                    best_stump = {"dim": dim, "thresh": thresh, "ineq": ineq}

    return best_stump, min_error

def adaboost_train(X, y, M=50):
    """Train an AdaBoost ensemble of decision stumps.

    Each round fits the best stump under the current sample weights, gives
    it a vote ``alpha = 0.5 * log((1 - error) / error)``, and then re-weights
    the samples so that misclassified ones count more in the next round.

    Boosting stops before ``M`` rounds in two situations:

    * ``stump_classifier`` finds no split at all (it returns an empty dict
      when no feature has two distinct values). No stump is added.
    * The chosen stump has zero weighted error. The weight update would then
      scale every weight by the same factor, so every later round would pick
      the same stump again; one copy is enough.

    Args:
        X: 2-D array with one training sample per row.
        y: Array of +/-1 labels, one per row of ``X``.
        M: Maximum number of boosting rounds.

    Returns:
        A list of stump dicts with keys ``"dim"``, ``"thresh"``, ``"ineq"``
        and ``"alpha"``, in the order they were trained. May hold fewer than
        ``M`` entries (possibly none) because of the early stops above.
    """
    weak_classifiers = []
    m = X.shape[0]
    D = np.ones(m) / m  # Start from uniform sample weights

    for _ in range(M):
        stump, error = stump_classifier(X, y, D)
        if not stump:
            # No valid split exists; indexing the empty dict would raise.
            break

        # Clamp the denominator so a perfect stump yields a large finite alpha.
        alpha = 0.5 * np.log((1 - error) / max(error, 1e-10))
        stump["alpha"] = alpha
        weak_classifiers.append(stump)

        if error <= 0:
            # Perfect stump: re-weighting would leave D unchanged, so further
            # rounds would only append duplicates of this stump.
            break

        # Recompute the chosen stump's predictions on the training set.
        column = X[:, stump["dim"]]
        if stump["ineq"] == "lt":
            negative_side = column <= stump["thresh"]
        else:
            negative_side = column > stump["thresh"]
        predictions = np.where(negative_side, -1.0, 1.0)

        # y * predictions is +1 for correct samples (weight shrinks) and
        # -1 for mistakes (weight grows); then renormalise to sum to 1.
        D *= np.exp(-alpha * y * predictions)
        D /= D.sum()

    return weak_classifiers

def adaboost_predict(X, weak_classifiers):
    """Predict labels with a trained ensemble of decision stumps.

    Every stump votes -1 on one side of its threshold and +1 on the other;
    the votes are scaled by the stump's ``"alpha"`` and summed per sample.

    Args:
        X: 2-D array with one sample per row.
        weak_classifiers: Iterable of stump dicts with keys ``"dim"``,
            ``"thresh"``, ``"ineq"`` and ``"alpha"``.

    Returns:
        A float array with the sign of each sample's weighted vote total:
        -1.0 or 1.0, and 0.0 where the total is exactly zero (including the
        case of an empty ensemble).
    """
    n_samples = X.shape[0]
    scores = np.zeros(n_samples)

    for clf in weak_classifiers:
        direction = clf["ineq"]
        column = X[:, clf["dim"]]
        # "lt" votes -1 at or below the threshold; anything else votes -1 above it.
        negative_side = (
            column <= clf["thresh"] if direction == "lt" else column > clf["thresh"]
        )
        votes = np.where(negative_side, -1.0, 1.0)
        scores += clf["alpha"] * votes

    return np.sign(scores)

# Build the binary-labelled train/test split.
X_train, X_test, y_train, y_test = load_data()

# Fit an ensemble of at most 50 decision stumps on the training portion.
weak_classifiers = adaboost_train(X_train, y_train, 50)

# Label the held-out samples with the trained ensemble.
y_pred = adaboost_predict(X=X_test, weak_classifiers=weak_classifiers)

# Report the fraction of held-out samples that were labelled correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)




Accuracy: 1.0


### Explanation:

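- `load_data`: Loads the Iris dataset and turns it into a binary classification problem for simplicity: class 0 is relabelled as +1 and every other class as -1. It then returns an 80/20 train/test split with a fixed random seed.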
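- `stump_classifier`: Searches for the best decision stump. For each feature it tries the midpoints between consecutive distinct values as thresholds, in both directions, and keeps the feature, threshold, and direction with the lowest weighted classification error.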
- `adaboost_train`: Implements the AdaBoost algorithm. It iteratively creates decision stumps, updates weights of training instances, and calculates the alpha values (which represent the weight of each stump in the final classification).
- `adaboost_predict`: Makes predictions using the ensemble of stumps created during training.
- Finally, the script trains the AdaBoost model on the Iris dataset and evaluates its accuracy.

Remember, this is a basic implementation and lacks many features and optimizations of professional libraries. However, it should give you a conceptual understanding of how AdaBoost works under the hood.