In [14]:
from typing import Callable, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Read Input Data

In [None]:
# Load the iris measurements from the working directory. Later cells read the
# columns 'sepal.length', 'sepal.width', 'petal.length', 'petal.width' and 'variety'.
df = pd.read_csv('iris.csv')
# Column names, dtypes and non-null counts
df.info()
# Preview of the first rows (rendered as the cell output)
df.head()

In [16]:
def create_labeled_scatter_plot(
    DF: pd.DataFrame,
    x_col: str,
    y_col: str,
    z_col: str
):
    '''
    Scatter-plot two columns of ``DF`` with one colour per distinct value of ``z_col``.

    :param DF: Data to plot
    :param x_col: Column drawn on the x-axis
    :param y_col: Column drawn on the y-axis
    :param z_col: Column whose distinct values define the groups (one legend entry each)
    :return: The matplotlib Axes the points were drawn on
    '''
    unique_labels = DF[z_col].unique()
    # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
    # Matplotlib 3.7 and removed in 3.9. Passing the number of labels resamples
    # 'tab10' to exactly that many colours, as before.
    colors = plt.get_cmap('tab10', len(unique_labels)).colors

    FIG = plt.figure(figsize=(10, 6))
    AX = FIG.add_subplot(1, 1, 1)

    for i, label in enumerate(unique_labels):
        subset = DF[DF[z_col] == label]
        # c is wrapped in a list so the single RGBA row is read as one colour
        # for every point rather than as per-point scalar values.
        AX.scatter(x=subset[x_col], y=subset[y_col], c=[colors[i]], label=label)

    AX.set_xlabel(x_col)
    AX.set_ylabel(y_col)
    AX.set_title(f'{x_col} vs {y_col} by {z_col}')

    AX.legend()
    return AX

In [None]:
# Sepal plane, coloured by variety
create_labeled_scatter_plot(df, 'sepal.length', 'sepal.width', 'variety')

# Petal plane, coloured by variety
create_labeled_scatter_plot(df, 'petal.length', 'petal.width', 'variety')

#  Multi-class Classification

Generalize the Perceptron algorithm. 

**One vs. All** 
- train a perceptron-classifier for each class.
- output: $\arg\max_i (w_i^T \cdot x + b_i)$ (the label whose classifier gives the highest score)

## Multi-class Dataframe => binary-outcome dataframes

In [None]:
def make_df_binary(
    DF: pd.DataFrame,
    target_col: str,
    positive_class: str
) -> pd.DataFrame:
    '''
    Build a one-vs-rest copy of ``DF``.

    The ``target_col`` column is renamed to ``is<positive_class>`` and its values
    become 1 where the original value equals ``positive_class`` and -1 elsewhere.
    ``DF`` itself is left untouched.

    :param DF: Multi-class dataset
    :param target_col: Name of the label column
    :param positive_class: Label value that maps to 1
    :return: New DataFrame with the relabelled column
    '''
    flag_col = f'is{positive_class}'
    # rename() without inplace already hands back a fresh frame
    relabelled = DF.rename(columns={target_col: flag_col})
    relabelled[flag_col] = DF[target_col].map(
        lambda value: 1 if value == positive_class else -1
    )
    return relabelled

def get_binary_DFs(DF: pd.DataFrame, target_col: str) -> dict[str, pd.DataFrame]:
    '''
    Produce one one-vs-rest DataFrame per distinct value of ``target_col``.

    :param DF: Multi-class dataset
    :param target_col: Name of the label column
    :return: Mapping from each label to the frame returned by ``make_df_binary`` for it
    '''
    return {
        positive: make_df_binary(DF, target_col, positive)
        for positive in DF[target_col].unique()
    }

# One relabelled frame per variety
binary_dfs = get_binary_DFs(df, 'variety')

# Sanity check: how many rows are the positive class in each frame
print('Binary DataFrames:')
for label, bin_df in binary_dfs.items():
    n_positive = (bin_df[f'is{label}'] == 1).sum()
    print(f'{label}:')
    print(f'# of positive class: {n_positive}')

In [None]:
# Draw every one-vs-rest relabelling in the sepal plane, then in the petal plane.
for label, BDF in binary_dfs.items():
    for x_col, y_col in (
        ('sepal.length', 'sepal.width'),
        ('petal.length', 'petal.width'),
    ):
        create_labeled_scatter_plot(DF=BDF, x_col=x_col, y_col=y_col, z_col=f'is{label}')

# Classifier 

$$
    h(x) = sgn(w^T \cdot x +b)
$$

In [20]:
def get_BinaryClassifier(w, b) -> Callable[[np.ndarray], float]:
    '''
    Build the linear scoring function ``x -> w^T . x + b``.

    :param w: Weight vector
    :param b: Bias term
    :return: Function mapping a feature vector to its raw (unsigned) score
    :note: take the sign of the output to get the predicted class
    '''
    return lambda x: (np.dot(np.transpose(w), x) + b)

# Perceptron Algorithm

In [29]:
# PERCEPTRON
def Perceptron(
    D: pd.DataFrame,
    labelCol: str,
    w: np.ndarray,
    b: float,
    delta: float,
    onIncorrect: Optional[Callable[..., None]] = None,
) -> tuple[np.ndarray, float]:
    '''
    Run one pass of the perceptron update rule over the rows of ``D``.

    :param D: Training data; its first ``len(w)`` columns are used as the feature vector
    :param labelCol: Name of the column holding the labels (-1 or 1)
    :param w: Initial weight vector; its length fixes the number of features
    :param b: Initial bias
    :param delta: A row triggers an update when ``y * (w . x + b) <= delta``
    :param onIncorrect: Optional callback, called as
        ``onIncorrect(x, y, y_hat, w_old, b_old, w_new, b_new)`` before each update is applied
    :return: ``(w, b)`` after the pass
    '''
    d = len(w)  # Number of features
    # Pull features and labels out positionally, once. Rows are then visited in
    # order regardless of what the index of D looks like (the previous
    # `for i in D.index` / `D.iloc[i]` mix only worked for a default 0..n-1 index).
    X = D.iloc[:, :d].to_numpy()
    Y = D[labelCol].to_numpy()
    for x, y in zip(X, Y):
        # Compute prediction
        y_hat = np.dot(np.transpose(w), x) + b
        # Is it incorrect (or inside the margin)?
        if y * y_hat <= delta:
            # Calculate new parameters
            w_i = w + x * y
            b_i = b + y
            # Let the caller observe the update before it is applied
            if onIncorrect:
                onIncorrect(x, y, y_hat, w, b, w_i, b_i)
            # Update parameters
            w, b = w_i, b_i
    # Tuple, as the return annotation promises; callers unpacking `w, b = ...` are unaffected
    return w, b

In [None]:
# EXAMPLE

bin_df = binary_dfs['Versicolor']
AX1 = create_labeled_scatter_plot(bin_df, 'sepal.length', 'sepal.width', 'isVersicolor')
AX2 = create_labeled_scatter_plot(bin_df, 'petal.length', 'petal.width', 'isVersicolor')

fix_num: int = 0
def onIncorrect(x, y, y_hat, w0, b0, w1, b1):
    '''
    Perceptron callback: log one parameter update and draw a line for it on AX1 and AX2.

    Reads the module-level ``bin_df``, ``AX1`` and ``AX2`` and increments ``fix_num``.

    :param x: Feature vector of the misclassified row
    :param y: Its label
    :param y_hat: The score computed before the update
    :param w0: Weights before the update
    :param b0: Bias before the update
    :param w1: Weights after the update
    :param b1: Bias after the update
    '''
    global fix_num
    fix_num += 1
    print(f'''Incorrect Guess:
    w = {w0}
    b = {b0}
    Feature = {x}
    Label: {y}
    Prediction: {y_hat}
    w' <= {w1}
    b' <= {b1}
    ''')
    # CREATE LINE
    def plotLine(max_x: float, max_y: float, m: float, b: float, AX) -> None:
        '''Draw y = m*x + b on AX for x in [0, max_x], cut off where it first leaves the data range.'''
        x: np.ndarray = np.linspace(0, max_x, 50)  # 50 sample points
        y = m * x + b
        # Cut the line at the first sample that exceeds either bound. The two
        # conditions used to be required simultaneously, and since x never
        # exceeds max_x by construction the cut could never happen.
        overflow = np.where((y > max_y) | (x > max_x))[0]
        if overflow.size > 0:
            i = overflow[0]
            x = x[:i]
            y = y[:i]
        # Add the line to the plot; the label matches the plotted y = m*x + b
        AX.add_line(plt.Line2D(x, y, color=np.random.rand(3,), label=f'y = {np.round(m, 2)}x + {b}'))

    # NOTE(review): the slopes below are ratios of weight components and the
    # intercept is the raw bias; this does not look like the w.x + b = 0 boundary
    # in either plane (and the petal plot divides by the sepal.length weight).
    # Left as-is - confirm which projection is intended.
    plotLine(bin_df['sepal.length'].max(), bin_df['sepal.width'].max(), w1[1]/w1[0], b1, AX1)
    plotLine(bin_df['petal.length'].max(), bin_df['petal.width'].max(), w1[2]/w1[0], b1, AX2)

Perceptron(
    D=bin_df,
    labelCol='isVersicolor',
    w=np.ones(bin_df.shape[1] - 1),  # Adjust the length of w to match the number of features
    b=1,
    delta=0,
    onIncorrect=onIncorrect
)

AX1.legend()
AX2.legend()

In [30]:
# Train one binary perceptron per label and keep its scoring function.
labeled_classifiers = {}
for label, bin_df in binary_dfs.items():
    # One weight per feature column, i.e. every column except the label
    n_features = bin_df.shape[1] - 1
    # Single pass of the perceptron from the all-ones starting point
    w, b = Perceptron(
        D=bin_df,
        labelCol=f'is{label}',
        w=np.ones(n_features),
        b=1,
        delta=0,
        onIncorrect=onIncorrect,
    )
    # Store the resulting scorer under its label
    labeled_classifiers[label] = get_BinaryClassifier(w, b)

Incorrect Guess:
    w = [1. 1. 1. 1.]
    b = 1
    Feature = [7.  3.2 4.7 1.4]
    Label: -1
    Prediction: 17.299999999999997
    w' <= [-6.  -2.2 -3.7 -0.4]
    b' <= 0
    
Incorrect Guess:
    w = [1. 1. 1. 1.]
    b = 1
    Feature = [5.1 3.5 1.4 0.2]
    Label: -1
    Prediction: 11.2
    w' <= [-4.1 -2.5 -0.4  0.8]
    b' <= 0
    
Incorrect Guess:
    w = [-4.1 -2.5 -0.4  0.8]
    b = 0
    Feature = [7.  3.2 4.7 1.4]
    Label: 1
    Prediction: -37.459999999999994
    w' <= [2.9 0.7 4.3 2.2]
    b' <= 1
    
Incorrect Guess:
    w = [2.9 0.7 4.3 2.2]
    b = 1
    Feature = [6.3 3.3 6.  2.5]
    Label: -1
    Prediction: 52.88000000000001
    w' <= [-3.4 -2.6 -1.7 -0.3]
    b' <= 0
    
Incorrect Guess:
    w = [1. 1. 1. 1.]
    b = 1
    Feature = [5.1 3.5 1.4 0.2]
    Label: -1
    Prediction: 11.2
    w' <= [-4.1 -2.5 -0.4  0.8]
    b' <= 0
    
Incorrect Guess:
    w = [-4.1 -2.5 -0.4  0.8]
    b = 0
    Feature = [6.3 3.3 6.  2.5]
    Label: 1
    Prediction: -34.48
 

# MultiClass Classifier (One vs. All)

$$
    h(x) = \arg\max_{\purple i}(\purple{w_i}^T \cdot x + \purple b_i)
$$

In [24]:
def Create_OneVsAllClassifier(labeled_classifiers: dict[str, Callable[..., float]]):
    '''
    Combine per-label scoring functions into one multi-class classifier.

    :param labeled_classifiers: Mapping from label to a function that scores a feature vector
    :return: Function ``x -> label`` returning the label whose scorer gives the highest value
        (on a tie, the label that comes first in ``labeled_classifiers``)
    '''
    def func(x):
        # Return the label with the highest prediction
        return max(labeled_classifiers, key=lambda label: labeled_classifiers[label](x))
    return func

In [25]:
# Create a multi-class classifier from the per-label binary classifiers
multi_classifier = Create_OneVsAllClassifier(labeled_classifiers)

## Testing

In [None]:
# Apply it to the dataset
D = df
# Features are every column but the last, the same count the perceptrons were
# initialised with (w has shape[1] - 1 entries). Rows are taken positionally so
# this does not depend on D having a default 0..n-1 index.
n_features = D.shape[1] - 1
predictions = [multi_classifier(x_i) for x_i in D.iloc[:, :n_features].to_numpy()]

# Summarise the outcome instead of printing one line per row
results = pd.DataFrame(
    {'actual': D['variety'].to_numpy(), 'predicted': predictions},
    index=D.index,
)
n_correct = int((results['actual'] == results['predicted']).sum())
print(f'Accuracy: {n_correct}/{len(results)} = {n_correct / len(results):.1%}')
results.head(10)

# Full Algorithm

In [63]:
# ONE VS. ALL
def get_Multiclass_OneVsAll_Classifier(
    D: pd.DataFrame,
    labelCol: str,
    perceptron_iters=1
):
    '''
    Train one perceptron per class (one vs. all) and combine them into a single classifier.

    :param D: Training Dataset
    :param labelCol: Column name of the label
    :param perceptron_iters: Maximum number of passes over the data for each perceptron;
        training of a class stops earlier once a pass makes no update
    :return: Multi-class classifier, lambda x: str
    '''
    # STEP 1: Get the binary datasets
    binary_dfs = get_binary_DFs(D, labelCol)
    # STEP 2: Get the labeled classifiers
    labeled_classifiers = {}
    for label, bin_df in binary_dfs.items():
        # Start Params: one weight per feature column
        w = np.ones(bin_df.shape[1] - 1)
        b = 1
        # Update Params Iteratively
        for _ in range(perceptron_iters):
            # Collect one entry per update so a converged pass can be detected
            updates = []
            w, b = Perceptron(
                D=bin_df,
                labelCol=f'is{label}',
                w=w,
                b=b,
                delta=0,
                onIncorrect=lambda *_args: updates.append(None),
            )
            # A pass without updates leaves (w, b) unchanged, so further
            # passes would be identical - stop here.
            if not updates:
                break
        # Create the classifier
        labeled_classifiers[label] = get_BinaryClassifier(w, b)
    # STEP 3: Create the multi-class classifier
    return Create_OneVsAllClassifier(labeled_classifiers)

# Train the full one-vs-all classifier on the iris data with the default single perceptron pass per class
classifier = get_Multiclass_OneVsAll_Classifier(df, 'variety')