In [None]:
!pip3 install numpy matplotlib pandas scikit-learn opencv-python tensorflow

In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# One matplotlib colour letter per class index (0 -> "r", 1 -> "y", 2 -> "b").
plot_colors = "ryb"
# Spacing of the grid on which the model is evaluated to draw its regions.
plot_step = 0.02


def decision_boundary(X, y, model, iris, two=None):
    """Plot the decision regions of a fitted classifier over two features.

    The model is evaluated on a regular grid (spacing ``plot_step``) that
    extends one unit beyond the range of each feature, the predictions are
    drawn as filled contours and the samples are scattered on top.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        The two features that are plotted; column 0 is the horizontal axis.
    y : ndarray of shape (n_samples,)
        Labels used to group and colour the samples.
    model : object
        Fitted estimator exposing ``predict``.
    iris : object
        Object with a ``target`` array indexed like the rows of ``X``. It is
        only read when ``two`` is falsy, to locate the samples of the classes
        that do not appear in ``y``.
    two : bool, optional
        When truthy, every distinct value of ``y`` is plotted as its own
        group. Otherwise the classes ``0 .. len(plot_colors) - 1`` that occur
        in ``y`` are plotted in colour and the samples of the remaining
        classes are marked with black crosses.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)
    )

    # Predict every grid point and fold the result back into the grid's shape.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)

    if two:
        for i, color in zip(np.unique(y), plot_colors):
            idx = np.where(y == i)[0]
            plt.scatter(X[idx, 0], X[idx, 1], label=i, color=color, s=15)
    else:
        candidate_classes = range(len(plot_colors))
        missing = set(candidate_classes)
        for i, color in zip(candidate_classes, plot_colors):
            idx = np.where(y == i)[0]
            # Test for emptiness on the number of matches; testing the index
            # values themselves misses a class whose only sample is row 0.
            if idx.size:
                missing.discard(i)
                plt.scatter(
                    X[idx, 0],
                    X[idx, 1],
                    label=i,
                    color=color,
                    edgecolor="black",
                    s=15,
                )

        # Classes that were relabelled away in `y` are located through the
        # original targets and drawn as black crosses.
        for i in sorted(missing):
            idx = np.where(iris.target == i)[0]
            plt.scatter(X[idx, 0], X[idx, 1], marker="x", color="black")

    # Adjust the layout once the artists exist, then display the figure.
    plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
    plt.show()

In [None]:
def plot_probability_array(X, probability_array):
    """Show per-class scores as an image with one band of columns per class.

    Every column of ``probability_array`` is repeated ten times side by side
    so that each class occupies a visible band; rows are samples.

    Parameters
    ----------
    X : ndarray
        Data the scores were computed for. Only ``X.shape[0]`` is used, to
        check that there is one row of scores per sample.
    probability_array : array-like of shape (n_samples, n_classes)
        Score of every class for every sample.

    Raises
    ------
    ValueError
        If ``probability_array`` does not have one row per row of ``X``.
    """
    columns_per_class = 10
    probabilities = np.asarray(probability_array, dtype=float)
    if probabilities.shape[0] != X.shape[0]:
        raise ValueError(
            "probability_array has {} rows but X has {}".format(
                probabilities.shape[0], X.shape[0]
            )
        )
    n_classes = probabilities.shape[1]

    # Repeating along axis 1 places the copies of each class column next to
    # each other, giving one band per class.
    plot_array = np.repeat(probabilities, columns_per_class, axis=1)

    plt.imshow(plot_array)
    plt.xticks([])
    plt.ylabel("samples")
    plt.xlabel("probability of {} classes".format(n_classes))
    plt.colorbar()
    plt.show()

In [None]:
# Load the iris data set and keep two of its features so that the decision
# regions can be drawn in a plane. Columns 1 and 3 are the ones the axis
# labels of the following scatter plot refer to (sepal width, petal width).
pair = [1, 3]
iris = datasets.load_iris()
X = iris.data[:, pair]
y = iris.target
np.unique(y)

In [None]:
# Scatter the two selected features, colouring every sample by its label in `y`.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)
plt.xlabel("sepal width (cm)")
plt.ylabel("petal width")

In [None]:
# Fit a logistic-regression classifier on all samples (no train/test split here).
lr = LogisticRegression(random_state=0).fit(X, y)

In [None]:
# Probability estimates of the fitted model for every row of X.
probability = lr.predict_proba(X)

In [None]:
# Show the logistic-regression probabilities as an image (rows are samples).
plot_probability_array(X, probability)

In [None]:
# Probabilities predicted for the first sample.
probability[0, :]

In [None]:
# Sanity check: the probabilities of a single sample are expected to add up to 1.
probability[0, :].sum()

In [None]:
# Column index of the largest probability for the first sample.
np.argmax(probability[0, :])

In [None]:
# For every sample, take the column holding the largest probability.
softmax_prediction = probability.argmax(axis=1)
softmax_prediction

In [None]:
# Compare the labels returned by `predict` with the argmax of `predict_proba`.
# NOTE(review): the argmax yields column indices, so the comparison is only
# meaningful while the class labels are 0..K-1 — confirm against `lr.classes_`.
yhat = lr.predict(X)
accuracy_score(yhat, softmax_prediction)

In [None]:
# Support-vector classifier with a linear kernel, fitted on all samples.
# NOTE(review): the scikit-learn docs describe `gamma` as the coefficient of the
# rbf/poly/sigmoid kernels, so it presumably has no effect here — confirm.
model = SVC(kernel="linear", gamma=0.5, probability=True)

model.fit(X, y)

In [None]:
# Accuracy of the SVC on the same data it was fitted on (not a held-out score).
yhat = model.predict(X)
accuracy_score(y, yhat)

In [None]:
# Draw the decision regions of the multi-class SVC together with the samples.
decision_boundary(X, y, model, iris)

In [None]:
# One-vs-all: fit one binary SVC per class. The samples of all other classes
# are relabelled with a dummy label that does not occur in `y`.
dummy_class = y.max() + 1
my_models = []

for class_ in np.unique(y):

    # Keep `class_` for its own samples and map every other sample to the
    # dummy label; the float dtype matches the labels used so far.
    temp_y = np.where(y == class_, class_, dummy_class).astype(float)

    model = SVC(kernel="linear", gamma=0.5, probability=True)
    my_models.append(model.fit(X, temp_y))

    decision_boundary(X, temp_y, model, iris)

In [None]:
# Collect, for every one-vs-all model, the probability it assigns to its real
# class: one column per model, one row per sample.
probability_array = np.zeros((X.shape[0], len(my_models)))
for j, model in enumerate(my_models):

    # Position, within `model.classes_`, of the label that is not the dummy one.
    real_class = np.where(np.array(model.classes_) != dummy_class)[0]

    probability_array[:, j] = model.predict_proba(X)[:, real_class][:, 0]

In [None]:
# One-vs-all scores of the first sample, one entry per model.
probability_array[0, :]

In [None]:
# Sum of the one-vs-all scores of the first sample. Every column comes from a
# separately fitted model, so nothing in the code forces this sum to be 1.
probability_array[0, :].sum()

In [None]:
# Show the one-vs-all scores as an image (rows are samples).
plot_probability_array(X, probability_array)

In [None]:
# One-vs-all decision: for every sample, the index of the model with the highest score.
one_vs_all = np.argmax(probability_array, axis=1)
one_vs_all

In [None]:
# Accuracy of the one-vs-all predictions against the true labels.
accuracy_score(y, one_vs_all)

In [None]:
# Agreement between the one-vs-all predictions and `yhat`.
# NOTE(review): when the cells run top to bottom, `yhat` holds the predictions
# of the multi-class SVC fitted earlier — confirm the execution order.
accuracy_score(one_vs_all, yhat)

In [None]:
# Distinct class labels found in `y`, as a set.
classes_ = set(np.unique(y))
classes_

In [None]:
# Number of unordered class pairs, K * (K - 1) / 2. The product of two
# consecutive integers is even, so integer division is exact and keeps the
# count an int.
K = len(classes_)
K * (K - 1) // 2

In [None]:
# One-vs-one: fit one binary SVC for every unordered pair of classes, using
# only the samples that belong to that pair.
pairs = []
left_overs = classes_.copy()

my_models = []

# Sorted iteration keeps the pair labels and the model order independent of
# the iteration order of the set.
for class_ in sorted(classes_):

    # Once removed, `class_` is never paired with itself or with a class that
    # already served as first member.
    left_overs.remove(class_)

    for second_class in sorted(left_overs):
        pairs.append(str(class_) + " and " + str(second_class))
        print("class {} vs class {} ".format(class_, second_class))

        # Samples that belong to either class of the current pair.
        select = np.logical_or(y == class_, y == second_class)

        model = SVC(kernel="linear", gamma=0.5, probability=True)
        model.fit(X[select, :], y[select])
        my_models.append(model)

        decision_boundary(X[select, :], y[select], model, iris, two=True)

In [None]:
# Labels of the class pairs, in the order their models were fitted.
pairs

In [None]:
# Let every pairwise model vote on every sample: one column per model in the
# array, one entry per pair label in the dictionary.
majority_vote_array = np.zeros((X.shape[0], len(my_models)))
majority_vote_dict = {}
for j, (model, pair_label) in enumerate(zip(my_models, pairs)):

    # Predict once and reuse the result for both containers.
    votes = model.predict(X)
    majority_vote_dict[pair_label] = votes
    majority_vote_array[:, j] = votes

In [None]:
# First ten rows of the votes, one column per class pair.
pd.DataFrame(majority_vote_dict).head(10)

In [None]:
# Majority vote: count how often each label was voted for a sample and keep
# the most frequent one (argmax returns the first maximum, so a tie goes to
# the smallest label).
vote_rows = majority_vote_array.astype(int)
one_vs_one = np.array([np.argmax(np.bincount(row)) for row in vote_rows])
one_vs_one

In [None]:
# Accuracy of the one-vs-one majority vote against the true labels.
accuracy_score(y, one_vs_one)

In [None]:
# Agreement between the one-vs-one majority vote and `yhat`.
# NOTE(review): when the cells run top to bottom, `yhat` holds the predictions
# of the multi-class SVC fitted earlier — confirm the execution order.
accuracy_score(yhat, one_vs_one)

<!--
## Change Log
| Date (YYYY-MM-DD) | Version | Changed By | Change Description      |
| ----------------- | ------- | ---------- | ----------------------- |
| 2020-07-20        | 0.2     | Azim       | Modified Multiple Areas |
| 2020-07-17        | 0.1     | Azim       | Created Lab Template    |
| 2022-08-31        | 0.3     | Steve Hord | QA pass edits           |
-->


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder


# Load and preprocess images
def load_images(folder, size=(64, 64)):
    """Load a folder-per-class image data set as flattened pixel vectors.

    Every sub-directory of ``folder`` is treated as one class; its name is the
    label of every image inside it. Entries of ``folder`` that are not
    directories are skipped. Images that cannot be read or resized are
    reported on stdout and skipped (best effort).

    Parameters
    ----------
    folder : str
        Directory that contains one sub-directory per class.
    size : tuple of int, optional
        Target size passed to ``cv2.resize``.

    Returns
    -------
    tuple of (ndarray, ndarray)
        The flattened resized images, one per row, and the matching labels.
        Directory listings are sorted, so the order is deterministic.
    """
    X = []
    y = []
    for label in sorted(os.listdir(folder)):
        label_path = os.path.join(folder, label)
        if not os.path.isdir(label_path):
            continue
        for img_name in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_name)
            try:
                img = cv2.imread(img_path)
                # cv2.imread signals an unreadable file by returning None
                # instead of raising, so check for it explicitly.
                if img is None:
                    raise ValueError("image could not be read")
                features = cv2.resize(img, size).flatten()
            except Exception:
                # Skip the broken file but let KeyboardInterrupt and
                # SystemExit propagate (a bare `except:` would swallow them).
                print(f"Error loading {img_path}")
                continue
            X.append(features)
            y.append(label)
    return np.array(X), np.array(y)


# Load dataset: one flattened image per row of X, folder names as labels in y.
# NOTE(review): this rebinds the notebook-level names X and y.
X, y = load_images("food_images")

# Encode labels: map the label strings to integer codes.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-test split: hold out 20% of the samples; the fixed random_state makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# Choose classifier: Logistic Regression or SVM
# NOTE(review): only `predict` is called below, so `probability=True` looks
# unnecessary here — confirm before removing.
clf = SVC(kernel="linear", probability=True)  # or LogisticRegression()
clf.fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = clf.predict(X_test)

# Evaluate: share of held-out samples whose predicted label matches the true one.
print("Accuracy:", accuracy_score(y_test, y_pred))