<a href="https://colab.research.google.com/github/sat0urn/CV_Assignments/blob/main/Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Assignment \#1**
# ***Komekbayev Zeyin & Kusainov Aslan SE-2116***

# **Loading "*Agricultural Crops*" Image Classification Dataset**

In [40]:
import numpy as np
import os, cv2, random, math
import pdb # For debugging

# Loading Agricultural Crops Dataset

def load_AGRI_CROPS(data_dir):
    classes = os.listdir(data_dir)

    images = []

    labels = []

    for class_name in classes:
      class_dir = os.path.join(data_dir, class_name)
      for image_file in os.listdir(class_dir):
        image_path = os.path.join(class_dir, image_file)
        image = cv2.imread(image_path)
        image = cv2.resize(image, (64, 64))
        image = image.flatten()
        images.append(image)
        labels.append(class_name)

    return np.array(images), np.array(labels)

data_dir = '/content/drive/MyDrive/Colab Notebooks/Agricultural-crops'

images, labels = load_AGRI_CROPS(data_dir)

# **Softmax Regression Algorithm**

In [41]:
# Softmax Regression Algorithm Class
class SoftmaxRegression:
    def __init__(self, learning_rate=0.01, n_iterations=100, regularization=None, lambda_reg=0.01):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.regularization = regularization
        self.lambda_reg = lambda_reg

    def fit(self, X, y):
        num_samples, num_features = X.shape[0], X.shape[1]
        num_classes = len(np.unique(y))

        # Initialize weights and biases
        self.weights = np.random.randn(num_features, num_classes)
        self.bias = np.zeros((1, num_classes))

        for i in range(self.n_iterations):
            # Compute softmax scores
            scores = X.dot(self.weights) + self.bias

            # Compute softmax probabilities
            max_scores = np.max(scores, axis=1, keepdims=True)
            exp_scores = np.exp(scores - max_scores)
            probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

            # Calculate the loss
            loss = -np.log(probs[range(num_samples), y] + 1e-7)
            data_loss = np.sum(loss) / num_samples
            reg_loss = 0.5 * self.lambda_reg * np.sum(self.weights ** 2)
            total_loss = data_loss + reg_loss

            # Compute gradients
            delta = probs
            delta[range(num_samples), y] -= 1
            delta /= num_samples

            dtheta = X.T.dot(delta)

            db = np.sum(delta, axis=0, keepdims=True)

            # Regularizations
            if self.regularization == 'L2':
                dtheta += self.lambda_reg * self.weights
            elif self.regularization == 'L1':
                dtheta += self.lambda_reg * np.sign(self.weights)

            # Update weights and biases
            self.weights -= self.learning_rate * dtheta
            self.bias -= self.learning_rate * db

    def predict(self, X):
        scores = X.dot(self.weights) + self.bias
        return np.argmax(scores, axis=1)

# **SVM Algorithm**

In [42]:
# SVM Algorithm Class
class SVM:
    def __init__(self, learning_rate=0.01, n_iterations=100, regularization=None, lambda_reg=0.01):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.regularization = regularization
        self.lambda_reg = lambda_reg

    def fit(self, X, y):
        num_samples, num_features = X.shape[0], X.shape[1]
        num_classes = len(np.unique(y))

        # Initialize weights and biases
        self.weights = np.random.rand(num_features, num_classes)
        self.bias = np.zeros((1, num_classes))

        for i in range(self.n_iterations):
            # Calculate the raw scores for each class using
            # dot product of training data and weights, then add bias
            scores = X.dot(self.weights) + self.bias

            # Calculate the scores of the correct class for each sample
            correct_scores = scores[range(num_samples), y]

            # Calculate margins, which represent the difference between
            # the scores of the correct class and other classes
            margins = np.maximum(0, scores - correct_scores[:, np.newaxis] + 1)
            margins[range(num_samples), y] = 0
            loss = np.sum(margins) / num_samples

            # Compute gradients
            binary = margins
            binary[margins > 0] = 1
            row_sum = np.sum(binary, axis=1)
            binary[range(num_samples), y] = -row_sum
            dtheta = X.T.dot(binary) / num_samples

            if self.regularization == 'L2':
                dtheta += self.lambda_reg * self.weights
            elif self.regularization == 'L1':
                dtheta += self.lambda_reg * np.sign(self.weights)

            self.weights -= self.learning_rate * dtheta

    def predict(self, X):
        scores = X.dot(self.weights) + self.bias
        return np.argmax(scores, axis=1)

# **Divide into different sets (train, validation, test)**

In [43]:
# Divide data into train, validation, and test sets
def train_val_test_split(X, y, val_size=0.1, test_size=0.1):
    num_samples = X.shape[0]

    # Initialize size of test and validation
    val_size = int(num_samples * val_size)
    test_size = int(num_samples * test_size)

    indices = np.random.permutation(num_samples)

    # Defining Indices
    val_indices = indices[:val_size]
    test_indices = indices[val_size:val_size+test_size]
    train_indices = indices[val_size+test_size:]

    # Split All Sets to Indices
    X_val, y_val = X[val_indices], y[val_indices]
    X_test, y_test = X[test_indices], y[test_indices]
    X_train, y_train = X[train_indices], y[train_indices]

    return X_train, y_train, X_val, y_val, X_test, y_test

# Split data into train, validation, and test sets
X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(images, labels, val_size=0.1, test_size=0.1)

# Normalize the data
class_to_label = {class_name: i for i, class_name in enumerate(np.unique(y_train))}
label_to_class = {i: class_name for class_name, i in class_to_label.items()}

# Convert class labels to integer labels
y_train = np.array([class_to_label[class_name] for class_name in y_train])
y_test = np.array([class_to_label[class_name] for class_name in y_test])
y_val = np.array([class_to_label[class_name] for class_name in y_val])

# Flatten the data
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)

# **Split Train into Train & Validation sets for Cross-Validation**

In [44]:
# Further split train into train and validation sets for cross-validation
def k_fold_split(X, y, k=5):
    fold_size = len(X) // k
    for i in range(k):
        start = i * fold_size
        end = (i + 1) * fold_size
        X_val = X[start:end]
        y_val = y[start:end]
        X_train = np.concatenate([X[:start], X[end:]])
        y_train = np.concatenate([y[:start], y[end:]])
        yield X_train, y_train, X_val, y_val

# **Calculate Accuracy**

In [45]:
# Function to calculate an accuracy
def calculate_accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

# **Models Initialization and Accuracy Evaluation**

In [46]:
# Initialize models
softmax_model = SoftmaxRegression(learning_rate=0.01, n_iterations=100, regularization='L2', lambda_reg=0.001)
svm_model = SVM(learning_rate=0.01, n_iterations=100, regularization='L2', lambda_reg=0.001)

best_accuracy = -1
best_algorithm = None

softmax_accuracies = []
svm_accuracies = []

for X_train_fold, y_train_fold, X_val_fold, y_val_fold in k_fold_split(X_train, y_train, k=5):
    # Train Softmax Regression
    softmax_model.fit(X_train_fold, y_train_fold)
    softmax_predictions = softmax_model.predict(X_val_fold)
    softmax_accuracy = calculate_accuracy(y_val_fold, softmax_predictions)
    softmax_accuracies.append(softmax_accuracy)

    # Train SVM
    svm_model.fit(X_train_fold, y_train_fold)
    svm_predictions = svm_model.predict(X_val_fold)
    svm_accuracy = calculate_accuracy(y_val_fold, svm_predictions)
    svm_accuracies.append(svm_accuracy)

average_softmax_accuracy = np.mean(softmax_accuracies)
average_svm_accuracy = np.mean(svm_accuracies)

if average_softmax_accuracy > best_accuracy:
    best_accuracy = average_softmax_accuracy
    best_algorithm = "Softmax Regression Algorithm"

if average_svm_accuracy > best_accuracy:
    best_accuracy = average_svm_accuracy
    best_algorithm = "SVM Algorithm"

# Evaluate the best model on the test set
if best_algorithm == "Softmax Regression Algorithm":
    softmax_model.fit(X_train, y_train)
    test_predictions = softmax_model.predict(X_test)
elif best_algorithm == "SVM Algorithm":
    svm_model.fit(X_train, y_train)
    test_predictions = svm_model.predict(X_test)

test_accuracy = calculate_accuracy(y_test, test_predictions)

# **Output / Result**

In [47]:
# Best Algorithm Output Information
print(f"Algorithm with Best Accuracy: {best_algorithm}")
print(f"Test Accuracy of {best_algorithm}: {test_accuracy}")
print(f"Average Accuracy for Softmax Regression Algorithm in 5k-CV: {average_softmax_accuracy}")
print(f"Average Accuracy for SVM Algorithm in 5k-CV: {average_svm_accuracy}")

Algorithm with Best Accuracy: SVM Algorithm
Test Accuracy of SVM Algorithm: 0.24390243902439024
Average Accuracy for Softmax Regression Algorithm in 5k-CV: 0.16240601503759397
Average Accuracy for SVM Algorithm in 5k-CV: 0.18045112781954886


# **Reason for SVM Algorithm for being More Accurant**

So we can see that SVM is more suitable for large and complex high-dimensional dataset of images for classification than Softmax Regression. By end result it is showing their not being too far from each other in terms of accuracy score, but SVM takes more places in continouos checking.