In [1]:
import os
# import pandas as pd
import numpy as np
import cv2

import joblib
from joblib import dump

import sys
sys.path.append("..")
# import utils.classifier_utils as clf
# from utils.imutils import jimshow as show
# from utils.imutils import jimshow_channel as show_channel

# Machine learning stuff
from sklearn import metrics
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from tensorflow.keras.datasets import cifar10

# plotting tool
import matplotlib.pyplot as plt

# import argparse

2024-03-15 09:01:59.008348: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-15 09:01:59.014573: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-15 09:01:59.082876: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# This function loads the data from the cifar10 dataset
def load_data():
    return cifar10.load_data()

# This function preprocesses the images
def preprocess_images(images):
    image_list = []  # Empty list which stores the flattened images
    
    for image in images:
        image_grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # Converts the images into greyscale
        image_scaled = image_grey / 255.0 # Scales the images
        image_flattened = image_scaled.flatten()  # Flattens the images
        image_list.append(image_flattened)  # Appends the flattened images to the image_list
    images_processed = np.array(image_list) # Converts the list of flattened images to a np array
    return images_processed

# This function trains the logistic regression model
def train_model(X_train_processed, y_train_processed):
    return LogisticRegression(tol=0.1, solver='saga', multi_class='multinomial', random_state=42).fit(X_train_processed, y_train_processed)

# This function evaluates the performance of the trained classifier on the test dataset and produces a classification report
def evaluate_model(y_test_processed, X_test_processed, classifier):
    return metrics.classification_report(y_test_processed, classifier.predict(X_test_processed))

# This function saves the classification report and the logistic regression classifier model
def saving_report(classifier_metrics, classifier, report_path, model_path):

    # Opens the file in the out folder in write mode and writes the classification metrics to it.
    with open(report_path, "w") as file:
        file.write(classifier_metrics)

    # Saves the trained classifier in the models folder
    joblib.dump(classifier, model_path)

def main():
    # Creates a filepath for each directory 
    out_folder_path = os.path.join("..", "out", "logistic_regression")
    models_folder_path = os.path.join("..","models", "logistic_regression")

    # If the directory does not exist, make the directory
    os.makedirs(out_folder_path, exist_ok=True)
    os.makedirs(models_folder_path, exist_ok=True)

    # Filepath for each saved file
    model_path = os.path.join("..", "models", "logistic_regression", "regression_classifier.joblib")
    report_path = os.path.join("..","out", "logistic_regression", "classification_report.txt")
    # plot_path = os.path.join("..","out", "logistic_regression", "_______")
  
    # Loading the data
    (X_train, y_train), (X_test, y_test) = load_data()

    # Preprocessing the training and test images
    X_train_processed = preprocess_images(X_train)
    X_test_processed = preprocess_images(X_test)

    #### You'll need to make a list of labels based on the object names - you can find these on the website.
    y_train_processed = y_train.flatten()
    y_test_processed = y_test.flatten()

    #Training the logistic regression classifier
    classifier = train_model(X_train_processed, y_train_processed)

    # Creating the classification report
    classifier_metrics = evaluate_model(y_test_processed, X_test_processed, classifier)

    print(classifier_metrics)
    # Saving the classification report and the logistic regression classifier model
    saving_report(classifier_metrics, classifier, report_path, model_path)

if __name__ == "__main__":
    main()

              precision    recall  f1-score   support

           0       0.34      0.38      0.36      1000
           1       0.36      0.40      0.38      1000
           2       0.26      0.20      0.23      1000
           3       0.20      0.17      0.19      1000
           4       0.26      0.17      0.21      1000
           5       0.31      0.30      0.30      1000
           6       0.28      0.32      0.30      1000
           7       0.32      0.32      0.32      1000
           8       0.32      0.43      0.37      1000
           9       0.40      0.44      0.42      1000

    accuracy                           0.31     10000
   macro avg       0.31      0.31      0.31     10000
weighted avg       0.31      0.31      0.31     10000

