In [1]:
# Import necessary libraries
# Install the required packages (Optional in Colab for scikit-learn and numpy, but good to have for documentation)
!pip install scikit-learn numpy   # Install the required packages
!pip install PyYAML






In [2]:
# Import necessary libraries
from google.colab import drive  # Library for connecting to Google Drive
import zipfile
import os
import yaml
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np
import shutil
import warnings
from tabulate import tabulate


# --- Notebook configuration --------------------------------------------------
# Name of the extraction folder, and the location of the dataset archive
# under the Google Drive mount point.
extract_folder = 'extracted_content/'
zip_file_path = '/content/drive/MyDrive/002__DISSERTATION/DATASET/images/sample.zip'


# Attach Google Drive at /content/drive so the archive path above resolves.
# force_remount=True is passed so re-running this cell mounts again.
drive.mount("/content/drive", force_remount=True)


# Ignore FutureWarning messages from here on.
warnings.simplefilter('ignore', FutureWarning)


Mounted at /content/drive


In [3]:
# Collect the class names declared in every data.yaml inside the dataset
# archive, one-hot encode them, and print the encoding next to each name.

# NOTE(review): the encoding step only runs when more than 4 names were
# collected (counted before de-duplication). The reason for this threshold
# is not visible in this notebook -- confirm it is intentional.
MIN_LABEL_COUNT = 5

# Class names gathered from all data.yaml files, in the order they are found.
label_classes = []

try:
    # Read the YAML members straight from the archive. Nothing is written to
    # disk, so no extraction folder is left behind on any code path.
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        for member_name in zip_ref.namelist():
            # Zip member names always use "/" as the path separator.
            if member_name.rsplit('/', 1)[-1] != 'data.yaml':
                continue

            with zip_ref.open(member_name) as yaml_file:
                yaml_content = yaml.safe_load(yaml_file)

            # An empty YAML document parses to None; a non-mapping document
            # cannot carry a 'names' section.
            if not isinstance(yaml_content, dict):
                continue

            names = yaml_content.get('names')
            if not names:
                continue

            # 'names' may be a plain list or an {index: name} mapping;
            # in the mapping form the class names are the values.
            if isinstance(names, dict):
                names = names.values()
            label_classes.extend(names)

except (OSError, zipfile.BadZipFile, yaml.YAMLError) as e:
    # Only input problems (missing/unreadable/corrupt archive, malformed YAML)
    # are reported as a message; programming errors keep their traceback.
    print(f"Issue encountered: {e}")

else:
    if len(label_classes) >= MIN_LABEL_COUNT:
        # Drop duplicates while keeping first-seen order. A set would reorder
        # the names differently on every kernel start (string hash
        # randomisation), making the printed table non-reproducible.
        label_classes = list(dict.fromkeys(label_classes))

        # Integer-encode the labels, shaped as a single-feature column
        # because OneHotEncoder expects 2-D input.
        label_encoder = LabelEncoder()
        integer_encoded = label_encoder.fit_transform(label_classes).reshape(-1, 1)

        # scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed
        # the old name; fall back for installations older than 1.2.
        try:
            onehot_encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            onehot_encoder = OneHotEncoder(sparse=False)

        # One row per class name, in the same order as label_classes.
        onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

        # Same data wrapped in one extra leading axis. Not used in this cell;
        # kept because cells outside this view may rely on the name.
        onehot_np_encoded = np.array([onehot_encoded])

        # Pair every one-hot row with its class name for display.
        tabulated_data = [
            [encoded_row, class_name]
            for encoded_row, class_name in zip(onehot_encoded, label_classes)
        ]

        # Display the tabulated data
        headers = ["One-hot Encoded array", "Class Name"]
        print(tabulate(tabulated_data, headers=headers))
    else:
        print('No Class_File_Found')

One-hot Encoded array               Class Name
----------------------------------  ------------
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]  person
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]  SafetyShoe
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]  vest
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]  faceMask
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]  no_gloves
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]  helmet
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]  no_faceMask
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]  no_vest
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]  no_helmet
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]  goggle
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]  object
