In [17]:
import os  # Reference: https://docs.python.org/3/library/os.html
import numpy as np  # Reference: https://numpy.org/doc/
import csv  # Reference: https://docs.python.org/3/library/csv.html
from scipy.ndimage import label, generate_binary_structure  # Reference: https://docs.scipy.org/doc/scipy/reference/ndimage.html

def compute_symmetry_score(matrix):
    """
    Compute the left-right mirror symmetry score of a 2-D image matrix.

    The left half of the columns is compared element-wise with the
    horizontally flipped right half. The score is the fraction of positions
    whose values match, so 1.0 means the two halves mirror each other exactly.

    The half width is derived from the matrix itself, so any width works.
    For an odd number of columns the middle column is ignored, since it is
    its own mirror image.

    Parameters:
        matrix: 2-D array-like of pixel values.

    Returns:
        Fraction of matching positions in [0, 1]. Returns 1.0 when there is
        nothing to compare (fewer than two columns, or no rows).
    """
    # Reference: https://numpy.org/doc/stable/reference/generated/numpy.fliplr.html
    matrix = np.asarray(matrix)
    n_cols = matrix.shape[1]
    half = n_cols // 2

    left_half = matrix[:, :half]  # Left half of the image
    if left_half.size == 0:
        # Nothing to compare; avoid dividing by zero.
        return 1.0

    # Take the last `half` columns so the middle column of an odd-width
    # image is skipped, then mirror them to line up with the left half.
    right_half = np.fliplr(matrix[:, n_cols - half:])
    return np.sum(left_half == right_half) / left_half.size  # Symmetry score


def calculate_features(image_matrix):
    """
    Calculate the 16 features for a binary image matrix.

    The matrix is expected to be 2-D with 1 for black pixels and 0 for white
    pixels. Any height and width is accepted.

    Parameters:
        image_matrix: 2-D array-like of 0/1 pixel values.

    Returns:
        dict mapping feature name to value, inserted in this order:
        nr_pix, rows_with_1, cols_with_1, rows_with_3p, cols_with_3p,
        aspect_ratio, neigh_1, no_neigh_above, no_neigh_below, no_neigh_left,
        no_neigh_right, no_neigh_horiz, no_neigh_vert, connected_areas, eyes,
        custom.
    """
    image_matrix = np.asarray(image_matrix)
    features = {}

    # Black-pixel totals per row and per column, shared by features 2-5.
    # Reference: https://numpy.org/doc/stable/reference/generated/numpy.sum.html
    row_totals = np.sum(image_matrix, axis=1)
    col_totals = np.sum(image_matrix, axis=0)

    # Feature 1: Number of black pixels
    features['nr_pix'] = np.sum(image_matrix)

    # Feature 2: Number of rows with exactly 1 black pixel
    features['rows_with_1'] = np.sum(row_totals == 1)

    # Feature 3: Number of columns with exactly 1 black pixel
    features['cols_with_1'] = np.sum(col_totals == 1)

    # Feature 4: Number of rows with 3 or more black pixels
    features['rows_with_3p'] = np.sum(row_totals >= 3)

    # Feature 5: Number of columns with 3 or more black pixels
    features['cols_with_3p'] = np.sum(col_totals >= 3)

    # Feature 6: Aspect ratio (width / height of the bounding box of the
    # black pixels); 0 when the image has no black pixels.
    # Reference: https://numpy.org/doc/stable/reference/generated/numpy.where.html
    rows, cols = np.where(image_matrix == 1)
    if len(rows) > 0:
        height = np.max(rows) - np.min(rows) + 1  # always >= 1 here
        width = np.max(cols) - np.min(cols) + 1
        features['aspect_ratio'] = width / height
    else:
        features['aspect_ratio'] = 0

    # Features 7-13: neighbourhood counts.
    # A one-pixel white border lets every black pixel be inspected through a
    # full 3x3 window without special-casing the image edges.
    # Reference: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
    padded_matrix = np.pad(image_matrix, 1, mode='constant', constant_values=0)
    neighbour_counts = {
        'neigh_1': 0,         # Feature 7: exactly 1 black neighbour (8-neighbourhood)
        'no_neigh_above': 0,  # Feature 8: no black pixel in the 3 cells above
        'no_neigh_below': 0,  # Feature 9: no black pixel in the 3 cells below
        'no_neigh_left': 0,   # Feature 10: no black pixel in the 3 cells to the left
        'no_neigh_right': 0,  # Feature 11: no black pixel in the 3 cells to the right
        'no_neigh_horiz': 0,  # Feature 12: no black pixel directly left or right
        'no_neigh_vert': 0,   # Feature 13: no black pixel directly above or below
    }
    # Reference: https://numpy.org/doc/stable/reference/generated/numpy.argwhere.html
    for i, j in np.argwhere(padded_matrix == 1):
        window = padded_matrix[i-1:i+2, j-1:j+2]  # 3x3 window centred on the pixel
        # Subtract 1 so the centre pixel itself is not counted as a neighbour.
        if window.sum() - 1 == 1:
            neighbour_counts['neigh_1'] += 1
        if window[0, :].sum() == 0:
            neighbour_counts['no_neigh_above'] += 1
        if window[2, :].sum() == 0:
            neighbour_counts['no_neigh_below'] += 1
        if window[:, 0].sum() == 0:
            neighbour_counts['no_neigh_left'] += 1
        if window[:, 2].sum() == 0:
            neighbour_counts['no_neigh_right'] += 1
        if window[1, 0] == 0 and window[1, 2] == 0:
            neighbour_counts['no_neigh_horiz'] += 1
        if window[0, 1] == 0 and window[2, 1] == 0:
            neighbour_counts['no_neigh_vert'] += 1
    features.update(neighbour_counts)

    # Feature 14: Number of connected regions (8-connectivity)
    # Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.label.html
    structure = generate_binary_structure(2, 2)
    _, num_features = label(image_matrix, structure)
    features['connected_areas'] = num_features

    # Feature 15: Number of eyes (white regions fully enclosed by black pixels)
    # The white regions are labelled on the padded matrix: its white border
    # joins everything reachable from the image edge into exactly one outside
    # region, so subtracting 1 removes the background and nothing else, even
    # when the image is entirely black or black strokes split the border.
    inverted_padded = 1 - padded_matrix
    _, num_white_regions = label(inverted_padded, structure)
    features['eyes'] = num_white_regions - 1  # Subtract 1 for the outside background

    # Feature 16: Custom feature - left/right mirror symmetry score
    features['custom'] = compute_symmetry_score(image_matrix)

    return features

def process_images(input_dir, student_number):
    """
    Compute the features of every image CSV in a directory and write them to
    a single feature CSV.

    Each input file must be named ``<id>_<label>_<index>.csv`` and contain a
    comma-separated pixel matrix. One output row is written per image, in
    sorted filename order, to ``<student_number>_features.csv`` in the current
    working directory. ``.csv`` files whose names do not follow the pattern
    (for example a previously written feature file) are skipped.

    Parameters:
        input_dir: Directory containing the image CSV files.
        student_number: Identifier used to name the output file.
    """
    feature_file = f"{student_number}_features.csv"
    csv_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".csv"))

    header = ["label", "index", "nr_pix", "rows_with_1", "cols_with_1", "rows_with_3p", "cols_with_3p",
              "aspect_ratio", "neigh_1", "no_neigh_above", "no_neigh_below", "no_neigh_left", "no_neigh_right",
              "no_neigh_horiz", "no_neigh_vert", "connected_areas", "eyes", "custom"]
    feature_names = header[2:]

    with open(feature_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)

        for filename in csv_files:
            name_parts = filename.split('_')
            if len(name_parts) < 3:
                # Not an image file; skip it rather than abort with a
                # half-written feature file.
                print(f"Skipped {filename}")
                continue

            # Named image_label so the scipy.ndimage `label` function
            # imported at module level is not shadowed.
            image_label = name_parts[1]  # Extract label
            index = name_parts[2].split('.')[0]  # Extract index

            # Load image matrix
            file_path = os.path.join(input_dir, filename)
            image_matrix = np.loadtxt(file_path, delimiter=',')

            # Compute features and emit them in header order, looked up by
            # name so the columns cannot drift from the header.
            features = calculate_features(image_matrix)
            row = [image_label, index] + [features[name] for name in feature_names]

            writer.writerow(row)
            print(f"Processed {filename}")

# Example usage: reads the image CSVs in INPUT_DIR and writes
# "<STUDENT_NUMBER>_features.csv" to the current working directory.
STUDENT_NUMBER = "40415474"
INPUT_DIR = "images"

# Guarded so that importing this module does not trigger file processing;
# it still runs when executed as a script or inside a notebook cell.
if __name__ == "__main__":
    process_images(INPUT_DIR, STUDENT_NUMBER)

Processed 40415474_a_01.csv
Processed 40415474_a_02.csv
Processed 40415474_a_03.csv
Processed 40415474_a_04.csv
Processed 40415474_a_05.csv
Processed 40415474_a_06.csv
Processed 40415474_a_07.csv
Processed 40415474_a_08.csv
Processed 40415474_b_01.csv
Processed 40415474_b_02.csv
Processed 40415474_b_03.csv
Processed 40415474_b_04.csv
Processed 40415474_b_05.csv
Processed 40415474_b_06.csv
Processed 40415474_b_07.csv
Processed 40415474_b_08.csv
Processed 40415474_c_01.csv
Processed 40415474_c_02.csv
Processed 40415474_c_03.csv
Processed 40415474_c_04.csv
Processed 40415474_c_05.csv
Processed 40415474_c_06.csv
Processed 40415474_c_07.csv
Processed 40415474_c_08.csv
Processed 40415474_d_01.csv
Processed 40415474_d_02.csv
Processed 40415474_d_03.csv
Processed 40415474_d_04.csv
Processed 40415474_d_05.csv
Processed 40415474_d_06.csv
Processed 40415474_d_07.csv
Processed 40415474_d_08.csv
Processed 40415474_e_01.csv
Processed 40415474_e_02.csv
Processed 40415474_e_03.csv
Processed 40415474_e