In [21]:
 import os
import re  # Import regex for better filename extraction
import numpy as np
import csv

def pgm_to_csv(pgm_file, csv_file, threshold=128):
    with open(pgm_file, 'r') as f:
        lines = f.readlines()
    
    # Extract pixel values (skip the header)
    pixel_values = []
    for line in lines[4:]:  # Skip first 4 header lines
        pixel_values.extend(map(int, line.split()))
    
    # Convert to 18x18 numpy array
    image_matrix = np.array(pixel_values).reshape(18, 18)
    
    # Apply threshold (black = 1, white = 0)
    binary_matrix = (image_matrix < threshold).astype(int)
    
    # Save as CSV (comma-separated, no header)
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(binary_matrix)

def batch_convert_pgm_to_csv(input_dir, student_number):
    if not os.path.exists(input_dir):
        print(f"Input directory {input_dir} does not exist.")
        return
    
    for filename in os.listdir(input_dir):
        if filename.endswith('.pgm'):
            name_parts = filename[:-4]  # Remove ".pgm"
            
            # Extract label and index using regex (matches letters + digits at the end)
            match = re.match(r"([a-zA-Z]+)_?(\d+)$", name_parts)
            if match:
                label, index = match.groups()
                index = index.zfill(2)  # Ensure the index is always two digits (e.g., "07")
                
                csv_filename = f"{student_number}_{label}_{index}.csv"
                pgm_to_csv(os.path.join(input_dir, filename), os.path.join(input_dir, csv_filename))
                print(f"Converted {filename} -> {csv_filename}")
            else:
                print(f"Skipping {filename} - Filename format is incorrect.")

# Example usage
STUDENT_NUMBER = "40415474"  # Replace with your actual student number
INPUT_DIR = "images"  # Folder containing .pgm files (and where CSV files will be saved)

batch_convert_pgm_to_csv(INPUT_DIR, STUDENT_NUMBER)

Converted i2.pgm -> 40415474_i_02.csv
Converted h6.pgm -> 40415474_h_06.csv
Converted sad3.pgm -> 40415474_sad_03.csv
Converted e1.pgm -> 40415474_e_01.csv
Converted d5.pgm -> 40415474_d_05.csv
Converted g8.pgm -> 40415474_g_08.csv
Converted d4.pgm -> 40415474_d_04.csv
Converted sad2.pgm -> 40415474_sad_02.csv
Converted h7.pgm -> 40415474_h_07.csv
Converted i3.pgm -> 40415474_i_03.csv
Converted h5.pgm -> 40415474_h_05.csv
Converted i1.pgm -> 40415474_i_01.csv
Converted d6.pgm -> 40415474_d_06.csv
Converted e2.pgm -> 40415474_e_02.csv
Converted e3.pgm -> 40415474_e_03.csv
Converted d7.pgm -> 40415474_d_07.csv
Converted sad1.pgm -> 40415474_sad_01.csv
Converted h4.pgm -> 40415474_h_04.csv
Converted sad5.pgm -> 40415474_sad_05.csv
Converted j8.pgm -> 40415474_j_08.csv
Converted i4.pgm -> 40415474_i_04.csv
Converted d3.pgm -> 40415474_d_03.csv
Converted e7.pgm -> 40415474_e_07.csv
Converted e6.pgm -> 40415474_e_06.csv
Converted d2.pgm -> 40415474_d_02.csv
Converted i5.pgm -> 40415474_i_05.