In [None]:
import cv2 # OpenCV for image processing 
import pandas as pd # For handling data frames
import numpy as np # Numpy for numerical operations 
import os # For operating system interactions 
from sklearn.cluster import KMeans # K-means clustering from scikit-learn
import time # To track the time taken for processes
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog # For texture and shape feature extraction
from scipy.stats import skew, kurtosis # For statistical features like skewness and kurtosis
import mimetypes # To guess the MIME type of files
from joblib import Parallel, delayed # For parallel processing 
from tqdm import tqdm # To show progress bars for loops 
from skimage.filters import gabor # For Gabor filter feature extraction
import mahotas  # Library for Zernike moments
import csv  # Import the csv module for handling CSV files
import gc # Import the gc module for garbage collection

# Preprocess Images

In [None]:
def preprocess_image(image_path, apply_denoising):
    """
    Load an image from disk, resize it to 256x256 and derive its grayscale version.

    Parameters:
    image_path (str): The file path to the image
    apply_denoising (bool): whether to apply bilateral filtering for noise reduction
    Returns:
    image (np.array): the resized (and optionally denoised) image, or None on failure
    gray (np.array): the grayscale version of that image, or None on failure

    Any error (unreadable file included) is printed and reported as (None, None)
    instead of being raised.
    """
    target_size = (256, 256)
    try:
        loaded = cv2.imread(image_path)
        if loaded is None:
            # the load failed; route it through the common error handler below
            raise ValueError(f"Error loading image: {image_path}")

        processed = cv2.resize(loaded, target_size)
        if apply_denoising:
            # bilateral filter: diameter 9, sigmaColor 75, sigmaSpace 75
            processed = cv2.bilateralFilter(processed, 9, 75, 75)

        return processed, cv2.cvtColor(processed, cv2.COLOR_BGR2GRAY)
    except Exception as e:
        print(f"Error preprocessing image {image_path}: {e}")
        return None, None

# Texture Features

Local Binary Pattern (LBP)

In [None]:
def extract_lbp_features(gray, radius=2, n_points=16, method='uniform'):
    """
    Compute a normalized Local Binary Pattern (LBP) histogram of a grayscale image.

    Parameters:
    gray (np.array): the grayscale image
    radius (int): radius of the circular neighborhood
    n_points (int): number of sampling points on the circle
    method (str): LBP variant forwarded to local_binary_pattern ('uniform' or other)
    Returns:
    hist (list): histogram with n_points + 2 bins whose entries sum to 1;
                 a list of n_points + 2 zeros if anything goes wrong
    """
    try:
        pattern_map = local_binary_pattern(gray, n_points, radius, method)
        # integer-aligned bin edges 0, 1, ..., n_points + 2  ->  n_points + 2 bins
        bin_edges = np.arange(0, n_points + 3)
        counts, _ = np.histogram(pattern_map.ravel(), bins=bin_edges, range=(0, n_points + 2))
        distribution = counts.astype(np.float32)
        distribution /= distribution.sum()  # turn counts into relative frequencies
        return distribution.tolist()
    except Exception as e:
        print(f"Error extracting LBP features: {e}")
        zero_histogram = [0] * (n_points + 2)
        return zero_histogram

Gray Level Co-occurrence Matrix (GLCM)

In [None]:
def extract_glcm_features(gray, distances=[1, 2], angles=[0, np.pi/4]):
    """
    Compute Gray Level Co-occurrence Matrix (GLCM) statistics of a grayscale image.

    Parameters:
    gray (np.array): the grayscale image
    distances (list): pixel-pair distances to evaluate
    angles (list): pixel-pair angles (radians) to evaluate
    Returns:
    features (list): for every (distance, angle) pair, in nested-loop order, the six values
                     contrast, dissimilarity, homogeneity, energy, correlation, ASM
    """
    property_names = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')

    features = []
    for distance in distances:
        for angle in angles:
            # one symmetric, normalized 256-level matrix per (distance, angle) pair
            matrix = graycomatrix(gray, distances=[distance], angles=[angle], levels=256, symmetric=True, normed=True)
            features.extend(graycoprops(matrix, name)[0, 0] for name in property_names)
    return features

Histogram of Oriented Gradients (HOG)

In [None]:
def extract_hog_features(gray, pixels_per_cell=[(8, 8), (16, 16)], cells_per_block=[(2, 2), (3, 3)]):
    """
    compute histogram of oriented gradients (HOG) descriptors for a grayscale image

    parameters:
    gray (np.array): the grayscale image
    pixels_per_cell (list): list of tuples giving the cell sizes (in pixels) to try
    cells_per_block (list): list of tuples giving the block sizes (in cells) to try

    returns:
    features (list): the descriptors of every (cell size, block size) combination,
                     concatenated in nested-loop order (cell size outermost)
    """
    descriptor = []
    combinations = [(cell, block) for cell in pixels_per_cell for block in cells_per_block]
    for cell_size, block_size in combinations:
        descriptor.extend(
            hog(gray, orientations=9, pixels_per_cell=cell_size, cells_per_block=block_size,
                block_norm='L2-Hys', transform_sqrt=True)
        )
    return descriptor

Gabor Filters

In [None]:
def extract_gabor_features(gray, num_kernels=8):
    """
    summarize the responses of a bank of gabor filters applied to a grayscale image

    parameters:
    gray (np.array): the grayscale image
    num_kernels (int): number of filter orientations, evenly spaced over [0, pi)

    returns:
    features (list): mean and variance of the real filter response for every
                     (orientation, frequency) pair; 4 frequencies per orientation
    """
    frequencies = (0.05, 0.1, 0.2, 0.3)

    stats = []
    for kernel_index in range(num_kernels):
        theta = kernel_index * (np.pi / num_kernels)
        for frequency in frequencies:
            # only the real part of the response is summarized
            real_response, _ = gabor(gray, frequency=frequency, theta=theta)
            stats += [real_response.mean(), real_response.var()]
    return stats

# Color Features

Color Moments

In [None]:
def extract_color_moments(image):
    """
    compute the first four color moments (mean, standard deviation, skewness, kurtosis)
    of every channel of an image in three color spaces
    parameters:
    image (np.array): the input image (converted with cv2.COLOR_BGR2HSV / COLOR_BGR2LAB,
                      i.e. treated as BGR)
    returns:
    features (list): 36 values - 4 moments x 3 channels for the image as given,
                     then for its HSV version, then for its LAB version
    """
    # None means "use the image as given"; the others are cv2 conversion codes
    conversions = (None, cv2.COLOR_BGR2HSV, cv2.COLOR_BGR2LAB)

    moments = []
    for conversion in conversions:
        pixels = image if conversion is None else cv2.cvtColor(image, conversion)
        for channel_index in range(3):
            plane = pixels[:, :, channel_index]
            moments.append(np.mean(plane))
            moments.append(np.std(plane))
            moments.append(skew(plane.flatten()))
            moments.append(kurtosis(plane.flatten()))
    return moments

Color Histogram

In [None]:
def extract_color_histogram(image, bins=32):
    """
    build per-channel color histograms of an image in three color spaces
    parameters:
    image (np.array): the input image (converted with cv2.COLOR_BGR2HSV / COLOR_BGR2LAB,
                      i.e. treated as BGR)
    bins (int): number of bins per channel histogram
    returns:
    features (list): 9 * bins values - the histograms of the 3 channels of the image as
                     given, then of its HSV version, then of its LAB version; each histogram
                     is normalized by cv2.normalize with its default settings
    """
    # None means "use the image as given"; the others are cv2 conversion codes
    conversions = (None, cv2.COLOR_BGR2HSV, cv2.COLOR_BGR2LAB)

    features = []
    for conversion in conversions:
        pixels = image if conversion is None else cv2.cvtColor(image, conversion)
        for channel_index in range(3):
            # every channel is binned over the value range [0, 256)
            channel_hist = cv2.calcHist([pixels], [channel_index], None, [bins], [0, 256])
            cv2.normalize(channel_hist, channel_hist)  # in-place normalization
            features.extend(channel_hist.flatten())
    return features

Color Coherence Vector

In [None]:
def extract_color_coherence_vector(image, bins=32):
    """
    extract a simplified color coherence vector (CCV) from an image

    the image is converted with cv2.COLOR_BGR2HSV. for each of the three HSV channels,
    every interior pixel (the one-pixel border is skipped) is classified as
    - coherent: all eight neighbours differ from it by at most 1, or
    - incoherent: otherwise,
    and counted in the bin  value * (bins - 1) // 255.

    parameters:
    image (np.array): the input image (treated as BGR; channel values are expected in 0..255)
    bins (int): number of bins per channel (must be >= 1)
    returns:
    features (list): 6 * bins counts - for each channel, `bins` coherent counts followed
                     by `bins` incoherent counts
    raises:
    ValueError: if bins is smaller than 1
    """
    if bins < 1:
        raise ValueError("bins must be a positive integer")

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    features = []

    # offsets of the eight neighbours around a pixel
    neighbor_offsets = [(dy, dx) for dy in (-1, 0, 1) for dx in (-1, 0, 1) if (dy, dx) != (0, 0)]

    for i in range(3):
        # widen before doing arithmetic: differences can be negative and
        # value * (bins - 1) does not fit into a small integer type for large `bins`
        channel = hsv[:, :, i].astype(np.intp)
        rows, cols = channel.shape

        coherent_pixels = np.zeros(bins, dtype=np.int32)
        incoherent_pixels = np.zeros(bins, dtype=np.int32)

        if rows >= 3 and cols >= 3:  # otherwise there are no interior pixels to classify
            center = channel[1:-1, 1:-1]
            is_coherent = np.ones(center.shape, dtype=bool)
            for dy, dx in neighbor_offsets:
                # the interior window shifted by (dy, dx) lines each pixel up with one neighbour
                neighbor = channel[1 + dy:rows - 1 + dy, 1 + dx:cols - 1 + dx]
                is_coherent &= np.abs(center - neighbor) <= 1

            bin_index = center * (bins - 1) // 255
            coherent_pixels = np.bincount(bin_index[is_coherent], minlength=bins).astype(np.int32)
            incoherent_pixels = np.bincount(bin_index[~is_coherent], minlength=bins).astype(np.int32)

        features.extend(coherent_pixels)
        features.extend(incoherent_pixels)

    return features

Color Name Histogram

In [None]:
# converting RGB to HEX
def RGB2HEX(color):
    """
    format an RGB color as a lowercase '#rrggbb' HEX string

    parameters:
    color (tuple): a sequence whose first three items are the red, green and blue values;
                   each is truncated to an integer before formatting

    returns:
    str: the HEX representation of the color
    """
    red, green, blue = int(color[0]), int(color[1]), int(color[2])
    return f"#{red:02x}{green:02x}{blue:02x}"

In [None]:
# assigning color names
def assign_color_name(color):
    """
    assign a name to an RGB color based on the closest predefined color

    the closest color is the one with the smallest Euclidean distance in RGB space;
    on a tie the color listed first in the palette wins. note that two palette
    entries (0, 255, 0) and (0, 128, 0) share the name 'green'.

    parameters:
    color(tuple): a sequence whose first three items are the red, green and blue values;
                  plain numbers and NumPy scalars (e.g. uint8 pixels) are both accepted

    returns:
    str: the name of the closest color
    """
    palette = (
        ((0, 0, 0), 'black'),
        ((255, 255, 255), 'white'),
        ((255, 0, 0), 'red'),
        ((0, 255, 0), 'green'),
        ((0, 0, 255), 'blue'),
        ((255, 255, 0), 'yellow'),
        ((255, 0, 255), 'magenta'),
        ((0, 255, 255), 'cyan'),
        ((128, 0, 0), 'maroon'),
        ((128, 128, 0), 'olive'),
        ((0, 128, 0), 'green'),
        ((128, 0, 128), 'purple'),
        ((0, 128, 128), 'teal'),
        ((0, 0, 128), 'navy'),
    )

    # convert to plain floats first: with unsigned NumPy scalars the subtraction and
    # squaring below would wrap around and silently pick the wrong color
    red, green, blue = float(color[0]), float(color[1]), float(color[2])

    closest_name = None
    smallest_distance = float('inf')
    for (ref_red, ref_green, ref_blue), name in palette:
        # the squared distance orders candidates exactly like the distance itself
        distance = (red - ref_red) ** 2 + (green - ref_green) ** 2 + (blue - ref_blue) ** 2
        if distance < smallest_distance:
            smallest_distance = distance
            closest_name = name

    return closest_name


In [None]:
# extracting color name histogram
def extract_color_name_histogram(image, bins=14):
    """
    extract a histogram of color names from an image

    the image is resized to 256x256 (nearest neighbour) and every pixel is mapped to a
    color name with assign_color_name. each distinct pixel color is named only once and
    weighted by how often it occurs, which gives the same counts as naming every pixel.

    parameters:
    image(np.array): the input image in OpenCV's BGR channel order (as loaded by cv2.imread)
    bins(int): unused; kept so existing callers that pass it keep working

    returns:
    list: 14 pixel counts in the order black, white, red, green, blue, yellow, magenta,
          cyan, maroon, olive, green, purple, teal, navy. 'green' is listed twice, so
          positions 3 and 10 always hold the same count.
    """
    histogram_names = ['black', 'white', 'red', 'green', 'blue', 'yellow', 'magenta', 'cyan',
                       'maroon', 'olive', 'green', 'purple', 'teal', 'navy']

    image_resized = cv2.resize(image, (256, 256), interpolation=cv2.INTER_NEAREST)

    # assign_color_name expects (red, green, blue); taking channels 2, 1, 0 turns BGR into RGB.
    # without this swap red and blue pixels would be counted under each other's name.
    rgb_pixels = image_resized[:, :, 2::-1].reshape(-1, 3)

    unique_colors, pixel_counts = np.unique(rgb_pixels, axis=0, return_counts=True)

    counts_by_name = {}
    # .tolist() hands plain Python numbers to assign_color_name
    for color, count in zip(unique_colors.tolist(), pixel_counts.tolist()):
        name = assign_color_name(color)
        counts_by_name[name] = counts_by_name.get(name, 0) + count

    return [counts_by_name.get(name, 0) for name in histogram_names]

# Shape Features

Hu Moments

In [None]:
def extract_hu_moments(gray):
    """
    compute the seven hu moments of the largest contour in a grayscale image

    the image is binarized with an inverted Otsu threshold, external contours are
    extracted, and the contour with the largest area is described.

    parameters:
    gray (np.array): the grayscale image

    returns:
    list: the 7 hu moments, or 7 zeros when no contour is found
    """
    threshold_mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    _, binary = cv2.threshold(gray, 0, 255, threshold_mode)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return [0] * 7

    largest = max(contours, key=cv2.contourArea)
    spatial_moments = cv2.moments(largest)
    return list(cv2.HuMoments(spatial_moments).flatten())

Morphological Features

In [None]:
def extract_morphological_features(gray):
    """
    measure the largest contour of a grayscale image

    the image is binarized with an inverted Otsu threshold, external contours are
    extracted, and the contour with the largest area is measured.

    parameters:
    gray(np.array): the grayscale image

    returns:
    list: [area, perimeter, aspect ratio, circularity, solidity] where
          aspect ratio = bounding-box width / height,
          circularity  = 4 * pi * area / perimeter**2,
          solidity     = area / convex-hull area;
          a ratio whose denominator is zero is reported as 0, and the whole
          result is five zeros when no contour is found
    """
    threshold_mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    _, binary = cv2.threshold(gray, 0, 255, threshold_mode)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return [0] * 5

    largest = max(contours, key=cv2.contourArea)

    # raw measurements
    area = cv2.contourArea(largest)
    perimeter = cv2.arcLength(largest, True)  # True: treat the contour as closed
    _, _, width, height = cv2.boundingRect(largest)
    hull_area = cv2.contourArea(cv2.convexHull(largest))

    # derived ratios, guarded against zero denominators
    aspect_ratio = 0 if height == 0 else float(width) / height
    circularity = 0 if perimeter == 0 else 4 * np.pi * area / (perimeter ** 2)
    solidity = 0 if hull_area == 0 else float(area) / hull_area

    return [area, perimeter, aspect_ratio, circularity, solidity]

Fourier Descriptors

In [None]:
def extract_fourier_descriptor(gray, n_descriptors=32):
    """
    extract fourier descriptors from a grayscale image for shape analysis

    the image is binarized with an inverted Otsu threshold and the external contour with
    the largest area is taken. its points are read as complex numbers x + iy, transformed
    with an FFT, and the magnitudes of the first n_descriptors coefficients are returned
    after an fftshift of that truncated block (the layout existing consumers already see).

    parameters:
    gray(np.array): the grayscale image
    n_descriptors(int): number of fourier descriptors to extract

    returns:
    list: always exactly n_descriptors values. contours with fewer points than
          n_descriptors are zero-padded (before the shift) so that every image yields
          a feature vector of the same length; all zeros when no contour is found
    """
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return [0] * n_descriptors

    contour = max(contours, key=cv2.contourArea).reshape(-1, 2)  # (n_points, 2) x/y pairs
    boundary = contour[:, 0] + 1j * contour[:, 1]

    spectrum = np.fft.fft(boundary)[:n_descriptors]

    # a short contour produces fewer coefficients than requested; pad the missing
    # high-order ones with zeros so the result length never depends on the image
    coefficients = np.zeros(n_descriptors, dtype=complex)
    coefficients[:spectrum.shape[0]] = spectrum

    return np.abs(np.fft.fftshift(coefficients)).tolist()


Zernike Moments

In [None]:
def extract_zernike_moments(gray, radius, degree):
    """
    compute zernike moments of the binarized version of a grayscale image
    parameters:
    gray(np.array): the grayscale image
    radius(int): radius forwarded to mahotas' zernike_moments
    degree(int): maximum degree forwarded to mahotas' zernike_moments
    returns:
    list: the zernike moments as a list of values
    """
    # inverted Otsu threshold, as used by the other shape features
    otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    _, mask = cv2.threshold(gray, 0, 255, otsu_inverted)
    return mahotas.features.zernike_moments(mask, radius, degree).tolist()

# Extracting All Features from an Image

In [None]:
def extract_features_from_image(image, gray):
    """
    run every texture, color and shape extractor on one image and collect the results

    parameters:
    image(np.array): the input image
    gray(np.array): the grayscale version of the image

    returns:
    dict: feature name -> value. list-valued extractors contribute keys of the form
          '<Prefix>_<index>'; the five contour measurements get their own names.
          insertion order is texture, color, then shape features.
    """
    features = {}

    def store_indexed(prefix, values):
        # one entry per value, keyed '<prefix>_<position>'
        for position, value in enumerate(values):
            features[f'{prefix}_{position}'] = value

    # Texture features
    lbp_histogram = extract_lbp_features(gray, radius=1, n_points=8, method='uniform')
    if lbp_histogram is not None:
        store_indexed('LBP_Pattern', lbp_histogram)
    store_indexed('GLCM_Feature',
                  extract_glcm_features(gray, distances=[1, 2, 3], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4]))
    store_indexed('HOG', extract_hog_features(gray, pixels_per_cell=[(16, 16)], cells_per_block=[(3, 3)]))
    store_indexed('Gabor_Feature', extract_gabor_features(gray, num_kernels=16))

    # Color features
    store_indexed('Color_Moment', extract_color_moments(image))
    store_indexed('Color_Histogram', extract_color_histogram(image, bins=16))
    store_indexed('Color_Coherence_Vector', extract_color_coherence_vector(image, bins=16))
    store_indexed('Color_Name_Histogram', extract_color_name_histogram(image, bins=14))

    # Shape features
    store_indexed('Hu_Moment', extract_hu_moments(gray))

    contour_measurements = extract_morphological_features(gray)
    contour_keys = ('Contour_Area', 'Contour_Perimeter', 'Contour_AspectRatio',
                    'Contour_Circularity', 'Contour_Solidity')
    for position, key in enumerate(contour_keys):
        features[key] = contour_measurements[position]

    store_indexed('Fourier_Descriptor', extract_fourier_descriptor(gray, n_descriptors=16))
    store_indexed('Zernike_Moment', extract_zernike_moments(gray, radius=128, degree=4))

    return features

# Extracting Features from an Image Path

In [None]:
def extract_features(image_path, apply_denoising=False, augment=False):
    """
    load the image at the given path and extract all of its features

    parameters:
    image_path(str): the file path to the image (converted with str() first)
    apply_denoising(bool): whether to apply denoising using bilateral filtering
    augment(bool): whether to also extract the features of the horizontally flipped image

    returns:
    dict: the extracted features; with augment=True the features of the flipped image are
          included under the same names prefixed with 'flipped_'. an empty dict is returned
          when the file does not exist, cannot be preprocessed, or any error occurs
          (the reason is printed).
    """
    try:
        image_path = str(image_path)

        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            return {}

        # resize, optionally denoise, and derive the grayscale image
        image, gray = preprocess_image(image_path, apply_denoising)
        if image is None or gray is None:
            return {}

        features = extract_features_from_image(image, gray)
        if not augment:
            return features

        # augmentation: describe the horizontally mirrored image as well (flip code 1)
        mirrored_features = extract_features_from_image(cv2.flip(image, 1), cv2.flip(gray, 1))
        prefixed = {f'flipped_{name}': value for name, value in mirrored_features.items()}
        features.update(prefixed)
        return features
    except Exception as e:
        print(f"Error processing image {image_path}: {str(e)}")
        return {}

# Main Execution 

In [None]:
def _variant_label(apply_denoising, augment):
    """Name a preprocessing combination for progress and log messages, e.g. 'denoised, augmented'."""
    return f"{'original' if not apply_denoising else 'denoised'}{', augmented' if augment else ''}"


def _write_feature_csv(csv_name, image_table, apply_denoising, augment, batch_size, regenerating=False):
    """
    Extract the features of every image in `image_table` and write them to `csv_name`.

    parameters:
    csv_name(str): path of the csv file to (over)write
    image_table(pd.DataFrame): one row per image with the columns 'path', 'filename', 'label'
    apply_denoising(bool): forwarded to extract_features
    augment(bool): forwarded to extract_features
    batch_size(int): number of images extracted (in parallel) and written per batch
    regenerating(bool): only selects the wording of the progress messages

    Images for which extract_features returns an empty dict are left out of the file;
    their number is printed at the end.
    """
    variant = _variant_label(apply_denoising, augment)
    metadata_columns = ['path', 'filename', 'label']
    writer = None  # created once the first extracted features reveal the column names
    skipped = 0

    with open(csv_name, 'w', newline='') as csv_file:
        # process images in batches to manage memory usage
        for batch_start in range(0, len(image_table), batch_size):
            batch_df = image_table.iloc[batch_start:batch_start + batch_size]
            batch_number = batch_start // batch_size + 1
            if regenerating:
                description = f"Regenerating features for {variant} images, batch {batch_number}"
            else:
                description = f"Processing images ({variant}, batch {batch_number})"

            start_time = time.time()
            features_list = Parallel(n_jobs=-1)(
                delayed(extract_features)(image_path, apply_denoising, augment)
                for image_path in tqdm(batch_df['path'], desc=description)
            )

            # pair each result with its own row by position: looking rows up with
            # list.index() is quadratic and returns the wrong row whenever two images
            # produce equal feature dicts
            metadata_rows = batch_df[metadata_columns].to_dict('records')
            for metadata, feature_dict in zip(metadata_rows, features_list):
                if not feature_dict:
                    skipped += 1
                    continue
                if writer is None:
                    # take the header from the widest result of this batch, in case
                    # an extractor produced fewer values for some image
                    widest = max((found for found in features_list if found), key=len)
                    writer = csv.DictWriter(csv_file, fieldnames=metadata_columns + list(widest))
                    writer.writeheader()
                writer.writerow({**metadata, **feature_dict})  # merge image metadata with extracted features

            elapsed = time.time() - start_time
            print(f"Finished {'regenerating' if regenerating else 'processing'} batch {batch_number} in {elapsed:.2f} seconds.")

            gc.collect()  # manually trigger garbage collection to free memory

        if writer is None:
            # no features at all (empty table or every image failed): still emit a valid csv
            csv.DictWriter(csv_file, fieldnames=metadata_columns).writeheader()

    if skipped:
        print(f"Skipped {skipped} image(s) without features while writing {csv_name}.")


if __name__ == '__main__':
    # main block of the script:
    # it handles the process of extracting features from images in a dataset and saving them to csv files

    root_path = '/Users/tony/Desktop/coffeebeans/DataSet' # define the root path where the dataset is stored
    os.chdir(root_path) # change the current working directory to the dataset path (the csv files are written here)

    label, path, names = [], [], [] # initialize lists to hold image paths, labels and filenames

    # iterate through each folder in the dataset directory
    for folder_name in os.listdir(root_path):
        if folder_name not in ['.DS_Store', 'code_oldversion', '.git']: # skip unnecessary directories
            current_dir = os.path.join(root_path, folder_name) # construct the full path to the current directory
            for root, dirs, files in os.walk(current_dir): # walk through the directory tree
                for file in files:
                    file_mimetype = mimetypes.guess_type(file)[0] # guess the file's MIME type from its name
                    if file_mimetype == 'image/jpeg': # process only JPEG images
                        path.append(os.path.join(root, file)) # store the full file path
                        label.append(folder_name) # store the label (which is the top-level folder name)
                        names.append(file) # store the filename

    # create a dataframe to organize the data
    df = pd.DataFrame({'path': path,
                       'filename': names,
                       'label': label})

    print(df.head()) # display the first few rows of the dataframe
    print(df.tail()) # display the last few rows of the dataframe

    # names of the csv files where the features will be saved, one per preprocessing combination below
    csv_names = ['original_features.csv',
                 'original_features_augmented.csv',
                 'denoised_features.csv',
                 'denoised_features_augmented.csv']

    # (apply_denoising, augment) combinations, in the same order as csv_names
    preprocessing_options = [(False, False), (False, True), (True, False), (True, True)]

    # adjust batch_size based on the memory available on the macbook
    batch_size = 2000

    for csv_name, (apply_denoising, augment) in zip(csv_names, preprocessing_options):
        _write_feature_csv(csv_name, df, apply_denoising, augment, batch_size)
        print(f"Finished processing all batches for {_variant_label(apply_denoising, augment)} images.")

    print("Finished initial processing of all images.")

    # check if any of the expected csv files are missing
    missing_files = [file for file in csv_names if not os.path.exists(file)]

    if not missing_files:
        print("All expected CSV files have been generated successfully.")
    else:
        print(f"The following CSV files are missing: {', '.join(missing_files)}")
        print("Regenerating missing files...")

        # regenerate missing csv files
        for csv_name, (apply_denoising, augment) in zip(csv_names, preprocessing_options):
            if csv_name in missing_files:
                _write_feature_csv(csv_name, df, apply_denoising, augment, batch_size, regenerating=True)
                print(f"Finished regenerating {csv_name}.")

    print("Program completed.") # confirm the completion of the entire program