In [12]:
import json
import os
import shutil
from PIL import Image
import numpy as np
import joblib

In [13]:
# JSON file holding the saved label-name -> class-index mapping.
labels_to_idx_file = "models/random_forest_labels_to_idx_more_data.json"
# Serialized model file that is loaded with joblib before sorting.
random_forest_model_file = "models/best_random_forest_model_more_data_2023-11-16_1527.pkl"

# Folder whose files are classified and then moved away.
raw_data_folder = 'raw_data_full'
# Destination root; one subfolder per category is created inside it.
sorted_data_folder = 'auto_sorted_data'

# Size handed to Image.resize for every image before it is flattened.
# NOTE(review): presumably has to match the size used when the model was trained - confirm.
preprocess_image_size = (128,128)

In [14]:
# Read the saved label-name -> class-index mapping from disk
with open(labels_to_idx_file, 'r') as mapping_file:
    label_to_idx = json.load(mapping_file)

# Build the reverse lookup (class index -> label name) by pairing each
# value of the mapping with its key
idx_to_label = dict(zip(label_to_idx.values(), label_to_idx.keys()))

In [15]:
# Inspect the loaded mapping. A cell only renders its final expression, so
# this is the single value shown below.
label_to_idx

{'bubble': 0, 'enough': 1, 'not_enough': 2}

In [16]:

# pre-process images
def preprocess_image(image_path, size=preprocess_image_size):
    """Load an image, resize it, convert it to grayscale, and flatten it.

    Args:
        image_path: Path of the image file to read.
        size: Target size passed to ``Image.resize``; defaults to
            ``preprocess_image_size``.

    Returns:
        A 1-D NumPy array with the pixel values of the resized,
        single-channel ('L' mode) image.
    """
    # Open the file in a context manager so its handle is released before
    # the caller moves or deletes the file; without it the handle can stay
    # open until garbage collection.
    with Image.open(image_path) as source_image:
        # Resize first, then convert to 'L'. The order is kept as-is because
        # swapping it could change the resulting pixel values.
        # NOTE(review): presumably this mirrors the training-time preprocessing - confirm.
        image = source_image.resize(size)
        if image.mode != 'L':
            image = image.convert('L')
        # Copy the pixels into an array while the source file is still open.
        return np.array(image).flatten()

# Load the trained model.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code - only load model files that come from a trusted source.
best_model = joblib.load(random_forest_model_file)

# Ensure the sorted_data folder and one subfolder per category exist.
# exist_ok=True keeps the cell safe to re-run when the folders are already there.
os.makedirs(sorted_data_folder, exist_ok=True)
for category in idx_to_label.values():
    category_path = os.path.join(sorted_data_folder, category)
    os.makedirs(category_path, exist_ok=True)

# Process and sort each image
for filename in os.listdir(raw_data_folder):
    file_path = os.path.join(raw_data_folder, filename)
    # Sub-directories and other non-file entries are left where they are.
    if not os.path.isfile(file_path):
        continue
    try:
        image_data = preprocess_image(file_path)
        # predict() expects a batch, so wrap the single sample in a list and
        # take the first (only) prediction.
        predicted_category_index = best_model.predict([image_data])[0]
        predicted_category = idx_to_label[predicted_category_index]

        destination_path = os.path.join(sorted_data_folder, predicted_category, filename)
        # shutil.move may silently overwrite an existing file, so never
        # replace an image that has already been sorted under the same name.
        if os.path.exists(destination_path):
            print(f"Skipped {filename}: {destination_path} already exists")
            continue

        # Move the file
        shutil.move(file_path, destination_path)
        print(f'Moved {filename} to {destination_path}')
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"Error processing {filename}: {e}")


Moved LoadandBleach_10122023_0033563.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0033563.tif
Moved LoadandBleach_10122023_0033577.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0033577.tif
Moved LoadandBleach_10122023_0044099.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0044099.tif
Moved LoadandBleach_10122023_0043906.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0043906.tif
Moved LoadandBleach_10122023_0043912.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0043912.tif
Moved LoadandBleach_10122023_0043721.tif to auto_sorted_data/not_enough/LoadandBleach_10122023_0043721.tif
Moved LoadandBleach_10122023_0043735.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0043735.tif
Moved LoadandBleach_10122023_0054249.tif to auto_sorted_data/bubble/LoadandBleach_10122023_0054249.tif
Moved LoadandBleach_10122023_0044072.tif to auto_sorted_data/not_enough/LoadandBleach_10122023_0044072.tif
Moved LoadandBleach_10122023_0054261.tif to auto_sorted_data/enou