In [8]:
import os
import json
import random
from sklearn.model_selection import train_test_split
# Input metadata file and the directory that later receives the split files.
json_file = 'module_metadata.json'
output_dir = 'dataset'

# Parse the metadata; the loop below iterates it as a mapping whose values
# carry 'image_filepath' and 'anomaly_class' entries.
with open(json_file, 'r') as f:
    labels = json.load(f)

# Echo every loaded record so the contents can be inspected by eye.
print("Loaded labels from JSON file:")
for key, value in labels.items():
    image_path = value['image_filepath']
    anomaly = value['anomaly_class']
    print(f"ID: {key}, Image Path: {image_path}, Label: {anomaly}")


# Build one record per metadata entry whose image file exists on disk.
# Entries pointing at a missing file are reported and left out.
data = []
for info in labels.values():
    # The stored path is used exactly as written in the metadata; wrapping a
    # single argument in os.path.join() would not change it.
    img_path = info['image_filepath']
    if not os.path.isfile(img_path):
        print(f"Warning: File not found - {img_path}")
        continue
    data.append({'image_filepath': img_path, 'anomaly_class': info['anomaly_class']})


print(f"Number of valid images found: {len(data)}")


# Fail fast: with no usable image there is nothing to split.
if not data:
    raise ValueError("The dataset is empty. Check the image paths and labels.")

# One seed drives the shuffle and both splits so reruns yield the same
# partitions.
split_seed = 42

# Shuffle with a dedicated, seeded generator. An unseeded random.shuffle()
# would reorder the input differently on every run, so the resulting splits
# would differ even though random_state is fixed below.
random.Random(split_seed).shuffle(data)

# First split: 70% train / 30% held out. Second split: the held-out part is
# halved into validation and test. Both are stratified on 'anomaly_class'.
# NOTE(review): stratified splitting needs enough samples of every class in
# each input; confirm rare classes in the metadata satisfy this.
train_data, temp_data = train_test_split(
    data,
    test_size=0.3,
    random_state=split_seed,
    stratify=[d['anomaly_class'] for d in data],
)
val_data, test_data = train_test_split(
    temp_data,
    test_size=0.5,
    random_state=split_seed,
    stratify=[d['anomaly_class'] for d in temp_data],
)

# Create the destination first: open(..., 'w') creates files, not missing
# parent directories, and would raise FileNotFoundError otherwise.
os.makedirs(output_dir, exist_ok=True)

train_json_path = os.path.join(output_dir, 'train_labels.json')
val_json_path = os.path.join(output_dir, 'val_labels.json')
test_json_path = os.path.join(output_dir, 'test_labels.json')

# Write each split as an indented JSON list of
# {'image_filepath', 'anomaly_class'} records.
for split_path, split_records in (
    (train_json_path, train_data),
    (val_json_path, val_data),
    (test_json_path, test_data),
):
    with open(split_path, 'w') as f:
        json.dump(split_records, f, indent=4)

print("Data split and saved successfully.")

Loaded labels from JSON file:
ID: 13357, Image Path: images/13357.jpg, Label: No-Anomaly
ID: 13356, Image Path: images/13356.jpg, Label: No-Anomaly
ID: 19719, Image Path: images/19719.jpg, Label: No-Anomaly
ID: 11542, Image Path: images/11542.jpg, Label: No-Anomaly
ID: 11543, Image Path: images/11543.jpg, Label: No-Anomaly
ID: 11540, Image Path: images/11540.jpg, Label: No-Anomaly
ID: 11541, Image Path: images/11541.jpg, Label: No-Anomaly
ID: 11546, Image Path: images/11546.jpg, Label: No-Anomaly
ID: 11547, Image Path: images/11547.jpg, Label: No-Anomaly
ID: 11544, Image Path: images/11544.jpg, Label: No-Anomaly
ID: 11545, Image Path: images/11545.jpg, Label: No-Anomaly
ID: 14545, Image Path: images/14545.jpg, Label: No-Anomaly
ID: 11548, Image Path: images/11548.jpg, Label: No-Anomaly
ID: 11549, Image Path: images/11549.jpg, Label: No-Anomaly
ID: 16426, Image Path: images/16426.jpg, Label: No-Anomaly
ID: 19927, Image Path: images/19927.jpg, Label: No-Anomaly
ID: 5988, Image Path: imag