In [3]:
pip install ucimlrepo




In [4]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
regensburg_pediatric_appendicitis = fetch_ucirepo(id=938)

# data (as pandas dataframes)
X = regensburg_pediatric_appendicitis.data.features
y = regensburg_pediatric_appendicitis.data.targets

# metadata
print(regensburg_pediatric_appendicitis.metadata)

# variable information
print(regensburg_pediatric_appendicitis.variables)

{'uci_id': 938, 'name': 'Regensburg Pediatric Appendicitis', 'repository_url': 'https://archive.ics.uci.edu/dataset/938/regensburg+pediatric+appendicitis', 'data_url': 'https://archive.ics.uci.edu/static/public/938/data.csv', 'abstract': 'This repository holds the data from a cohort of pediatric patients with suspected appendicitis admitted with abdominal pain to Children’s Hospital St. Hedwig in Regensburg, Germany, between 2016 and 2021. Each patient has (potentially multiple) ultrasound (US) images, aka views, tabular data comprising laboratory, physical examination, scoring results and ultrasonographic findings extracted manually by the experts, and three target variables, namely, diagnosis, management and severity.', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Tabular', 'Image'], 'num_instances': 782, 'num_features': 53, 'feature_types': ['Real', 'Categorical', 'Integer'], 'demographics': ['Age', 'Sex'], 'target_col': ['Management', 'Severity',

In [5]:
print(X)

       Age    BMI     Sex  Height  Weight  Length_of_Stay  Alvarado_Score  \
0    12.68  16.90  female   148.0    37.0             3.0             4.0   
1    14.10  31.90    male   147.0    69.5             2.0             5.0   
2    14.14  23.30  female   163.0    62.0             4.0             5.0   
3    16.37  20.60  female   165.0    56.0             3.0             7.0   
4    11.08  16.90  female   163.0    45.0             3.0             5.0   
..     ...    ...     ...     ...     ...             ...             ...   
777  12.41  25.25  female   166.5    70.0             4.0             8.0   
778  17.09  20.43  female   158.0    51.0             6.0             5.0   
779  14.99  19.91  female   152.0    46.0             4.0             5.0   
780   7.20  14.30    male   129.3    23.9             5.0             9.0   
781  11.51  18.17    male   146.5    39.0             4.0             2.0   

     Paedriatic_Appendicitis_Score Appendix_on_US  Appendix_Diameter  ...  

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [19]:
import zipfile, os, re, shutil
from PIL import Image # Import the Image module from PIL

# === 2️⃣ Path to your ZIP file on Google Drive ===
# 👉 Change this path to match your actual file location
zip_path = "/content/drive/MyDrive/US Pictures.zip"

# === 3️⃣ Create folder to extract contents ===
extract_path = "/content/extracted_zip"
os.makedirs(extract_path, exist_ok=True)

# === 4️⃣ Unzip the folder ===
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print("✅ Unzipped to:", extract_path)

# === 5️⃣ Create output folder for matched images ===
output_folder = "/content/Filtered_Images" # Output will be written here
os.makedirs(output_folder, exist_ok=True)
if os.path.exists(output_folder):
    shutil.rmtree(output_folder) # Remove the folder and its contents if it exists
    print(f"🗑️ Cleared existing output folder: {output_folder}")
os.makedirs(output_folder, exist_ok=True) # Recreate the empty folder
print(f"✅ Created (or ensured empty) output folder: {output_folder}")

# Initialize lists to store loaded images and their original filenames
selected_images = []
selected_filenames = []

# === 6️⃣ Define regex for App / Appendix (FROM YOUR PROVIDED SNIPPET) ===
# This regex matches 'app' or 'appendix' as the main word.
# It allows digits, periods, spaces, underscores, or hyphens BEFORE the keyword.
# It does NOT allow any other English letters or numbers AFTER the keyword.
pattern = re.compile(r'^(?:[\d\.\s_-]*)(app|appendix)$', re.IGNORECASE)

# === 7️⃣ Image extensions allowed (FROM YOUR PROVIDED SNIPPET) ===
allowed_exts = ('.bmp', '.png')

# === 8️⃣ Traverse extracted files and copy/load matches ===
matched_files_copied_paths = [] # This list will store the paths to the COPIED files in output_folder

for root, _, files in os.walk(extract_path):
    for file in files:
        if file.lower().endswith(allowed_exts): # Use the user-defined allowed_exts
            # Remove file extension
            name_without_ext = os.path.splitext(file)[0]
            # Remove leading/trailing spaces
            cleaned_name = name_without_ext.strip()

            # Match only if the cleaned name fits the pattern
            if pattern.match(cleaned_name):
                src = os.path.join(root, file)
                dst = os.path.join(output_folder, file) # Destination in the output_folder

                # Copy the file to the output folder
                shutil.copy2(src, dst)
                matched_files_copied_paths.append(dst) # Track the copied path
                print(f"✅ Copied: {file}")

                # Load the image using PIL and store it
                try:
                    img = Image.open(src).convert('RGB') # Open from source path
                    selected_images.append(img)
                    selected_filenames.append(file) # Store original filename
                except Exception as e:
                    print(f"⚠️ Could not load image {file}: {e}")

# === 9️⃣ Show summary ===
print("\n--- Summary of copied files ---")
print("Total copied images:", len(matched_files_copied_paths))
print("Saved in:", output_folder)

print("\n--- Summary of loaded images (into Python memory) ---")
print("Total images loaded into memory:", len(selected_images))
# print("Filenames of loaded images:", selected_filenames) # Uncomment to see all loaded filenames

✅ Unzipped to: /content/extracted_zip
🗑️ Cleared existing output folder: /content/Filtered_Images
✅ Created (or ensured empty) output folder: /content/Filtered_Images
✅ Copied: 742.2 App.bmp
✅ Copied: 454.2 App.bmp
✅ Copied: 563.5 App.bmp
✅ Copied: 506.4 App.bmp
✅ Copied: 721.3 App.bmp
✅ Copied: 628.1 App.bmp
✅ Copied: 872.2 App.bmp
✅ Copied: 464.3 App.bmp
✅ Copied: 358.2 App.bmp
✅ Copied: 216.1 App.bmp
✅ Copied: 133.2 App.bmp
✅ Copied: 833.4 App.bmp
✅ Copied: 211.4 App.bmp
✅ Copied: 691.6 App.bmp
✅ Copied: 558.2 App.bmp
✅ Copied: 102.19 App.bmp
✅ Copied: 832.2 App.bmp
✅ Copied: 802.4 App.bmp
✅ Copied: 642.3 App.bmp
✅ Copied: 607.1 App.bmp
✅ Copied: 51.3 App.bmp
✅ Copied: 790.5 App.bmp
✅ Copied: 446.1 App.bmp
✅ Copied: 252.2 App.bmp
✅ Copied: 537.6 App.bmp
✅ Copied: 368.2 App.bmp
✅ Copied: 565.1 App.bmp
✅ Copied: 177.2 App.bmp
✅ Copied: 672.4 App.bmp
✅ Copied: 130.4 App.bmp
✅ Copied: 37.1 App .bmp
✅ Copied: 107.3 App.bmp
✅ Copied: 597.4 App.bmp
✅ Copied: 856.1 App.bmp
✅ Copied: 512.2 A