In [1]:
import zipfile
from pathlib import Path
import os

In [3]:
def extract_mprage_files(zip_path, target_folder):
    """
    Extract all MPRAGE files from a ZIP archive, keeping the subject structure.

    An archive member is extracted when its name contains ``MPRAGE`` or
    ``MP-RAGE`` and at least one of its ``/``-separated path components
    contains ``_S_`` (the subject ID marker, e.g. ``133_S_0525``). The first
    path component (the archive root) is dropped and the rest of the path is
    recreated below ``target_folder``.

    Explicit directory entries are skipped, and any member whose destination
    would not lie strictly inside ``target_folder`` is reported as an error
    instead of being written.

    Args:
        zip_path: Path of the ZIP archive to read.
        target_folder: Directory to extract into. It is created, together
            with any missing parent directories, if it does not exist.

    Returns:
        int: Number of files actually written.

    Raises:
        Whatever ``zipfile.ZipFile`` raises when the archive cannot be opened.
        Failures on individual members are printed and do not abort the run.
    """
    import shutil
    import zipfile
    from pathlib import Path

    # Create target folder (parents=True so a missing parent is not fatal)
    target_base = Path(target_folder)
    target_base.mkdir(parents=True, exist_ok=True)
    # Resolved once; used to verify every destination stays inside the target
    target_root = target_base.resolve()

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # Find all MPRAGE-related items (directory entries are still counted
        # here so the reported number matches the archive listing)
        mprage_items = [
            name for name in zip_ref.namelist()
            if 'MPRAGE' in name or 'MP-RAGE' in name
        ]

        print(f"üîç Found {len(mprage_items)} MPRAGE items")

        extracted_count = 0
        for item in mprage_items:
            # A directory entry carries no data. Writing it with open(..., 'wb')
            # would create a *file* where a folder is needed and break every
            # member stored below it, so skip it; folders are created on demand.
            if item.endswith('/'):
                continue

            try:
                # Parse the path: <zip root>/<subject id>/<series>/...
                parts = item.split('/')

                # Only members that sit under a subject ID (e.g. "133_S_0525")
                if not any('_S_' in part for part in parts):
                    continue

                # Drop the first component (zip root) and keep the rest
                relative_path = '/'.join(parts[1:])
                target_path = target_base / relative_path

                # Zip-slip guard: '..' segments or an absolute remainder must
                # not escape the target folder. This also rejects a member
                # with nothing below the archive root (empty relative path).
                if target_root not in target_path.resolve().parents:
                    raise ValueError(
                        "entry does not resolve to a path inside the target folder"
                    )

                # Create parent directories
                target_path.parent.mkdir(parents=True, exist_ok=True)

                # Stream the member to disk instead of loading it whole
                with zip_ref.open(item) as source, open(target_path, 'wb') as target:
                    shutil.copyfileobj(source, target)

                extracted_count += 1
                if extracted_count <= 10:  # Show first 10
                    print(f"   üìÅ Extracted: {relative_path}")

            except Exception as e:
                # Best effort: report the failing member and keep going
                print(f"‚ùå Error extracting {item}: {e}")

        print(f"‚úÖ Successfully extracted {extracted_count} MPRAGE files")
        return extracted_count

# Usage: source archive and destination directory for the MPRAGE extraction
zip_path = r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_5.zip"
target_folder = r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_2"

# Announce both locations before the extraction starts
print(
    f"üöÄ Extracting MPRAGE files from: {zip_path}",
    f"üìÅ Target folder: {target_folder}",
    sep="\n",
)

extracted_count = extract_mprage_files(zip_path, target_folder)

# Summary: number of files written and where they went
print(
    f"\nüéØ Extraction complete! Found {extracted_count} MPRAGE files",
    f"ÔøΩÔøΩ Files extracted to: {target_folder}",
    sep="\n",
)

üöÄ Extracting MPRAGE files from: D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_5.zip
üìÅ Target folder: D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_2
üîç Found 208 MPRAGE items
   üìÅ Extracted: 002_S_0413/Accelerated_Sagittal_MPRAGE__MSV21_/2025-02-19_10_21_54.0/I11128519/ADNI_002_S_0413_MR_Accelerated_Sagittal_MPRAGE_(MSV21)__br_raw_20250226190620274_90.dcm
   üìÅ Extracted: 002_S_0413/Accelerated_Sagittal_MPRAGE__MSV21_/2025-02-19_10_21_54.0/I11128519/ADNI_002_S_0413_MR_Accelerated_Sagittal_MPRAGE_(MSV21)__br_raw_20250226190620365_179.dcm
   üìÅ Extracted: 002_S_0413/Accelerated_Sagittal_MPRAGE__MSV21_/2025-02-19_10_21_54.0/I11128519/ADNI_002_S_0413_MR_Accelerated_Sagittal_MPRAGE_(MSV21)__br_raw_20250226190620557_47.dcm
   üìÅ Extracted: 002_S_0413/Accelerated_Sagittal_MPRAGE__MSV21_/2025-02-19_10_21_54.0/I11128519/ADNI_002_S_0413_MR_Accelerated_Sagittal_MPRAGE_(MSV21)__br_raw_20250226190620686_101.dcm
   üìÅ E

In [10]:
def count_subject_ids_at_level(zip_paths):
    """
    Collect the subject IDs found at the ADNI/[Subject_ID]/ level of each ZIP.

    An archive member contributes a subject ID when its name has at least
    three ``/``-separated parts, the first part is exactly ``ADNI`` and the
    second part contains ``_S_``; the second part is taken as the subject ID.

    Args:
        zip_paths: Iterable of paths to the ZIP archives to inspect.

    Returns:
        dict: Maps each archive's file name (not its full path) to the set of
        subject IDs found in it. An archive that cannot be processed is
        reported on stdout and mapped to an empty set.

    Note:
        Results are keyed by file name only, so two archives with the same
        name in different directories overwrite each other's entry.
    """
    import zipfile
    from pathlib import Path

    print("üîç Counting subject IDs at ADNI/[Subject_ID] level across ZIP files...")
    print("=" * 70)

    # Subjects per ZIP, keyed by archive file name
    zip_subjects = {}

    for zip_path in zip_paths:
        zip_name = Path(zip_path).name
        print(f"\nüì¶ Processing: {zip_name}")

        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                subjects_in_zip = set()
                for item in zip_ref.namelist():
                    parts = item.split('/')
                    # Require ADNI/[Subject_ID]/<something>: at least 3 parts
                    if len(parts) >= 3 and parts[0] == 'ADNI' and '_S_' in parts[1]:
                        subjects_in_zip.add(parts[1])

            zip_subjects[zip_name] = subjects_in_zip
            print(f"   ‚úÖ Found {len(subjects_in_zip)} unique subjects at ADNI/[Subject_ID] level")

        except Exception as e:
            # Best effort: a bad or missing archive must not stop the others
            print(f"   ‚ùå Error processing {zip_name}: {e}")
            zip_subjects[zip_name] = set()

    return zip_subjects

# The five MRI archives to scan
zip_paths = [
    r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_1 (1).zip",
    r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_2.zip",
    r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_3.zip",
    r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_4.zip",
    r"D:\ADNI\AD_CN\proteomics\Biomarkers Consortium Plasma Proteomics MRM\MRI\MRI_5.zip",
]

# Gather the per-archive subject ID sets at the ADNI/[Subject_ID] level
print("üöÄ Starting subject ID count at ADNI/[Subject_ID] level...")
zip_subjects = count_subject_ids_at_level(zip_paths)

üöÄ Starting subject ID count at ADNI/[Subject_ID] level...
üîç Counting subject IDs at ADNI/[Subject_ID] level across ZIP files...

üì¶ Processing: MRI_1 (1).zip
   ‚úÖ Found 39 unique subjects at ADNI/[Subject_ID] level

üì¶ Processing: MRI_2.zip
   ‚úÖ Found 39 unique subjects at ADNI/[Subject_ID] level

üì¶ Processing: MRI_3.zip
   ‚úÖ Found 39 unique subjects at ADNI/[Subject_ID] level

üì¶ Processing: MRI_4.zip
   ‚úÖ Found 39 unique subjects at ADNI/[Subject_ID] level

üì¶ Processing: MRI_5.zip
   ‚úÖ Found 39 unique subjects at ADNI/[Subject_ID] level
