In [1]:
import os

def print_folder_structure(startpath, indent=0):
    for item in os.listdir(startpath):
        path = os.path.join(startpath, item)
        print('   ' * indent + '|-- ' + item)
        if os.path.isdir(path):
            print_folder_structure(path, indent + 1)

if __name__ == "__main__":
    current_directory = os.getcwd()
    print("Current Folder Structure:\n")
    print_folder_structure(current_directory)

Current Folder Structure:

|-- .DS_Store
|-- mimic4_icd10
   |-- disch_10_full.csv
   |-- top50_icd10_code_list.txt
   |-- disch_test_split.csv
   |-- test_full_hadm_ids.csv
   |-- vocab.csv
   |-- train_full_hadm_ids.csv
   |-- test_50_hadm_ids.csv
   |-- disch_train_split.csv
   |-- TOP_50_CODES.csv
   |-- dev_full.csv
   |-- note_labels_icd10_filtered.csv
   |-- test_50.csv
   |-- dev_50_hadm_ids.csv
   |-- train_full.csv
   |-- disch_10_filtered.csv
   |-- test_full.csv
   |-- train_50_hadm_ids.csv
   |-- train_50.csv
   |-- ALL_CODES_filtered.csv
   |-- disch_dev_split.csv
   |-- dev_full_hadm_ids.csv
   |-- ALL_CODES.csv
   |-- dev_50.csv
|-- Untitled.ipynb
|-- mimic4_icd9
   |-- top50_icd9_code_list.txt
   |-- test_full_hadm_ids.csv
   |-- vocab.csv
   |-- train_full_hadm_ids.csv
   |-- test_50_hadm_ids.csv
   |-- TOP_50_CODES.csv
   |-- dev_50_hadm_ids.csv
   |-- train_50_hadm_ids.csv
   |-- dev_full_hadm_ids.csv
|-- .ipynb_checkpoints
   |-- Untitled-checkpoint.ipynb
|-- physi

In [3]:
import os
import shutil

# Directory path
mimic4_icd10_dir = './mimic4_icd10'

# Files to KEEP
files_to_keep = {
    'train_full.csv',
    'dev_full.csv',
    'test_full.csv',
    'train_50.csv',
    'dev_50.csv',
    'test_50.csv',
    'vocab.csv',
    'TOP_50_CODES.csv',
    'top50_icd10_code_list.txt'
}

# Get all files in the directory
all_files = os.listdir(mimic4_icd10_dir)

# Files to delete
files_to_delete = [f for f in all_files if f not in files_to_keep and os.path.isfile(os.path.join(mimic4_icd10_dir, f))]

# Show what will be deleted
print("=" * 60)
print("FILES TO BE DELETED:")
print("=" * 60)
for file in sorted(files_to_delete):
    file_path = os.path.join(mimic4_icd10_dir, file)
    file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
    print(f"  - {file} ({file_size:.2f} MB)")

print("\n" + "=" * 60)
print("FILES TO BE KEPT:")
print("=" * 60)
for file in sorted(files_to_keep):
    file_path = os.path.join(mimic4_icd10_dir, file)
    if os.path.exists(file_path):
        file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
        print(f"  ✓ {file} ({file_size:.2f} MB)")
    else:
        print(f"  ✗ {file} (NOT FOUND)")

# Ask for confirmation
print("\n" + "=" * 60)
response = input("Do you want to proceed with deletion? (yes/no): ").strip().lower()

if response == 'yes':
    deleted_count = 0
    total_space_freed = 0
    
    for file in files_to_delete:
        file_path = os.path.join(mimic4_icd10_dir, file)
        try:
            file_size = os.path.getsize(file_path)
            os.remove(file_path)
            deleted_count += 1
            total_space_freed += file_size
            print(f"  ✓ Deleted: {file}")
        except Exception as e:
            print(f"  ✗ Error deleting {file}: {e}")
    
    print("\n" + "=" * 60)
    print(f"DELETION COMPLETE!")
    print(f"  - Files deleted: {deleted_count}")
    print(f"  - Space freed: {total_space_freed / (1024 * 1024):.2f} MB")
    print("=" * 60)
else:
    print("\nDeletion cancelled. No files were deleted.")

FILES TO BE DELETED:
  - ALL_CODES.csv (55.35 MB)
  - ALL_CODES_filtered.csv (48.71 MB)
  - dev_50_hadm_ids.csv (0.03 MB)
  - dev_full_hadm_ids.csv (0.03 MB)
  - disch_10_filtered.csv (1187.37 MB)
  - disch_10_full.csv (2954.16 MB)
  - disch_dev_split.csv (39.57 MB)
  - disch_test_split.csv (75.88 MB)
  - disch_train_split.csv (1080.57 MB)
  - note_labels_icd10_filtered.csv (1196.20 MB)
  - test_50_hadm_ids.csv (0.06 MB)
  - test_full_hadm_ids.csv (0.07 MB)
  - train_50_hadm_ids.csv (0.89 MB)
  - train_full_hadm_ids.csv (0.95 MB)

FILES TO BE KEPT:
  ✓ TOP_50_CODES.csv (0.00 MB)
  ✓ dev_50.csv (37.79 MB)
  ✓ dev_full.csv (39.59 MB)
  ✓ test_50.csv (71.96 MB)
  ✓ test_full.csv (75.92 MB)
  ✓ top50_icd10_code_list.txt (0.00 MB)
  ✓ train_50.csv (1027.14 MB)
  ✓ train_full.csv (1081.08 MB)
  ✓ vocab.csv (0.60 MB)



Do you want to proceed with deletion? (yes/no):  yes


  ✓ Deleted: disch_10_full.csv
  ✓ Deleted: disch_test_split.csv
  ✓ Deleted: test_full_hadm_ids.csv
  ✓ Deleted: train_full_hadm_ids.csv
  ✓ Deleted: test_50_hadm_ids.csv
  ✓ Deleted: disch_train_split.csv
  ✓ Deleted: note_labels_icd10_filtered.csv
  ✓ Deleted: dev_50_hadm_ids.csv
  ✓ Deleted: disch_10_filtered.csv
  ✓ Deleted: train_50_hadm_ids.csv
  ✓ Deleted: ALL_CODES_filtered.csv
  ✓ Deleted: disch_dev_split.csv
  ✓ Deleted: dev_full_hadm_ids.csv
  ✓ Deleted: ALL_CODES.csv

DELETION COMPLETE!
  - Files deleted: 14
  - Space freed: 6639.84 MB


In [4]:
import os

def print_folder_structure(startpath, indent=0):
    for item in os.listdir(startpath):
        path = os.path.join(startpath, item)
        print('   ' * indent + '|-- ' + item)
        if os.path.isdir(path):
            print_folder_structure(path, indent + 1)

if __name__ == "__main__":
    current_directory = os.getcwd()
    print("Current Folder Structure:\n")
    print_folder_structure(current_directory)

Current Folder Structure:

|-- .DS_Store
|-- mimic4_icd10
   |-- full_code
      |-- dev_full.csv
      |-- train_full.csv
      |-- test_full.csv
      |-- .ipynb_checkpoints
         |-- dev_full-checkpoint.csv
   |-- vocab.csv
   |-- .ipynb_checkpoints
   |-- top_50
      |-- top50_icd10_code_list.txt
      |-- TOP_50_CODES.csv
      |-- test_50.csv
      |-- train_50.csv
      |-- dev_50.csv
|-- filter.ipynb
|-- .ipynb_checkpoints
   |-- filter-checkpoint.ipynb
