In [1]:
import os
import shutil

In [3]:
def copy_files_from_list(txt_file, source_dir, dest_dir, verbose=True):
    """
    Copy every file named in `txt_file` from `source_dir` into `dest_dir`.

    Each non-blank line of `txt_file` is treated as one file name, with
    surrounding whitespace removed. A name may include a relative sub-path
    (e.g. ``sub/a.csv``); the matching sub-directory is created under
    `dest_dir` as needed. Names that do not resolve to a regular file in
    `source_dir` are reported and skipped rather than raising.

    :param txt_file: Path to the text file containing file names (one per line).
    :param source_dir: Directory where the files are currently located.
    :param dest_dir: Directory where the files should be copied; created if missing.
    :param verbose: When True (the default), print one status line per entry.
                    Pass False to suppress the per-file log for long listings.
    :return: None.
    """
    # Create the destination up front so it exists even when nothing is copied.
    os.makedirs(dest_dir, exist_ok=True)

    # Strip each line once and drop the ones that end up empty.
    with open(txt_file, 'r') as f:
        file_names = [name for name in (line.strip() for line in f) if name]

    for file_name in file_names:
        src_file = os.path.join(source_dir, file_name)
        dst_file = os.path.join(dest_dir, file_name)

        # Guard clause: missing sources are skipped, not treated as errors.
        if not os.path.isfile(src_file):
            if verbose:
                print(f"File not found (skipping): {file_name}")
            continue

        # An entry with a sub-path needs its parent folder under dest_dir;
        # without this, copy2 fails with FileNotFoundError for such entries.
        os.makedirs(os.path.dirname(dst_file), exist_ok=True)

        shutil.copy2(src_file, dst_file)
        if verbose:
            print(f"Copied: {file_name}")

In [5]:
# Paths for this copy run.
txt_file = "processed-data/train_files.txt"   # listing of file names, one per line
source_dir = "training-samples"               # folder the listed files are read from
dest_dir = "gnn-train-split-samples"          # folder that receives the copies

# Keyword arguments keep the role of each path explicit at the call site.
copy_files_from_list(
    txt_file=txt_file,
    source_dir=source_dir,
    dest_dir=dest_dir,
)

Copied: pyruvate_metabolism_train_9622.csv
Copied: butirosin_and_neomyc_train_4999.csv
Copied: cysteine_and_methion_train_8208.csv
Copied: riboflavin_metabolis_train_5918.csv
Copied: biotin_metabolism_train_7622.csv
Copied: inositol_phosphate_m_train_6383.csv
Copied: glutathione_metaboli_train_2142.csv
Copied: propanoate_metabolis_train_1641.csv
Copied: glycolysis___glucone_train_7341.csv
Copied: amino_sugar_and_nucl_train_6644.csv
Copied: terpenoid_backbone_b_train_1198.csv
Copied: beta-alanine_metabol_train_2705.csv
Copied: tyrosine_metabolism_train_8918.csv
Copied: glycosylphosphatidyl_train_6909.csv
Copied: pyruvate_metabolism_train_6622.csv
Copied: porphyrin_and_chloro_train_4997.csv
Copied: glutathione_metaboli_train_4479.csv
Copied: glycosylphosphatidyl_train_2059.csv
Copied: phenylalanine,_tyros_train_2425.csv
Copied: ether_lipid_metaboli_train_1598.csv
Copied: glycosphingolipid_bi_train_2504.csv
Copied: glycosphingolipid_bi_train_9168.csv
Copied: thiamine_metabolism_train_5258

In [6]:
folder_path = 'gnn-train-split-samples'

# os.listdir() returns every directory entry, sub-directories included, which
# would inflate a figure reported as "Total files". Keep regular files only;
# sorting makes the resulting name list independent of directory order.
with os.scandir(folder_path) as entries:
    files = sorted(entry.name for entry in entries if entry.is_file())

num_files = len(files)
print(f"Total files: {num_files}")

Total files: 656
