# Train-Test-Spitting of Data "OASIS Alzheimer's Detection"

This notebook investigates the dataset available on Kaggle (https://www.kaggle.com/datasets/ninadaithal/imagesoasis/code) and splits the data into train set and test set appropriately to avoid data leakage. The splitted data is moved into a new directory. <br>

Authors: <br>
Date: 15.07.2024

In [1]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

## Load Raw Data

In [2]:
# local path 
dataset_path = '/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data'

In [3]:
# Four categories 
non_demented = []
very_mild_demented = []
mild_demented = []
moderate_demented = []

# Download per category
for dirname, _, filenames in os.walk(f'{dataset_path}/Non Demented'):
    for filename in filenames:
        non_demented.append(filename)
        
for dirname, _, filenames in os.walk(f'{dataset_path}/Very mild Dementia'):
    for filename in filenames:
        very_mild_demented.append(filename)
        
for dirname, _, filenames in os.walk(f'{dataset_path}/Mild Dementia'):
    for filename in filenames:
        mild_demented.append(filename)
        
for dirname, _, filenames in os.walk(f'{dataset_path}/Moderate Dementia'):
    for filename in filenames:
        moderate_demented.append(filename)

In [4]:
# Number of samples in each category: 
print(len(non_demented))
print(len(very_mild_demented))
print(len(mild_demented))
print(len(moderate_demented))

67222
13725
5003
488


In [5]:
# Create df with filename, patient_id
# 0 = non_demented
# 1 = very_mild_demented
# 2 = mild_demented
# 3 = moderate_demented

df_0 = pd.DataFrame(non_demented, columns=["filename"])
df_0["patient_id"] = df_0["filename"].apply(lambda x: x[5:9])
#df_very_mild_demented = df_very_mild_demented.drop("filename", axis=1)

df_1 = pd.DataFrame(very_mild_demented, columns=["filename"])
df_1["patient_id"] = df_1["filename"].apply(lambda x: x[5:9])

df_2 = pd.DataFrame(mild_demented, columns=["filename"])
df_2["patient_id"] = df_2["filename"].apply(lambda x: x[5:9])

df_3 = pd.DataFrame(moderate_demented, columns=["filename"])
df_3["patient_id"] = df_3["filename"].apply(lambda x: x[5:9])

## Investigate Raw Data

In [6]:
# Check unique values 
df_0.nunique()

filename      67222
patient_id      266
dtype: int64

In [7]:
df_1.nunique()

filename      13725
patient_id       58
dtype: int64

In [8]:
df_2.nunique()

filename      5003
patient_id      22
dtype: int64

In [9]:
df_3.nunique()

filename      488
patient_id      2
dtype: int64

In [21]:
# Check duplicates, there are none 
#df_0 = df_0["filename"].drop_duplicates()
#df_1 = df_1["filename"].drop_duplicates()
#df_2 = df_2["filename"].drop_duplicates()
#df_3 = df_3["filename"].drop_duplicates()

In [10]:
# investigate images 
df_0.nunique()

filename      67222
patient_id      266
dtype: int64

In [11]:
# Number of unique patients per class 
print(len(df_0.value_counts("patient_id")))
print(len(df_1.value_counts("patient_id")))
print(len(df_2.value_counts("patient_id")))
print(len(df_3.value_counts("patient_id")))

266
58
22
2


## Train-Test-Split each class

In [12]:
# Extract unique patient IDs
unique_patients_0 = df_0['patient_id'].unique()
unique_patients_1 = df_1['patient_id'].unique()
unique_patients_2 = df_2['patient_id'].unique()
unique_patients_3 = df_3['patient_id'].unique()

# Split the patient IDs into training and testing sets
train_patients_0, test_patients_0 = train_test_split(unique_patients_0, test_size=0.3, random_state=42)
train_patients_1, test_patients_1 = train_test_split(unique_patients_1, test_size=0.3, random_state=42)
train_patients_2, test_patients_2 = train_test_split(unique_patients_2, test_size=0.3, random_state=42)
train_patients_3, test_patients_3 = train_test_split(unique_patients_3, test_size=0.3, random_state=42)

# Create a boolean mask for splitting the original dataframe
train_mask_0 = df_0['patient_id'].isin(train_patients_0)
test_mask_0 = df_0['patient_id'].isin(test_patients_0)

train_mask_1 = df_1['patient_id'].isin(train_patients_1)
test_mask_1 = df_1['patient_id'].isin(test_patients_1)

train_mask_2 = df_2['patient_id'].isin(train_patients_2)
test_mask_2 = df_2['patient_id'].isin(test_patients_2)

train_mask_3 = df_3['patient_id'].isin(train_patients_3)
test_mask_3 = df_3['patient_id'].isin(test_patients_3)

# Split the dataframe into training and testing sets
train_df_0 = df_0[train_mask_0]
test_df_0 = df_0[test_mask_0]

train_df_1 = df_1[train_mask_1]
test_df_1 = df_1[test_mask_1]

train_df_2 = df_2[train_mask_2]
test_df_2 = df_2[test_mask_2]

train_df_3 = df_3[train_mask_3]
test_df_3 = df_3[test_mask_3]

In [13]:
# check train
# df_0 has 266 unique patients
train_df_0.nunique()

filename      46848
patient_id      186
dtype: int64

In [14]:
# check test
# it worked 
test_df_0.nunique()

filename      20374
patient_id       80
dtype: int64

## Sort data into new folder structure

In [15]:
# train filenames to list
filenames_train_0 = list(train_df_0["filename"])
filenames_train_1 = list(train_df_1["filename"])
filenames_train_2 = list(train_df_2["filename"])
filenames_train_3 = list(train_df_3["filename"])

In [16]:
# test filenames to list
filenames_test_0 = list(test_df_0["filename"])
filenames_test_1 = list(test_df_1["filename"])
filenames_test_2 = list(test_df_2["filename"])
filenames_test_3 = list(test_df_3["filename"])

In [17]:
filenames_train_0

['OAS1_0349_MR1_mpr-4_142.jpg',
 'OAS1_0317_MR1_mpr-1_141.jpg',
 'OAS1_0234_MR1_mpr-4_136.jpg',
 'OAS1_0182_MR1_mpr-1_139.jpg',
 'OAS1_0132_MR1_mpr-2_117.jpg',
 'OAS1_0055_MR1_mpr-3_129.jpg',
 'OAS1_0229_MR1_mpr-2_148.jpg',
 'OAS1_0322_MR1_mpr-2_132.jpg',
 'OAS1_0356_MR1_mpr-4_126.jpg',
 'OAS1_0275_MR1_mpr-1_151.jpg',
 'OAS1_0141_MR1_mpr-3_137.jpg',
 'OAS1_0026_MR1_mpr-2_109.jpg',
 'OAS1_0369_MR1_mpr-3_120.jpg',
 'OAS1_0330_MR1_mpr-1_117.jpg',
 'OAS1_0346_MR1_mpr-1_119.jpg',
 'OAS1_0086_MR1_mpr-4_118.jpg',
 'OAS1_0376_MR1_mpr-3_144.jpg',
 'OAS1_0195_MR1_mpr-3_132.jpg',
 'OAS1_0224_MR1_mpr-1_109.jpg',
 'OAS1_0289_MR1_mpr-4_159.jpg',
 'OAS1_0253_MR1_mpr-1_101.jpg',
 'OAS1_0043_MR1_mpr-1_124.jpg',
 'OAS1_0193_MR1_mpr-4_100.jpg',
 'OAS1_0076_MR1_mpr-2_157.jpg',
 'OAS1_0254_MR1_mpr-6_135.jpg',
 'OAS1_0377_MR1_mpr-3_142.jpg',
 'OAS1_0295_MR1_mpr-2_121.jpg',
 'OAS1_0109_MR1_mpr-4_139.jpg',
 'OAS1_0002_MR1_mpr-4_143.jpg',
 'OAS1_0331_MR1_mpr-1_111.jpg',
 'OAS1_0368_MR1_mpr-3_126.jpg',
 'OAS1_0

In [49]:
# Create new directory
"""
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/train
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/train/Mild_dementia
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/train/Moderate_dementia
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/train/Non_demented
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/train/Very_Mild_dementia

!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/test
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/test/Mild_dementia
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/test/Moderate_dementia
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/test/Non_demented
!mkdir /Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/test/Very_Mild_dementia
"""

### Non Demented

In [None]:
# 0 = non_demented
# 1 = very_mild_demented
# 2 = mild_demented
# 3 = moderate_demented

In [26]:
# Define the source and destination directories
root_dir = "/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data"

source_dir = "/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data/Non Demented"
train_destination_dir = f"{root_dir}/Data_split/train/Non_demented"
test_destination_dir = f"{root_dir}/Data_split/test/Non_demented"

# Ensure destination directory exists
os.makedirs(train_destination_dir, exist_ok=True)
os.makedirs(test_destination_dir, exist_ok=True)

In [19]:
test_destination_dir

'/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data/Data_split/test/Mild_dementia'

#### Train

In [29]:
# List of filenames to be moved
filenames_to_move = filenames_train_0

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(train_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        print(f'Moved: {filename}')

Moved: OAS1_0349_MR1_mpr-4_142.jpg
Moved: OAS1_0317_MR1_mpr-1_141.jpg
Moved: OAS1_0234_MR1_mpr-4_136.jpg
Moved: OAS1_0182_MR1_mpr-1_139.jpg
Moved: OAS1_0132_MR1_mpr-2_117.jpg
Moved: OAS1_0055_MR1_mpr-3_129.jpg
Moved: OAS1_0229_MR1_mpr-2_148.jpg
Moved: OAS1_0322_MR1_mpr-2_132.jpg
Moved: OAS1_0356_MR1_mpr-4_126.jpg
Moved: OAS1_0275_MR1_mpr-1_151.jpg
Moved: OAS1_0141_MR1_mpr-3_137.jpg
Moved: OAS1_0026_MR1_mpr-2_109.jpg
Moved: OAS1_0369_MR1_mpr-3_120.jpg
Moved: OAS1_0330_MR1_mpr-1_117.jpg
Moved: OAS1_0346_MR1_mpr-1_119.jpg
Moved: OAS1_0086_MR1_mpr-4_118.jpg
Moved: OAS1_0376_MR1_mpr-3_144.jpg
Moved: OAS1_0195_MR1_mpr-3_132.jpg
Moved: OAS1_0224_MR1_mpr-1_109.jpg
Moved: OAS1_0289_MR1_mpr-4_159.jpg
Moved: OAS1_0253_MR1_mpr-1_101.jpg
Moved: OAS1_0043_MR1_mpr-1_124.jpg
Moved: OAS1_0193_MR1_mpr-4_100.jpg
Moved: OAS1_0076_MR1_mpr-2_157.jpg
Moved: OAS1_0254_MR1_mpr-6_135.jpg
Moved: OAS1_0377_MR1_mpr-3_142.jpg
Moved: OAS1_0295_MR1_mpr-2_121.jpg
Moved: OAS1_0109_MR1_mpr-4_139.jpg
Moved: OAS1_0002_MR1

#### Test

In [30]:
# List of filenames to be moved
filenames_to_move = filenames_test_0

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(test_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        #print(f'Moved: {filename}')

### Very Mild Dementia

In [34]:
# Define the source and destination directories
root_dir = "/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data"

source_dir = f"{root_dir}/Data/Very Mild Dementia"
train_destination_dir = f"{root_dir}/Data_split/train/Very_Mild_dementia"
test_destination_dir = f"{root_dir}/Data_split/test/Very_Mild_dementia"

# Ensure destination directory exists
os.makedirs(train_destination_dir, exist_ok=True)
os.makedirs(test_destination_dir, exist_ok=True)

#### Train

In [35]:
# List of filenames to be moved
filenames_to_move = filenames_train_1

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(train_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        print(f'Moved: {filename}')

Moved: OAS1_0233_MR1_mpr-3_102.jpg
Moved: OAS1_0286_MR1_mpr-1_102.jpg
Moved: OAS1_0003_MR1_mpr-4_145.jpg
Moved: OAS1_0084_MR1_mpr-2_102.jpg
Moved: OAS1_0042_MR1_mpr-1_122.jpg
Moved: OAS1_0161_MR1_mpr-4_155.jpg
Moved: OAS1_0298_MR1_mpr-1_160.jpg
Moved: OAS1_0263_MR1_mpr-3_148.jpg
Moved: OAS1_0003_MR1_mpr-4_151.jpg
Moved: OAS1_0267_MR1_mpr-2_160.jpg
Moved: OAS1_0084_MR1_mpr-2_116.jpg
Moved: OAS1_0161_MR1_mpr-4_141.jpg
Moved: OAS1_0042_MR1_mpr-1_136.jpg
Moved: OAS1_0233_MR1_mpr-3_116.jpg
Moved: OAS1_0286_MR1_mpr-1_116.jpg
Moved: OAS1_0267_MR1_mpr-2_148.jpg
Moved: OAS1_0039_MR1_mpr-2_151.jpg
Moved: OAS1_0339_MR1_mpr-3_142.jpg
Moved: OAS1_0243_MR1_mpr-4_102.jpg
Moved: OAS1_0098_MR1_mpr-4_146.jpg
Moved: OAS1_0210_MR1_mpr-2_140.jpg
Moved: OAS1_0263_MR1_mpr-3_160.jpg
Moved: OAS1_0290_MR1_mpr-3_133.jpg
Moved: OAS1_0098_MR1_mpr-4_152.jpg
Moved: OAS1_0210_MR1_mpr-2_154.jpg
Moved: OAS1_0290_MR1_mpr-3_127.jpg
Moved: OAS1_0339_MR1_mpr-3_156.jpg
Moved: OAS1_0243_MR1_mpr-4_116.jpg
Moved: OAS1_0298_MR1

#### Test

In [36]:
# List of filenames to be moved
filenames_to_move = filenames_test_1

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(test_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        print(f'Moved: {filename}')

Moved: OAS1_0023_MR1_mpr-3_127.jpg
Moved: OAS1_0240_MR1_mpr-2_122.jpg
Moved: OAS1_0115_MR1_mpr-2_141.jpg
Moved: OAS1_0179_MR1_mpr-3_105.jpg
Moved: OAS1_0380_MR1_mpr-2_139.jpg
Moved: OAS1_0120_MR1_mpr-1_132.jpg
Moved: OAS1_0287_MR1_mpr-1_104.jpg
Moved: OAS1_0022_MR1_mpr-3_121.jpg
Moved: OAS1_0287_MR1_mpr-1_110.jpg
Moved: OAS1_0022_MR1_mpr-3_135.jpg
Moved: OAS1_0179_MR1_mpr-3_111.jpg
Moved: OAS1_0115_MR1_mpr-2_155.jpg
Moved: OAS1_0120_MR1_mpr-1_126.jpg
Moved: OAS1_0023_MR1_mpr-3_133.jpg
Moved: OAS1_0240_MR1_mpr-2_136.jpg
Moved: OAS1_0380_MR1_mpr-2_105.jpg
Moved: OAS1_0179_MR1_mpr-3_139.jpg
Moved: OAS1_0307_MR1_mpr-4_142.jpg
Moved: OAS1_0287_MR1_mpr-1_138.jpg
Moved: OAS1_0022_MR1_mpr-3_109.jpg
Moved: OAS1_0380_MR1_mpr-2_111.jpg
Moved: OAS1_0166_MR1_mpr-3_149.jpg
Moved: OAS1_0307_MR1_mpr-4_156.jpg
Moved: OAS1_0247_MR1_mpr-3_147.jpg
Moved: OAS1_0115_MR1_mpr-4_110.jpg
Moved: OAS1_0115_MR1_mpr-4_104.jpg
Moved: OAS1_0247_MR1_mpr-3_153.jpg
Moved: OAS1_0041_MR1_mpr-1_153.jpg
Moved: OAS1_0288_MR1

### Mild Dementia

In [37]:
# Define the source and destination directories
root_dir = "/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data"

source_dir = f"{root_dir}/Data/Mild Dementia"
train_destination_dir = f"{root_dir}/Data_split/train/Mild_dementia"
test_destination_dir = f"{root_dir}/Data_split/test/Mild_dementia"

# Ensure destination directory exists
os.makedirs(train_destination_dir, exist_ok=True)
os.makedirs(test_destination_dir, exist_ok=True)

#### Train

In [38]:
# List of filenames to be moved
filenames_to_move = filenames_train_2

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(train_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        #print(f'Moved: {filename}')

#### Test

In [39]:
# List of filenames to be moved
filenames_to_move = filenames_test_2

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(test_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        print(f'Moved: {filename}')

Moved: OAS1_0137_MR1_mpr-3_139.jpg
Moved: OAS1_0291_MR1_mpr-3_109.jpg
Moved: OAS1_0052_MR1_mpr-4_109.jpg
Moved: OAS1_0291_MR1_mpr-3_135.jpg
Moved: OAS1_0052_MR1_mpr-4_121.jpg
Moved: OAS1_0137_MR1_mpr-3_105.jpg
Moved: OAS1_0035_MR1_mpr-1_116.jpg
Moved: OAS1_0035_MR1_mpr-1_102.jpg
Moved: OAS1_0052_MR1_mpr-4_135.jpg
Moved: OAS1_0137_MR1_mpr-3_111.jpg
Moved: OAS1_0291_MR1_mpr-3_121.jpg
Moved: OAS1_0052_MR1_mpr-2_158.jpg
Moved: OAS1_0269_MR1_mpr-1_144.jpg
Moved: OAS1_0268_MR1_mpr-1_142.jpg
Moved: OAS1_0268_MR1_mpr-1_156.jpg
Moved: OAS1_0269_MR1_mpr-1_150.jpg
Moved: OAS1_0291_MR1_mpr-2_118.jpg
Moved: OAS1_0137_MR1_mpr-2_128.jpg
Moved: OAS1_0137_MR1_mpr-2_114.jpg
Moved: OAS1_0291_MR1_mpr-2_124.jpg
Moved: OAS1_0291_MR1_mpr-2_130.jpg
Moved: OAS1_0137_MR1_mpr-2_100.jpg
Moved: OAS1_0291_MR1_mpr-4_149.jpg
Moved: OAS1_0052_MR1_mpr-3_149.jpg
Moved: OAS1_0137_MR1_mpr-4_145.jpg
Moved: OAS1_0137_MR1_mpr-4_151.jpg
Moved: OAS1_0269_MR1_mpr-3_109.jpg
Moved: OAS1_0268_MR1_mpr-3_127.jpg
Moved: OAS1_0269_MR1

### Moderate Dementia

In [40]:
# Define the source and destination directories
root_dir = "/Users/linamurasov/code/murasovl/alzheimers-detection/raw_data"

source_dir = f"{root_dir}/Data/Moderate Dementia"
train_destination_dir = f"{root_dir}/Data_split/train/Moderate_dementia"
test_destination_dir = f"{root_dir}/Data_split/test/Moderate_dementia"

# Ensure destination directory exists
os.makedirs(train_destination_dir, exist_ok=True)
os.makedirs(test_destination_dir, exist_ok=True)

#### Train

In [41]:
# List of filenames to be moved
filenames_to_move = filenames_train_3

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(train_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        print(f'Moved: {filename}')

Moved: OAS1_0308_MR1_mpr-1_125.jpg
Moved: OAS1_0308_MR1_mpr-1_131.jpg
Moved: OAS1_0308_MR1_mpr-1_119.jpg
Moved: OAS1_0308_MR1_mpr-3_140.jpg
Moved: OAS1_0308_MR1_mpr-3_154.jpg
Moved: OAS1_0308_MR1_mpr-4_100.jpg
Moved: OAS1_0308_MR1_mpr-4_114.jpg
Moved: OAS1_0308_MR1_mpr-4_128.jpg
Moved: OAS1_0308_MR1_mpr-2_151.jpg
Moved: OAS1_0308_MR1_mpr-2_145.jpg
Moved: OAS1_0308_MR1_mpr-2_144.jpg
Moved: OAS1_0308_MR1_mpr-2_150.jpg
Moved: OAS1_0308_MR1_mpr-4_129.jpg
Moved: OAS1_0308_MR1_mpr-4_115.jpg
Moved: OAS1_0308_MR1_mpr-4_101.jpg
Moved: OAS1_0308_MR1_mpr-3_155.jpg
Moved: OAS1_0308_MR1_mpr-3_141.jpg
Moved: OAS1_0308_MR1_mpr-1_118.jpg
Moved: OAS1_0308_MR1_mpr-1_130.jpg
Moved: OAS1_0308_MR1_mpr-1_124.jpg
Moved: OAS1_0308_MR1_mpr-1_132.jpg
Moved: OAS1_0308_MR1_mpr-1_126.jpg
Moved: OAS1_0308_MR1_mpr-3_157.jpg
Moved: OAS1_0308_MR1_mpr-3_143.jpg
Moved: OAS1_0308_MR1_mpr-4_117.jpg
Moved: OAS1_0308_MR1_mpr-4_103.jpg
Moved: OAS1_0308_MR1_mpr-2_146.jpg
Moved: OAS1_0308_MR1_mpr-2_152.jpg
Moved: OAS1_0308_MR1

#### Test

In [42]:
# List of filenames to be moved
filenames_to_move = filenames_test_3

# Iterate over files in the source directory
for filename in os.listdir(source_dir):
    if filename in filenames_to_move:
        # Construct full file path
        source_path = os.path.join(source_dir, filename)
        # PUT TRAIN OR TEST HERE
        destination_path = os.path.join(test_destination_dir, filename)
        
        # Move the file
        shutil.move(source_path, destination_path)
        print(f'Moved: {filename}')

Moved: OAS1_0351_MR1_mpr-3_112.jpg
Moved: OAS1_0351_MR1_mpr-3_106.jpg
Moved: OAS1_0351_MR1_mpr-2_103.jpg
Moved: OAS1_0351_MR1_mpr-2_117.jpg
Moved: OAS1_0351_MR1_mpr-4_152.jpg
Moved: OAS1_0351_MR1_mpr-4_146.jpg
Moved: OAS1_0351_MR1_mpr-4_147.jpg
Moved: OAS1_0351_MR1_mpr-4_153.jpg
Moved: OAS1_0351_MR1_mpr-2_116.jpg
Moved: OAS1_0351_MR1_mpr-2_102.jpg
Moved: OAS1_0351_MR1_mpr-3_107.jpg
Moved: OAS1_0351_MR1_mpr-3_113.jpg
Moved: OAS1_0351_MR1_mpr-3_105.jpg
Moved: OAS1_0351_MR1_mpr-3_111.jpg
Moved: OAS1_0351_MR1_mpr-3_139.jpg
Moved: OAS1_0351_MR1_mpr-2_114.jpg
Moved: OAS1_0351_MR1_mpr-2_100.jpg
Moved: OAS1_0351_MR1_mpr-2_128.jpg
Moved: OAS1_0351_MR1_mpr-4_145.jpg
Moved: OAS1_0351_MR1_mpr-4_151.jpg
Moved: OAS1_0351_MR1_mpr-1_160.jpg
Moved: OAS1_0351_MR1_mpr-1_148.jpg
Moved: OAS1_0351_MR1_mpr-1_149.jpg
Moved: OAS1_0351_MR1_mpr-4_150.jpg
Moved: OAS1_0351_MR1_mpr-4_144.jpg
Moved: OAS1_0351_MR1_mpr-2_129.jpg
Moved: OAS1_0351_MR1_mpr-2_101.jpg
Moved: OAS1_0351_MR1_mpr-2_115.jpg
Moved: OAS1_0351_MR1

In [47]:
# Check len 

In [46]:
len(filenames_train_0)
len(filenames_test_0)
len(filenames_train_0+filenames_test_0)

67222

In [48]:
len(filenames_train_1)
len(filenames_test_1)
len(filenames_train_1+filenames_test_1)

13725

In [51]:
len(filenames_train_2)
#len(filenames_test_2)
#len(filenames_train_2+filenames_test_2)

3295

In [50]:
len(filenames_train_3)
len(filenames_test_3)
len(filenames_train_3+filenames_test_3)

488