In [1]:
import sys
sys.path.append('eucalc_directory')
import eucalc as ec
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

from numpy import genfromtxt
from sklearn.decomposition import PCA

# Path to the folder containing the TIFF images
datafolder = "Old_Young_Comparison"

# os.listdir returns entries in arbitrary, platform-dependent order. Sort them so
# that the sample order (and every seeded train/test split derived from it) is
# the same on every machine and every run.
all_files = sorted(os.listdir(datafolder))

# Keep only the TIFF images in the folder (the extension match is case-insensitive)
names = [file for file in all_files if file.lower().endswith(('.tif', '.tiff'))]


# Split names into K8 and K14 groups by substring match on the file name
names_k8 = [nm for nm in names if 'K8' in nm]
names_k14 = [nm for nm in names if 'K14' in nm]

## k8

### sampeuler

In [2]:
import os
import tifffile as tiff
import eucalc as ec
import matplotlib.colors as mcolors
from PIL import Image
class EctImg:
    """Euler characteristic transform (ECT) of one image along random directions.

    The transform is stored in ``self.image`` as a float array of shape
    ``(k, xpoints)``: one row per sampled direction, one column per threshold
    in ``np.linspace(xinterval[0], xinterval[1], xpoints)``.
    """

    def __init__(self, nm, img, k=20, xinterval=(-1., 1.), xpoints=100, yinterval=(-1., 1.), ypoints=100, rng=None):
        """Store the sampling settings and compute the transform immediately.

        Parameters:
          nm : label of the sample; used in the plot title.
          img : image data, passed unchanged to ``ec.EmbeddedComplex``.
          k : number of random directions (rows of ``self.image``).
          xinterval, xpoints : threshold range and number of thresholds.
          yinterval, ypoints : stored on the instance; ``yinterval`` is only
              read by ``plot``.
          rng : optional object with a ``uniform(low, high, size)`` method
              (e.g. ``np.random.RandomState`` or ``np.random.default_rng``)
              used to draw the directions. ``None`` keeps the original
              behaviour of drawing from NumPy's global random state.
        """
        self.nm = nm
        self.xinterval = xinterval
        self.yinterval = yinterval
        self.xpoints = xpoints
        self.ypoints = ypoints
        self.image = self.compute(img, k, xinterval, xpoints, yinterval, ypoints, rng=rng)

    def compute(self, img, k, xinterval, xpoints, yinterval, ypoints, rng=None):
        """Return the ``(k, xpoints)`` ECT array of ``img`` for ``k`` random directions.

        ``yinterval`` and ``ypoints`` are accepted so existing positional calls
        keep working; this method does not read them. ``rng`` is described in
        ``__init__``.
        """
        cplx = ec.EmbeddedComplex(img)
        cplx.preproc_ect()
        # Only k angles are ever used, so draw exactly k of them.
        sampler = np.random if rng is None else rng
        thetas = sampler.uniform(0, 2 * np.pi, k)
        # The threshold grid is the same for every direction: build it once.
        T = np.linspace(xinterval[0], xinterval[1], xpoints)
        ect1 = np.empty((k, xpoints), dtype=float)
        for i, theta in enumerate(thetas):
            direction = np.array((np.sin(theta), np.cos(theta)))
            ect_dir = cplx.compute_euler_characteristic_transform(direction)
            ect1[i] = [ect_dir.evaluate(t) for t in T]
        return ect1

    def plot(self):
        """Show ``self.image`` as a heat map with a colour bar.

        NOTE(review): the vertical extent is taken from ``yinterval`` although
        the rows of ``self.image`` index directions - confirm the intended axis.
        """
        plt.figure(figsize=(10, 8))
        # Using xinterval and yinterval directly in extent
        plt.imshow(self.image, aspect='auto', extent=[self.xinterval[0], self.xinterval[1], self.yinterval[0], self.yinterval[1]], origin='lower', interpolation='none')
        plt.colorbar(label='Density')
        plt.xlabel('X-axis')
        plt.ylabel('Y-axis')
        plt.title('ECT Image Plot for '+ self.nm)
        plt.show()


def compute_ExIm(names, datafolder, k=480, xinterval=(-1.5, 1.5), xpoints=300, yinterval=(-450., 50.), ypoints=500):
    """Load every image in ``names`` from ``datafolder`` and return its ECT array.

    Parameters:
      names : iterable of file names relative to ``datafolder``.
      datafolder : directory containing the image files.
      k, xinterval, xpoints, yinterval, ypoints : forwarded unchanged to ``EctImg``.

    Returns:
      list with one ``EctImg.image`` array per entry of ``names``, in the same order.
    """
    ExImage = []

    for nm in names:
        file_path = os.path.join(datafolder, nm)
        with Image.open(file_path) as img:
            img_array = np.array(img)

        # EctImg.__init__ already runs the transform and keeps it in .image.
        # Calling compute() a second time doubled the run time and, with randomly
        # sampled directions, returned a different draw than the one stored on the object.
        ect = EctImg(nm, img_array, k, xinterval, xpoints, yinterval, ypoints)
        ExImage.append(ect.image)

    return ExImage

# Build the k8 ECT images (k=360 directions, xpoints=75 thresholds on [-1.5, 1.5]),
# then flatten each (k, xpoints) array into one 1D feature vector per sample.
exims = compute_ExIm(names_k8, datafolder,k=360, xinterval=(-1.5, 1.5), xpoints=75, yinterval=(-20, 60), ypoints=80)
flattened_k8 = [image.flatten() for image in exims]

### ect

In [2]:
import os
import tifffile as tiff
import eucalc as ec
import matplotlib.colors as mcolors
from PIL import Image
class EctImg:
    """Euler characteristic transform (ECT) of one image along evenly spaced directions.

    The transform is stored in ``self.image`` as a float array of shape
    ``(k, xpoints)``: one row per direction, one column per threshold in
    ``np.linspace(xinterval[0], xinterval[1], xpoints)``.
    """

    def __init__(self, nm, img, k=20, xinterval=(-1., 1.), xpoints=100, yinterval=(-1., 1.), ypoints=100):
        """Store the sampling settings and compute the transform immediately.

        Parameters:
          nm : label of the sample.
          img : image data, passed unchanged to ``ec.EmbeddedComplex``.
          k : number of directions (rows of ``self.image``).
          xinterval, xpoints : threshold range and number of thresholds.
          yinterval, ypoints : stored on the instance; not used by ``compute``.
        """
        self.nm = nm
        self.xinterval = xinterval
        self.yinterval = yinterval
        self.xpoints = xpoints
        self.ypoints = ypoints
        self.image = self.compute(img, k, xinterval, xpoints, yinterval, ypoints)

    def compute(self, img, k, xinterval, xpoints, yinterval, ypoints):
        """Return the ``(k, xpoints)`` ECT array of ``img``.

        ``yinterval`` and ``ypoints`` are accepted so existing positional calls
        keep working; this method does not read them.
        """
        cplx = ec.EmbeddedComplex(img)
        cplx.preproc_ect()
        # k + 1 evenly spaced angles on [0, 2*pi]; the last one (2*pi) points the
        # same way as the first (0), so only the first k are used.
        thetas = np.linspace(0, 2 * np.pi, k + 1)[:k]
        # The threshold grid is the same for every direction: build it once.
        T = np.linspace(xinterval[0], xinterval[1], xpoints)
        ect1 = np.empty((k, xpoints), dtype=float)
        for i, theta in enumerate(thetas):
            direction = np.array((np.sin(theta), np.cos(theta)))
            ect_dir = cplx.compute_euler_characteristic_transform(direction)
            ect1[i] = [ect_dir.evaluate(t) for t in T]
        return ect1


def compute_ExIm(names, datafolder, k=480, xinterval=(-1.5, 1.5), xpoints=300, yinterval=(-450., 50.), ypoints=500):
    """Load every image in ``names`` from ``datafolder`` and return its ECT array.

    Parameters:
      names : iterable of file names relative to ``datafolder``.
      datafolder : directory containing the image files.
      k, xinterval, xpoints, yinterval, ypoints : forwarded unchanged to ``EctImg``.

    Returns:
      list with one ``EctImg.image`` array per entry of ``names``, in the same order.
    """
    ExImage = []

    for nm in names:
        file_path = os.path.join(datafolder, nm)
        with Image.open(file_path) as img:
            img_array = np.array(img)

        # EctImg.__init__ already runs the transform and keeps it in .image;
        # reuse it instead of running the whole transform a second time.
        ect = EctImg(nm, img_array, k, xinterval, xpoints, yinterval, ypoints)
        ExImage.append(ect.image)

    return ExImage

# Build the k8 ECT images (k=360 directions, xpoints=150 thresholds on [-1.5, 1.5]),
# then flatten each (k, xpoints) array into one 1D feature vector per sample.
# This rebinds exims and flattened_k8 if the "sampeuler" cell was run before.
exims = compute_ExIm(names_k8, datafolder,k=360, xinterval=(-1.5, 1.5), xpoints=150, yinterval=(-20, 60), ypoints=80)
flattened_k8 = [image.flatten() for image in exims]

In [3]:
import shap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

def extract_age(name):
    """Return the first element of ``name``; the notebook uses it as the age-group label."""
    age_label = name[0]
    return age_label

# Build the classification data set from the flattened k8 ECT features.
# Relies on names_k8 and flattened_k8 defined by earlier cells.
ages_k8 = [extract_age(name) for name in names_k8]
X = np.array(flattened_k8)
y = np.array(ages_k8)
unique_ages = np.unique(y)
# NOTE(review): ypoints/xpoints are not read again in this cell, and 110 x 300 does not
# match the k=360, xpoints=75/150 settings used to build the k8 features - confirm or drop.
ypoints, xpoints = 110, 300  # Set the appropriate image dimensions

# Number of trials for train-test splitting
num_trials = 50
# Only the first label returned by np.unique is processed in this cell.
age = unique_ages[0]
print(f"Processing age group: {age}")

# Binary classification: 1 if the sample belongs to the current age, 0 otherwise
binary_labels = np.where(y == age, 1, 0)

# Parameter grid for Random Forest (5 * 5 * 4 * 3 = 300 combinations)
param_grid = {
    'n_estimators': [50, 100, 200, 500, 1000],   # Number of trees in the forest
    'max_depth': [3, 5, 7, 20, None],             # Tree depth (None means no limit)
    'min_samples_split': [2, 3, 5, 10],           # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4]                 # Minimum samples required at a leaf
}

# Perform Grid Search (3-fold CV, accuracy) to get the best parameters.
# NOTE(review): the search is fitted on all of X, including the samples that end up in
# the test splits below, so the accuracy reported at the end is likely optimistic.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=40),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1
)
grid_search.fit(X, binary_labels)

# Extract best parameters from the grid search
best_params = grid_search.best_params_
print(f"Best parameters for age group {age}: {best_params}")

scores = []
# Loop for multiple random splits
for trial in range(num_trials):
    # Randomize train-test split (a different seed per trial; no stratification is requested)
    X_train, X_test, y_train, y_test = train_test_split(X, binary_labels, test_size=0.2, random_state=40+trial)

    # Initialize and train the Random Forest classifier using best_params
    rf_classifier = RandomForestClassifier(random_state=40, **best_params)
    rf_classifier.fit(X_train, y_train)
    # Mean accuracy on the held-out 20 %
    score = rf_classifier.score(X_test, y_test)
    scores.append(score)
    
# Average test accuracy over all trials
print(f'Average accuracy for age {age} is {np.mean(scores)}')


  from .autonotebook import tqdm as notebook_tqdm


Processing age group: O
Best parameters for age group O: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}
Average accuracy for age O is 0.6857142857142857


### DETECT

In [4]:
import numpy as np
import matplotlib.pyplot as plt

def process_ect_image(ect_image, xinterval, xpoints=None):
    """
    Process a 2D ECT image by:
      1. Averaging across curves (axis=0) to yield a mean curve.
      2. Integrating the mean curve using a cumulative sum.
      3. Subtracting the average (mean) value from the integrated curve for baseline correction.
    
    Parameters:
      ect_image : numpy.ndarray
          2D array with shape (k, n), where each row is one curve sampled at n x positions.
      xinterval : tuple of float
          The (min, max) values of the x-axis, e.g., (-1.5, 1.5).
      xpoints : int, optional
          Expected number of points along the x-direction. The sample spacing is always
          derived from the actual width of ect_image; if xpoints is given and disagrees
          with that width, a warning is issued and the actual width is used.
          
    Returns:
      integrated_curve_centered : numpy.ndarray
          1D processed curve with one value per column of ect_image.

    Raises:
      ValueError
          If ect_image is not 2D or has fewer than two columns (the spacing is undefined).
    """
    import warnings

    ect_image = np.asarray(ect_image)
    if ect_image.ndim != 2:
        raise ValueError(f"ect_image must be 2D (k, xpoints), got shape {ect_image.shape}")

    n_samples = ect_image.shape[1]
    if n_samples < 2:
        raise ValueError("ect_image needs at least two columns to define the x spacing")
    if xpoints is not None and xpoints != n_samples:
        # A stale xpoints used to scale the integral by the wrong dx without any sign of it.
        warnings.warn(
            f"xpoints={xpoints} does not match the image width {n_samples}; "
            f"using {n_samples} to compute the spacing",
            stacklevel=2,
        )

    # Step 1: Average across all curves (rows) to get one representative curve.
    mean_curve = np.mean(ect_image, axis=0)

    # Spacing of the uniform grid the columns were sampled on.
    dx = (xinterval[1] - xinterval[0]) / (n_samples - 1)

    # Step 2: Numerically integrate the mean curve using a cumulative sum.
    # The cumulative integration approximates the integral from the start to each x point.
    integrated_curve = np.cumsum(mean_curve) * dx

    # Step 3: Subtract the average value of the integrated curve (baseline correction)
    baseline = np.mean(integrated_curve)
    integrated_curve_centered = integrated_curve - baseline

    return integrated_curve_centered

# Example usage after computing your ECT images (exims).
# exims is the list of 2D (k, xpoints) arrays returned by compute_ExIm; xinterval must be
# the interval that was passed to compute_ExIm.
xinterval = (-1.5, 1.5)
# Take the number of x samples from the data instead of hard-coding it, so the spacing
# stays correct whichever compute_ExIm cell produced exims (75 is kept as the fallback
# for an empty list).
xpoints = exims[0].shape[1] if len(exims) > 0 else 75

# Process each sample (ECT image)
processed_samples = [process_ect_image(ect_image, xinterval, xpoints) for ect_image in exims]



In [6]:
import shap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

def extract_age(name):
    """Return the first element of ``name``; the notebook uses it as the age-group label."""
    age_label = name[0]
    return age_label

# Build the classification data set from the integrated mean curves.
# Relies on names_k8 and processed_samples defined by earlier cells.
ages_k8 = [extract_age(name) for name in names_k8]
X = np.array(processed_samples)
y = np.array(ages_k8)
unique_ages = np.unique(y)
# NOTE(review): ypoints/xpoints are not read again in this cell; this assignment also
# rebinds the xpoints value set by the DETECT cell - confirm or drop.
ypoints, xpoints = 110, 300  # Set the appropriate image dimensions

# Number of trials for train-test splitting
num_trials = 50
# Only the first label returned by np.unique is processed in this cell.
age = unique_ages[0]
print(f"Processing age group: {age}")

# Binary classification: 1 if the sample belongs to the current age, 0 otherwise
binary_labels = np.where(y == age, 1, 0)

# Parameter grid for Random Forest (5 * 5 * 4 * 3 = 300 combinations)
param_grid = {
    'n_estimators': [50, 100, 200, 500, 1000],   # Number of trees in the forest
    'max_depth': [3, 5, 7, 20, None],             # Tree depth (None means no limit)
    'min_samples_split': [2, 3, 5, 10],           # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4]                 # Minimum samples required at a leaf
}

# Perform Grid Search (3-fold CV, accuracy) to get the best parameters.
# NOTE(review): the search is fitted on all of X, including the samples that end up in
# the test splits below, so the accuracy reported at the end is likely optimistic.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=40),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1
)
grid_search.fit(X, binary_labels)

# Extract best parameters from the grid search
best_params = grid_search.best_params_
print(f"Best parameters for age group {age}: {best_params}")

scores = []
# Loop for multiple random splits
for trial in range(num_trials):
    # Randomize train-test split (a different seed per trial; no stratification is requested)
    X_train, X_test, y_train, y_test = train_test_split(X, binary_labels, test_size=0.2, random_state=40+trial)

    # Initialize and train the Random Forest classifier using best_params
    rf_classifier = RandomForestClassifier(random_state=40, **best_params)
    rf_classifier.fit(X_train, y_train)
    # Mean accuracy on the held-out 20 %
    score = rf_classifier.score(X_test, y_test)
    scores.append(score)
    
# Average test accuracy over all trials
print(f'Average accuracy for age {age} is {np.mean(scores)}')


Processing age group: O
Best parameters for age group O: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 200}
Average accuracy for age O is 0.7171428571428571


## k14

### sampeuler

In [None]:
import os
import tifffile as tiff
import eucalc as ec
import matplotlib.colors as mcolors
from PIL import Image
class EctImg:
    """Euler characteristic transform (ECT) of one image along random directions.

    The transform is stored in ``self.image`` as a float array of shape
    ``(k, xpoints)``: one row per sampled direction, one column per threshold
    in ``np.linspace(xinterval[0], xinterval[1], xpoints)``.
    """

    def __init__(self, nm, img, k=20, xinterval=(-1., 1.), xpoints=100, yinterval=(-1., 1.), ypoints=100, rng=None):
        """Store the sampling settings and compute the transform immediately.

        Parameters:
          nm : label of the sample.
          img : image data, passed unchanged to ``ec.EmbeddedComplex``.
          k : number of random directions (rows of ``self.image``).
          xinterval, xpoints : threshold range and number of thresholds.
          yinterval, ypoints : stored on the instance; not used by ``compute``.
          rng : optional object with a ``uniform(low, high, size)`` method
              (e.g. ``np.random.RandomState`` or ``np.random.default_rng``)
              used to draw the directions. ``None`` keeps the original
              behaviour of drawing from NumPy's global random state.
        """
        self.nm = nm
        self.xinterval = xinterval
        self.yinterval = yinterval
        self.xpoints = xpoints
        self.ypoints = ypoints
        self.image = self.compute(img, k, xinterval, xpoints, yinterval, ypoints, rng=rng)

    def compute(self, img, k, xinterval, xpoints, yinterval, ypoints, rng=None):
        """Return the ``(k, xpoints)`` ECT array of ``img`` for ``k`` random directions.

        ``yinterval`` and ``ypoints`` are accepted so existing positional calls
        keep working; this method does not read them. ``rng`` is described in
        ``__init__``.
        """
        cplx = ec.EmbeddedComplex(img)
        cplx.preproc_ect()
        # Only k angles are ever used, so draw exactly k of them.
        sampler = np.random if rng is None else rng
        thetas = sampler.uniform(0, 2 * np.pi, k)
        # The threshold grid is the same for every direction: build it once.
        T = np.linspace(xinterval[0], xinterval[1], xpoints)
        ect1 = np.empty((k, xpoints), dtype=float)
        for i, theta in enumerate(thetas):
            direction = np.array((np.sin(theta), np.cos(theta)))
            ect_dir = cplx.compute_euler_characteristic_transform(direction)
            ect1[i] = [ect_dir.evaluate(t) for t in T]
        return ect1


def compute_ExIm(names, datafolder, k=480, xinterval=(-1.5, 1.5), xpoints=300, yinterval=(-450., 50.), ypoints=500):
    """Load every image in ``names`` from ``datafolder`` and return its ECT array.

    Parameters:
      names : iterable of file names relative to ``datafolder``.
      datafolder : directory containing the image files.
      k, xinterval, xpoints, yinterval, ypoints : forwarded unchanged to ``EctImg``.

    Returns:
      list with one ``EctImg.image`` array per entry of ``names``, in the same order.
    """
    ExImage = []

    for nm in names:
        file_path = os.path.join(datafolder, nm)
        with Image.open(file_path) as img:
            img_array = np.array(img)

        # EctImg.__init__ already runs the transform and keeps it in .image.
        # Calling compute() a second time doubled the run time and, with randomly
        # sampled directions, returned a different draw than the one stored on the object.
        ect = EctImg(nm, img_array, k, xinterval, xpoints, yinterval, ypoints)
        ExImage.append(ect.image)

    return ExImage

# Build the k14 ECT images (k=360 directions, xpoints=75 thresholds on [-1.5, 1.5]),
# then flatten each (k, xpoints) array into one 1D feature vector per sample.
# Note that this rebinds exims, which previously held the k8 images.
exims = compute_ExIm(names_k14, datafolder,k=360, xinterval=(-1.5, 1.5), xpoints=75, yinterval=(-20, 60), ypoints=80)
flattened_k14 = [image.flatten() for image in exims]

### ect

In [4]:
import os
import tifffile as tiff
import eucalc as ec
import matplotlib.colors as mcolors
from PIL import Image
class EctImg:
    """Euler characteristic transform (ECT) of one image along evenly spaced directions.

    The transform is stored in ``self.image`` as a float array of shape
    ``(k, xpoints)``: one row per direction, one column per threshold in
    ``np.linspace(xinterval[0], xinterval[1], xpoints)``.
    """

    def __init__(self, nm, img, k=20, xinterval=(-1., 1.), xpoints=100, yinterval=(-1., 1.), ypoints=100):
        """Store the sampling settings and compute the transform immediately.

        Parameters:
          nm : label of the sample; used in the plot title.
          img : image data, passed unchanged to ``ec.EmbeddedComplex``.
          k : number of directions (rows of ``self.image``).
          xinterval, xpoints : threshold range and number of thresholds.
          yinterval, ypoints : stored on the instance; ``yinterval`` is only
              read by ``plot``.
        """
        self.nm = nm
        self.xinterval = xinterval
        self.yinterval = yinterval
        self.xpoints = xpoints
        self.ypoints = ypoints
        self.image = self.compute(img, k, xinterval, xpoints, yinterval, ypoints)

    def compute(self, img, k, xinterval, xpoints, yinterval, ypoints):
        """Return the ``(k, xpoints)`` ECT array of ``img``.

        ``yinterval`` and ``ypoints`` are accepted so existing positional calls
        keep working; this method does not read them.
        """
        cplx = ec.EmbeddedComplex(img)
        cplx.preproc_ect()
        # k + 1 evenly spaced angles on [0, 2*pi]; the last one (2*pi) points the
        # same way as the first (0), so only the first k are used.
        thetas = np.linspace(0, 2 * np.pi, k + 1)[:k]
        # The threshold grid is the same for every direction: build it once.
        T = np.linspace(xinterval[0], xinterval[1], xpoints)
        ect1 = np.empty((k, xpoints), dtype=float)
        for i, theta in enumerate(thetas):
            direction = np.array((np.sin(theta), np.cos(theta)))
            ect_dir = cplx.compute_euler_characteristic_transform(direction)
            ect1[i] = [ect_dir.evaluate(t) for t in T]
        return ect1

    def plot(self):
        """Show ``self.image`` as a heat map with a colour bar.

        NOTE(review): the vertical extent is taken from ``yinterval`` although
        the rows of ``self.image`` index directions - confirm the intended axis.
        """
        plt.figure(figsize=(10, 8))
        # Using xinterval and yinterval directly in extent
        plt.imshow(self.image, aspect='auto', extent=[self.xinterval[0], self.xinterval[1], self.yinterval[0], self.yinterval[1]], origin='lower', interpolation='none')
        plt.colorbar(label='Density')
        plt.xlabel('X-axis')
        plt.ylabel('Y-axis')
        plt.title('ECT Image Plot for '+ self.nm)
        plt.show()


def compute_ExIm(names, datafolder, k=480, xinterval=(-1.5, 1.5), xpoints=300, yinterval=(-450., 50.), ypoints=500):
    """Load every image in ``names`` from ``datafolder`` and return its ECT array.

    Parameters:
      names : iterable of file names relative to ``datafolder``.
      datafolder : directory containing the image files.
      k, xinterval, xpoints, yinterval, ypoints : forwarded unchanged to ``EctImg``.

    Returns:
      list with one ``EctImg.image`` array per entry of ``names``, in the same order.
    """
    ExImage = []

    for nm in names:
        file_path = os.path.join(datafolder, nm)
        with Image.open(file_path) as img:
            img_array = np.array(img)

        # EctImg.__init__ already runs the transform and keeps it in .image;
        # reuse it instead of running the whole transform a second time.
        ect = EctImg(nm, img_array, k, xinterval, xpoints, yinterval, ypoints)
        ExImage.append(ect.image)

    return ExImage

# Build the k14 ECT images (k=360 directions, xpoints=300 thresholds on [-1.5, 1.5]),
# then flatten each (k, xpoints) array into one 1D feature vector per sample.
# This rebinds exims and flattened_k14 if the "sampeuler" cell was run before.
exims = compute_ExIm(names_k14, datafolder,k=360, xinterval=(-1.5, 1.5), xpoints=300, yinterval=(-20, 60), ypoints=80)
flattened_k14 = [image.flatten() for image in exims]

In [None]:
import shap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

def extract_age(name):
    """Return the first element of ``name``; the notebook uses it as the age-group label."""
    age_label = name[0]
    return age_label

import os
import tifffile as tiff
import eucalc as ec
import matplotlib.colors as mcolors
from PIL import Image
class EctImg:
    """Euler characteristic transform (ECT) of one image along random directions.

    The transform is stored in ``self.image`` as a float array of shape
    ``(k, xpoints)``: one row per sampled direction, one column per threshold
    in ``np.linspace(xinterval[0], xinterval[1], xpoints)``.
    """

    def __init__(self, nm, img, k=20, xinterval=(-1., 1.), xpoints=100, yinterval=(-1., 1.), ypoints=100, rng=None):
        """Store the sampling settings and compute the transform immediately.

        Parameters:
          nm : label of the sample.
          img : image data, passed unchanged to ``ec.EmbeddedComplex``.
          k : number of random directions (rows of ``self.image``).
          xinterval, xpoints : threshold range and number of thresholds.
          yinterval, ypoints : stored on the instance; not used by ``compute``.
          rng : optional object with a ``uniform(low, high, size)`` method
              (e.g. ``np.random.RandomState`` or ``np.random.default_rng``)
              used to draw the directions. ``None`` keeps the original
              behaviour of drawing from NumPy's global random state.
        """
        self.nm = nm
        self.xinterval = xinterval
        self.yinterval = yinterval
        self.xpoints = xpoints
        self.ypoints = ypoints
        self.image = self.compute(img, k, xinterval, xpoints, yinterval, ypoints, rng=rng)

    def compute(self, img, k, xinterval, xpoints, yinterval, ypoints, rng=None):
        """Return the ``(k, xpoints)`` ECT array of ``img`` for ``k`` random directions.

        ``yinterval`` and ``ypoints`` are accepted so existing positional calls
        keep working; this method does not read them. ``rng`` is described in
        ``__init__``.
        """
        cplx = ec.EmbeddedComplex(img)
        cplx.preproc_ect()
        # Only k angles are ever used, so draw exactly k of them.
        sampler = np.random if rng is None else rng
        thetas = sampler.uniform(0, 2 * np.pi, k)
        # The threshold grid is the same for every direction: build it once.
        T = np.linspace(xinterval[0], xinterval[1], xpoints)
        ect1 = np.empty((k, xpoints), dtype=float)
        for i, theta in enumerate(thetas):
            direction = np.array((np.sin(theta), np.cos(theta)))
            ect_dir = cplx.compute_euler_characteristic_transform(direction)
            ect1[i] = [ect_dir.evaluate(t) for t in T]
        return ect1


def compute_ExIm(names, datafolder, k=480, xinterval=(-1.5, 1.5), xpoints=300, yinterval=(-450., 50.), ypoints=500):
    """Load every image in ``names`` from ``datafolder`` and return its ECT array.

    Parameters:
      names : iterable of file names relative to ``datafolder``.
      datafolder : directory containing the image files.
      k, xinterval, xpoints, yinterval, ypoints : forwarded unchanged to ``EctImg``.

    Returns:
      list with one ``EctImg.image`` array per entry of ``names``, in the same order.
    """
    ExImage = []

    for nm in names:
        file_path = os.path.join(datafolder, nm)
        with Image.open(file_path) as img:
            img_array = np.array(img)

        # EctImg.__init__ already runs the transform and keeps it in .image.
        # Calling compute() a second time doubled the run time and, with randomly
        # sampled directions, returned a different draw than the one stored on the object.
        ect = EctImg(nm, img_array, k, xinterval, xpoints, yinterval, ypoints)
        ExImage.append(ect.image)

    return ExImage
# Repeat the whole k14 pipeline 50 times; each repetition recomputes the ECT features
# (the EctImg defined in this cell draws its directions with np.random.uniform, so the
# features differ between repetitions) and keeps the best mean accuracy seen.
# NOTE(review): reporting the maximum over repeated random redraws is an optimistic
# summary - consider reporting the mean and spread across repetitions as well.
max_score = 0
for i in range(50):
    exims = compute_ExIm(names_k14, datafolder,k=200, xinterval=(-1.5, 1.5), xpoints=75, yinterval=(-20, 60), ypoints=80)
    flattened_k14 = [image.flatten() for image in exims]
    
    # Labels and feature matrix for the k14 samples (names_k14 comes from an earlier cell)
    ages_k14 = [extract_age(name) for name in names_k14]
    X = np.array(flattened_k14)
    y = np.array(ages_k14)
    unique_ages = np.unique(y)
    # NOTE(review): ypoints/xpoints are not read again in this loop, and 110 x 300 does
    # not match the k=200, xpoints=75 features computed above - confirm or drop.
    ypoints, xpoints = 110, 300  # Set the appropriate image dimensions

    # Number of trials for train-test splitting
    num_trials = 50
    # Only the first label returned by np.unique is processed.
    age = unique_ages[0]
    print(f"Processing age group: {age}")

    # Binary classification: 1 if the sample belongs to the current age, 0 otherwise
    binary_labels = np.where(y == age, 1, 0)

    # Parameter grid for Random Forest (5 * 5 * 4 * 3 = 300 combinations)
    param_grid = {
        'n_estimators': [50, 100, 200, 500, 1000],   # Number of trees in the forest
        'max_depth': [3, 5, 7, 20, None],             # Tree depth (None means no limit)
        'min_samples_split': [2, 3, 5, 10],           # Minimum samples required to split a node
        'min_samples_leaf': [1, 2, 4]                 # Minimum samples required at a leaf
    }

    # Perform Grid Search (3-fold CV, accuracy) to get the best parameters.
    # NOTE(review): the search is fitted on all of X, including the samples that end up
    # in the test splits below, so the accuracies are likely optimistic.
    grid_search = GridSearchCV(
        estimator=RandomForestClassifier(random_state=40),
        param_grid=param_grid,
        scoring='accuracy',
        cv=3,
        n_jobs=-1
    )
    grid_search.fit(X, binary_labels)

    # Extract best parameters from the grid search
    best_params = grid_search.best_params_
    print(f"Best parameters for age group {age}: {best_params}")

    scores = []
    # Loop for multiple random splits
    for trial in range(num_trials):
        # Randomize train-test split (a different seed per trial; no stratification is requested)
        X_train, X_test, y_train, y_test = train_test_split(X, binary_labels, test_size=0.2, random_state=40+trial)

        # Initialize and train the Random Forest classifier using best_params
        rf_classifier = RandomForestClassifier(random_state=40, **best_params)
        rf_classifier.fit(X_train, y_train)
        # Mean accuracy on the held-out 20 %
        score = rf_classifier.score(X_test, y_test)
        scores.append(score)
    # Track the best mean accuracy across repetitions
    max_score = max(max_score, np.mean(scores))
    print(f'Average accuracy for loop {i} is {np.mean(scores)}')
print(f'Maximum accuracy is {max_score}')


Processing age group: O
Best parameters for age group O: {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 50}
Average accuracy for loop 0 is 0.6871428571428573
Processing age group: O
Best parameters for age group O: {'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 1000}
Average accuracy for loop 1 is 0.6971428571428572
Processing age group: O
Best parameters for age group O: {'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 50}
Average accuracy for loop 2 is 0.6928571428571427
Processing age group: O
Best parameters for age group O: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 200}
Average accuracy for loop 3 is 0.69
Processing age group: O
Best parameters for age group O: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 200}
Average accuracy for loop 4 is 0.6985714285714286
Processing age group: O
Best parameters for age gro

In [6]:
import shap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

def extract_age(name):
    """Return the first element of ``name``; the notebook uses it as the age-group label."""
    age_label = name[0]
    return age_label

# Build the classification data set from the flattened k14 ECT features.
# Relies on names_k14 and flattened_k14 defined by earlier cells.
ages_k14 = [extract_age(name) for name in names_k14]
X = np.array(flattened_k14)
y = np.array(ages_k14)
unique_ages = np.unique(y)
# NOTE(review): ypoints/xpoints are not read again in this cell - confirm that 110 x 300
# is still meaningful for the current features, or drop the assignment.
ypoints, xpoints = 110, 300  # Set the appropriate image dimensions

# Number of trials for train-test splitting
num_trials = 50
# Only the first label returned by np.unique is processed in this cell.
age = unique_ages[0]
print(f"Processing age group: {age}")

# Binary classification: 1 if the sample belongs to the current age, 0 otherwise
binary_labels = np.where(y == age, 1, 0)

# Parameter grid for Random Forest (5 * 5 * 4 * 3 = 300 combinations)
param_grid = {
    'n_estimators': [50, 100, 200, 500, 1000],   # Number of trees in the forest
    'max_depth': [3, 5, 7, 20, None],             # Tree depth (None means no limit)
    'min_samples_split': [2, 3, 5, 10],           # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4]                 # Minimum samples required at a leaf
}

# Perform Grid Search (3-fold CV, accuracy) to get the best parameters.
# NOTE(review): the search is fitted on all of X, including the samples that end up in
# the test splits below, so the accuracy reported at the end is likely optimistic.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=40),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1
)
grid_search.fit(X, binary_labels)

# Extract best parameters from the grid search
best_params = grid_search.best_params_
print(f"Best parameters for age group {age}: {best_params}")

scores = []
# Loop for multiple random splits
for trial in range(num_trials):
    # Randomize train-test split (a different seed per trial; no stratification is requested)
    X_train, X_test, y_train, y_test = train_test_split(X, binary_labels, test_size=0.2, random_state=40+trial)

    # Initialize and train the Random Forest classifier using best_params
    rf_classifier = RandomForestClassifier(random_state=40, **best_params)
    rf_classifier.fit(X_train, y_train)
    # Mean accuracy on the held-out 20 %
    score = rf_classifier.score(X_test, y_test)
    scores.append(score)
    
# Average test accuracy over all trials
print(f'Average accuracy for age {age} is {np.mean(scores)}')


Processing age group: O
Best parameters for age group O: {'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 50}
Average accuracy for age O is 0.6746666666666666


In [5]:
import shap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

# Define the parameter grid searched exhaustively by the nested loops in this cell
# (6 * 5 * 4 * 3 = 360 combinations, each evaluated on num_trials random splits).
param_grid = {
    'n_estimators': [50, 100, 200, 500, 1000, 5000],       # Number of trees in the forest
    'max_depth': [3, 5, 7, 20, None],                      # Tree depth (None means no limit)
    'min_samples_split': [2, 3, 5, 10],                    # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4]                          # Minimum samples required at a leaf
}

def extract_age(name):
    """Return the first element of ``name``; the notebook uses it as the age-group label."""
    age_label = name[0]
    return age_label

# Manual grid search over param_grid for the k14 features.
# Relies on names_k14 and flattened_k14 defined by earlier cells.
ages_k14 = [extract_age(name) for name in names_k14]
X = np.array(flattened_k14)
y = np.array(ages_k14)
unique_ages = np.unique(y)
# NOTE(review): ypoints/xpoints are not read again in this cell - confirm or drop.
ypoints, xpoints = 110, 300  # Set the appropriate image dimensions

# Number of trials for train-test splitting
num_trials = 50

# One-vs-rest search for every age label found in y
for age in unique_ages:
    print(f"\nProcessing age group: {age}")
    # Binary classification: 1 if the sample belongs to the current age, 0 otherwise
    binary_labels = np.where(y == age, 1, 0)
    best_score = 0
    best_params = {}
    
    # Loop over every combination of parameters
    # NOTE(review): the best combination is chosen by its accuracy on the same test
    # splits that are reported, so best_score is an optimistic estimate.
    for n_estimators in param_grid['n_estimators']:
        for max_depth in param_grid['max_depth']:
            for min_samples_split in param_grid['min_samples_split']:
                for min_samples_leaf in param_grid['min_samples_leaf']:
                    scores = []
                    # Loop for multiple random splits
                    for trial in range(num_trials):
                        # Randomize train-test split (seeds start at 42 here, not 40
                        # as in the other cells, so the splits differ from theirs)
                        X_train, X_test, y_train, y_test = train_test_split(
                            X, binary_labels, test_size=0.2, random_state=42 + trial
                        )
                        
                        # Initialize and train the Random Forest classifier with current parameters
                        rf_classifier = RandomForestClassifier(
                            n_estimators=n_estimators,
                            max_depth=max_depth,
                            min_samples_split=min_samples_split,
                            min_samples_leaf=min_samples_leaf,
                            random_state=40
                        )
                        rf_classifier.fit(X_train, y_train)
                        # Mean accuracy on the held-out 20 %
                        score = rf_classifier.score(X_test, y_test)
                        scores.append(score)
                    
                    # Mean test accuracy of this combination over all trials
                    avg_score = np.mean(scores)
                    
                    # Update best score and parameters if current avg_score is better
                    # (strict '>' keeps the first combination on ties)
                    if avg_score > best_score:
                        best_score = avg_score
                        best_params = {
                            'n_estimators': n_estimators,
                            'max_depth': max_depth,
                            'min_samples_split': min_samples_split,
                            'min_samples_leaf': min_samples_leaf
                        }
                    
                    # Print best accuracy so far after evaluating this set of parameters
                    print(f"Params: n_estimators={n_estimators}, max_depth={max_depth}, "
                          f"min_samples_split={min_samples_split}, min_samples_leaf={min_samples_leaf}, "
                          f"avg_accuracy={avg_score:.4f}, Best so far: {best_score:.4f}")
    
    # Print best parameter set for the current age group
    print(f"\nBest parameters for age {age}: {best_params} with accuracy {best_score:.4f}")



Processing age group: O
Params: n_estimators=50, max_depth=3, min_samples_split=2, min_samples_leaf=1, avg_accuracy=0.6773, Best so far: 0.6773
Params: n_estimators=50, max_depth=3, min_samples_split=2, min_samples_leaf=2, avg_accuracy=0.6813, Best so far: 0.6813
Params: n_estimators=50, max_depth=3, min_samples_split=2, min_samples_leaf=4, avg_accuracy=0.6853, Best so far: 0.6853
Params: n_estimators=50, max_depth=3, min_samples_split=3, min_samples_leaf=1, avg_accuracy=0.6733, Best so far: 0.6853
Params: n_estimators=50, max_depth=3, min_samples_split=3, min_samples_leaf=2, avg_accuracy=0.6813, Best so far: 0.6853
Params: n_estimators=50, max_depth=3, min_samples_split=3, min_samples_leaf=4, avg_accuracy=0.6853, Best so far: 0.6853
Params: n_estimators=50, max_depth=3, min_samples_split=5, min_samples_leaf=1, avg_accuracy=0.6760, Best so far: 0.6853
Params: n_estimators=50, max_depth=3, min_samples_split=5, min_samples_leaf=2, avg_accuracy=0.6733, Best so far: 0.6853


KeyboardInterrupt: 