In [1]:
# importing relevant libraries and packages
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GraphConv, global_mean_pool
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
import snntorch as snn

# category mappings for LDL as integers
ldl_mapping = {
    'Very Low Detection (High Sensitivity)': 0,
    'Low Detection': 1,
    'Moderate Detection': 2,
    'High Detection (Lower Sensitivity)': 3,
    'Very High Detection (Low Sensitivity)': 4
}

def extract_features(descriptors):
    numerical_values = []
    for key, value in descriptors.items():
        if key == "CID":
            continue
        if isinstance(value, (int, float)):
            numerical_values.append(float(value))
        elif isinstance(value, str) and value.replace('.', '', 1).isdigit():
            numerical_values.append(float(value))
    return torch.tensor(numerical_values, dtype=torch.float)

# step 1: Find maximum feature length across JSON files
def find_max_feature_length(json_files, folder_path):
    max_feature_length = 0
    for f in json_files:
        data = json.load(open(os.path.join(folder_path, f), 'r'))
        for section in ['detect_target', 'probe_material', 'test_medium_electrolyte']:
            for item in data.get(section, []):
                feature_length = len(extract_features(item['substance_descriptors']))
                max_feature_length = max(max_feature_length, feature_length)
    return max_feature_length

# updated helper function to extract features and include Magpie Descriptors for "inorganic solid"
def extract_features_with_magpie(descriptors, substance_type):
    numerical_values = []
    for key, value in descriptors.items():
        if key == "CID" or (substance_type == "inorganic solid" and "MagpieData" in key):
            continue
        if isinstance(value, (int, float)):
            numerical_values.append(float(value))
        elif isinstance(value, str) and value.replace('.', '', 1).isdigit():
            numerical_values.append(float(value))
    
    # add Magpie Descriptors for inorganic solids if they exist
    if substance_type == "inorganic solid" and descriptors.get("Magpie Descriptors") is not None:
        magpie_descriptors = [
            float(v) for k, v in descriptors["Magpie Descriptors"].items() if isinstance(v, (int, float))
        ]
        numerical_values.extend(magpie_descriptors)
    
    return torch.tensor(numerical_values, dtype=torch.float)


# updated aggregation function to use the new extract_features_with_magpie function
def aggregate_by_type(items, max_feature_length):
    types = {'small molecule': [], 'inorganic solid': [], 'polymer': []}
    for item in items:
        substance_type = item.get('substance_type', '').lower()
        if substance_type in types:
            # extract features, considering Magpie Descriptors if the type is "inorganic solid"
            feature_tensor = F.pad(
                extract_features_with_magpie(item['substance_descriptors'], substance_type),
                (0, max_feature_length - len(extract_features_with_magpie(item['substance_descriptors'], substance_type)))
            )
            types[substance_type].append(feature_tensor)
    
    aggregated_features = []
    for type_key in types:
        if types[type_key]:
            aggregated_features.append(torch.mean(torch.stack(types[type_key]), dim=0))
        else:
            aggregated_features.append(torch.zeros(max_feature_length))
    
    return torch.cat(aggregated_features)

# generate graph from JSON data 
def generate_graph_from_json(data, max_feature_length):
    nodes = []
    edges = []

    # aggregate target, probe, medium nodes with updated aggregation function
    target_node = aggregate_by_type(data.get('detect_target', []), max_feature_length)
    probe_node = aggregate_by_type(data.get('probe_material', []), max_feature_length)
    medium_node = aggregate_by_type(data.get('test_medium_electrolyte', []), max_feature_length)

    # conditions node
    conditions_features = [
        data.get("test_operating_temperature_celsius", 0.0),
        data.get("min_pH_when_testing", -1.0),
        data.get("max_pH_when_testing", 0.0)
    ]
    conditions_node = F.pad(torch.tensor(conditions_features, dtype=torch.float),
                            (0, max_feature_length * 3 - len(conditions_features)))

    nodes.extend([target_node, probe_node, medium_node, conditions_node])

    edges = [
        (0, 1),  # Target -> Probe
        (0, 2),  # Target -> Medium
        (1, 2),  # Probe -> Medium
        (3, 2)   # Conditions -> Medium
    ]

    x = torch.stack(nodes)
    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
    ldl_label = ldl_mapping.get(data.get("LDL_category", ""), 0)
    return Data(x=x, edge_index=edge_index, y=torch.tensor(ldl_label, dtype=torch.long))

# load and process data--this time, getting only the original data
folder_path = 'your_path'
json_files = [f for f in os.listdir(folder_path) if f.endswith('.json') and not f.endswith('_original.json')]
max_feature_length = find_max_feature_length(json_files, folder_path)

# the rest of the data loading and processing code remains unchanged
def extract_and_concatenate_vectors(substances):
    vector_data = []
    for substance in substances:
        descriptors = substance.get('substance_descriptors', {})
        for key in ["Morgan_128", "maccs_fp", "morgan_fp_128"]:
            value = descriptors.get(key, [])
            if isinstance(value, list):
                vector_data.extend(value)
    return np.array(vector_data)

snn_data = []
gnn_data = []
labels = []

# find the maximum vector length across all files
max_vector_length = 0
all_vectors = []

for f in json_files:
    # load JSON data
    data = json.load(open(os.path.join(folder_path, f), 'r'))

    # generate graph and append to gnn_data
    graph = generate_graph_from_json(data, max_feature_length)
    if graph is not None:
        gnn_data.append(graph)

        # generate the spiking vector by concatenating target, probe, and medium
        spiking_vector = np.concatenate([
            extract_and_concatenate_vectors(data.get('detect_target', [])),
            extract_and_concatenate_vectors(data.get('probe_material', [])),
            extract_and_concatenate_vectors(data.get('test_medium_electrolyte', []))
        ])
        all_vectors.append(spiking_vector)

        # update the maximum vector length if needed
        max_vector_length = max(max_vector_length, len(spiking_vector))
        # append the label for classification
        labels.append(graph.y.item())

# pad all_vectors to the maximum vector length
padded_vectors = [np.pad(vec, (0, max_vector_length - len(vec)), 'constant') for vec in all_vectors]

# append each padded vector directly to snn_data
snn_data.extend(padded_vectors)


  Referenced from: <E57B6A01-82F8-3C7E-AE6D-AE7FA09C6614> /Users/rpf/anaconda3/envs/snn-gnn-env/lib/python3.8/site-packages/torch_scatter/_version_cpu.so
  Expected in:     <AEDB2D9B-AE02-3964-90EC-49E2AD5A10A1> /Users/rpf/anaconda3/envs/snn-gnn-env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  Referenced from: <BF13A8D9-C637-3AAC-BA9B-800642AA6D9D> /Users/rpf/anaconda3/envs/snn-gnn-env/lib/python3.8/site-packages/torch_cluster/_version_cpu.so
  Expected in:     <AEDB2D9B-AE02-3964-90EC-49E2AD5A10A1> /Users/rpf/anaconda3/envs/snn-gnn-env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  Referenced from: <295D5E1A-8A25-3C41-86C8-66ACA79CC8B9> /Users/rpf/anaconda3/envs/snn-gnn-env/lib/python3.8/site-packages/torch_spline_conv/_version_cpu.so
  Expected in:     <AEDB2D9B-AE02-3964-90EC-49E2AD5A10A1> /Users/rpf/anaconda3/envs/snn-gnn-env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  Referenced from: <F0B43E83-2685-37E2-913C-9F61B8C68F9B> /Users/rpf/anacon

In [2]:
snn_data = torch.FloatTensor(snn_data)
labels = torch.LongTensor(labels)

X_train, X_test, y_train, y_test = train_test_split(snn_data, labels, test_size=0.2, random_state=42)
train_data, test_data = train_test_split(gnn_data, test_size=0.2, random_state=42)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

# spike Latency Encoding
def spike_latency_encoding(vector, num_time_steps):
    spike_train = torch.zeros((num_time_steps, len(vector)))
    for idx, value in enumerate(vector):
        if value > 0:  # non-zero entries encode spikes
            time_step = idx % num_time_steps  # assign spike based on index
            spike_train[time_step, idx] = 1
    return spike_train

# Rate Encoding
def rate_encoding(vector, num_time_steps):
    spike_train = torch.zeros((num_time_steps, len(vector)))
    for idx, value in enumerate(vector):
        if value > 0:  # non-zero entries fire spikes
            spike_train[:, idx] = 1  # fire consistently across all timesteps
    return spike_train
    
# parameters
num_time_steps = 16  # define a fixed number of time steps

# apply encodings to all samples in snn_data
latency_encoded_data = [spike_latency_encoding(vector, num_time_steps) for vector in snn_data]
rate_encoded_data = [rate_encoding(vector, num_time_steps) for vector in snn_data]

# define GNN model with customizable number of layers
class GNN(nn.Module):
    def __init__(self, num_features, hidden_channels, num_classes, num_layers):
        super(GNN, self).__init__()
        self.convs = nn.ModuleList([GraphConv(num_features if i == 0 else hidden_channels, hidden_channels) for i in range(num_layers)])
        self.lin = nn.Linear(hidden_channels, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.relu(x)
        x = global_mean_pool(x, batch)
        x = self.lin(x)
        return x

  snn_data = torch.FloatTensor(snn_data)


In [3]:
class LIFNeuronLayer(nn.Module):
    def __init__(self, input_size, hidden_size, beta, threshold):
        super(LIFNeuronLayer, self).__init__()
        self.fc = nn.Linear(input_size, hidden_size)  # fully connected layer
        self.beta = beta  # leaky factor
        self.threshold = threshold  # spiking threshold
        self.membrane_potential = None  # dynamically initialized
    
    def forward(self, x):
        batch_size = x.shape[0]
        hidden_size = self.fc.out_features

        # initialize membrane potential if not done or shape doesn't match
        if self.membrane_potential is None or self.membrane_potential.shape != (batch_size, hidden_size):
            self.membrane_potential = torch.zeros(batch_size, hidden_size, device=x.device)

        outputs = []  # store spikes for each time step
        for t in range(x.shape[1]):  # iterate over time steps
            input_at_t = x[:, t, :]  # input at the current time step
            fc_output = self.fc(input_at_t)  # weighted input

            # update membrane potential with leaky integration
            self.membrane_potential = self.beta * self.membrane_potential + fc_output

            # generate spikes using surrogate gradient
            spikes = self.spike_function(self.membrane_potential)
            # reset membrane potential for neurons that spiked
            self.membrane_potential = self.membrane_potential * (1 - spikes)

            outputs.append(spikes)  # save spikes for the current time step

        # aggregate spikes over time (mean)
        aggregated_output = torch.stack(outputs, dim=1).mean(dim=1)  # shape: [batch_size, hidden_size]
        return aggregated_output

    @staticmethod
    def spike_function(membrane_potential):
        """
        Surrogate gradient for spiking function.
        Approximates the gradient with a smooth curve.
        """
        threshold = 1.0
        return (membrane_potential >= threshold).float() + \
               torch.sigmoid(membrane_potential - threshold) * (1 - (membrane_potential >= threshold).float())


In [4]:
class SNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, beta, threshold):
        super(SNNClassifier, self).__init__()
        self.lif_layer = LIFNeuronLayer(input_size, hidden_size, beta, threshold)
        self.fc_out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lif_output = self.lif_layer(x)  # process input with LIF layer
        return self.fc_out(lif_output)  # classify using the final layer

class SurrogateCrossEntropyLoss(nn.Module):
    def __init__(self):
        super(SurrogateCrossEntropyLoss, self).__init__()
        self.ce_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        """
        apply surrogate gradient mechanism during training only.
        """
        loss = self.ce_loss(outputs, targets)

        # only register hook if gradients are being tracked (training mode)
        if outputs.requires_grad:
            outputs.register_hook(lambda grad: grad * torch.sigmoid(outputs) * (1 - torch.sigmoid(outputs)))

        return loss


In [5]:
class MultiModalModel(nn.Module):
    def __init__(self, snn_input_size, snn_hidden_size, gnn_input_size, gnn_hidden_size, num_classes, snn_beta=0.9, threshold=1.0, gnn_layers=2):
        super(MultiModalModel, self).__init__()
        self.snn = SNNClassifier(snn_input_size, snn_hidden_size, num_classes, beta=snn_beta, threshold=threshold)
        self.gnn = GNN(gnn_input_size, gnn_hidden_size, num_classes, num_layers=gnn_layers)
        self.fusion_layer = nn.Linear(num_classes * 2, num_classes)

    def forward(self, snn_input, gnn_data):
        snn_output = self.snn(snn_input)
        gnn_output = self.gnn(gnn_data)
        combined_output = torch.cat((snn_output, gnn_output), dim=1)
        final_output = self.fusion_layer(combined_output)
        return final_output

In [7]:
def train_multimodal(snn_loader, gnn_loader, model, optimizer, criterion, device):
    model.train()
    total_loss = 0

    for (snn_batch, gnn_data) in zip(snn_loader, gnn_loader):
        snn_input, snn_labels = snn_batch
        snn_input, snn_labels = snn_input.to(device), snn_labels.to(device)
        gnn_data = gnn_data.to(device)

        optimizer.zero_grad()

        # forward pass through the multimodal model
        output = model(snn_input, gnn_data)

        # reset SNN membrane potential to avoid state retention
        if hasattr(model.snn.lif_layer, 'membrane_potential'):
            model.snn.lif_layer.membrane_potential = None

        # calculate loss
        loss = criterion(output, gnn_data.y)

        # backward pass and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(snn_loader)

def test_multimodal(snn_loader, gnn_loader, model, criterion, device):
    model.eval()
    total_loss = 0
    preds, labels = [], []

    with torch.no_grad():
        for (snn_batch, gnn_data) in zip(snn_loader, gnn_loader):
            snn_input, snn_labels = snn_batch
            snn_input, snn_labels = snn_input.to(device), snn_labels.to(device)
            gnn_data = gnn_data.to(device)

            # forward pass through multimodal model
            output = model(snn_input, gnn_data)

            # compute loss
            loss = criterion(output, gnn_data.y)
            total_loss += loss.item()

            # save predictions and true labels
            pred = output.argmax(dim=1)
            preds.extend(pred.cpu().numpy())  # convert to numpy array
            labels.extend(gnn_data.y.cpu().numpy())  # convert to numpy array

    # calculate metrics
    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='weighted')
    precision = precision_score(labels, preds, average='weighted')
    recall = recall_score(labels, preds, average='weighted')
    avg_loss = total_loss / len(snn_loader)

    return avg_loss, accuracy, f1, precision, recall


In [9]:
def process_original_data(folder_path):
    json_files_org = [file for file in os.listdir(folder_path) if file.endswith('_original.json')]
    max_feature_length_org = find_max_feature_length(json_files_org, folder_path)

    snn_data = []
    gnn_data = []
    labels = []
    
    # find the maximum vector length across all files
    max_vector_length = 0
    all_vectors = []
    
    for f in json_files_org:
        file_path = os.path.join(folder_path, f)  # construct the full path to the file
        
        # load JSON data
        with open(file_path, 'r') as json_file:
            data = json.load(json_file)
    
        # generate graph and append to gnn_data
        graph = generate_graph_from_json(data, max_feature_length_org)
        if graph is not None:
            gnn_data.append(graph)
    
            # generate the spiking vector by concatenating target, probe, and medium
            spiking_vector = np.concatenate([
                extract_and_concatenate_vectors(data.get('detect_target', [])),
                extract_and_concatenate_vectors(data.get('probe_material', [])),
                extract_and_concatenate_vectors(data.get('test_medium_electrolyte', []))
            ])
            all_vectors.append(spiking_vector)
    
            # update the maximum vector length if needed
            max_vector_length = max(max_vector_length, len(spiking_vector))
    
            # append the label for classification
            labels.append(graph.y.item())
    
    # pad all_vectors to the maximum vector length
    padded_vectors = [np.pad(vec, (0, max_vector_length - len(vec)), 'constant') for vec in all_vectors]
    
    # append each padded vector directly to snn_data
    snn_data.extend(padded_vectors)

    snn_data_org = torch.FloatTensor(snn_data)
    labels_org = torch.LongTensor(labels)
    
    # parameters
    num_time_steps = 16  # define a fixed number of time steps
    
    # apply encodings to all samples in snn_data
    latency_encoded_data = [spike_latency_encoding(vector, num_time_steps) for vector in snn_data_org]
    #rate_encoded_data = [rate_encoding(vector, num_time_steps) for vector in snn_data_org]

    encoded_data = torch.stack([spike_latency_encoding(vector, num_time_steps) for vector in snn_data_org])

    #encoded_data = torch.stack([rate_encoding(vector, num_time_steps) for vector in snn_data_org])
    
    # define the batch size
    batch_size = 64
    dataset = TensorDataset(encoded_data, labels_org)
    train_size = int(0.001 * len(encoded_data)) # evaluating in the entire original dataset
    test_size = len(encoded_data) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    #snn_test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    _, X_test, _, y_test = train_test_split(snn_data_org, labels_org, test_size=0.999, random_state=42)
    _, test_data = train_test_split(gnn_data, test_size=0.999, random_state=42)
    
    snn_test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    test_loader = GeoDataLoader(test_data, batch_size=batch_size, shuffle=False)

    return snn_test_loader, test_loader, labels_org

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gnn_input_size = gnn_data[0].x.shape[1]

model = MultiModalModel(
    snn_input_size=snn_data.shape[1],  # 1358 based on snn_data.shape
    snn_hidden_size=64,
    gnn_input_size=gnn_input_size,     # matching the actual feature size of gnn_data.x
    gnn_hidden_size=64,
    num_classes=5,
    snn_beta=0.50,
    threshold=1.0,
    gnn_layers=7
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

In [12]:
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import random_split
# prepare data for SNN--using latency encoding
encoded_data = torch.stack([spike_latency_encoding(vector, num_time_steps) for vector in snn_data])

# prepare data for SNN--using latency encoding
#encoded_data = torch.stack([rate_encoding(vector, num_time_steps) for vector in snn_data])


# define the batch size
batch_size = 64
dataset = TensorDataset(encoded_data, labels)
train_size = int(0.8 * len(encoded_data))
test_size = len(encoded_data) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
snn_train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
snn_test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
import numpy as np
import copy
from tqdm import tqdm
from torch_geometric.loader import DataLoader as GeoDataLoader

# number of trials and epochs per trial
num_trials = 10
num_epochs = 200

folder_path_original = 'your_path'
snn_test_loader_org, test_loader_org, labels_org = process_original_data(folder_path_original)

# store results
results = {"accuracy": [], "f1": [], "precision": [], "recall": []}
original_results = {"accuracy": [], "f1": [], "precision": [], "recall": []}

# save the initial model state
initial_model_state = copy.deepcopy(model.state_dict())

# outer loop: Trials
for trial in range(num_trials):
    print(f"\nTrial {trial + 1}/{num_trials}")
    
    # reset the model and optimizer
    model.load_state_dict(initial_model_state)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # progress bar for epochs
    with tqdm(total=num_epochs, desc=f"Training Trial {trial + 1}", leave=False) as pbar:
        # Inner loop: Epochs
        for epoch in range(num_epochs):
            train_loss = train_multimodal(snn_train_loader, train_loader, model, optimizer, criterion, device)
            pbar.update(1)  # update progress bar

    # evaluate on test loaders
    test_loss, accuracy, f1, precision, recall = test_multimodal(snn_test_loader, test_loader, model, criterion, device)
    print(f"Trial {trial + 1} Results - Test Set:")
    print(f"Accuracy: {accuracy:.4f}, F1: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    # store the metrics
    results["accuracy"].append(accuracy)
    results["f1"].append(f1)
    results["precision"].append(precision)
    results["recall"].append(recall)

    # evaluate on the original dataset
    test_loss_org, accuracy_org, f1_org, precision_org, recall_org = test_multimodal(
        snn_test_loader_org, test_loader_org, model, criterion, device
    )
    print(f"Trial {trial + 1} Results - Original Dataset:")
    print(f"Accuracy: {accuracy_org:.4f}, F1: {f1_org:.4f}, Precision: {precision_org:.4f}, Recall: {recall_org:.4f}")

    # store the metrics for the original dataset
    original_results["accuracy"].append(accuracy_org)
    original_results["f1"].append(f1_org)
    original_results["precision"].append(precision_org)
    original_results["recall"].append(recall_org)

# compute averages and standard deviations for the test dataset
print("\nAggregated Results on Test Dataset:")
for metric, values in results.items():
    mean = np.mean(values)
    std_dev = np.std(values)
    print(f"{metric.capitalize()} - Mean: {mean:.4f}, Std Dev: {std_dev:.4f}")

# compute averages and standard deviations for the original dataset
print("\nAggregated Results on Original Dataset:")
for metric, values in original_results.items():
    mean = np.mean(values)
    std_dev = np.std(values)
    print(f"{metric.capitalize()} - Mean: {mean:.4f}, Std Dev: {std_dev:.4f}")



Trial 1/10


                                                                                

Trial 1 Results - Test Set:
Accuracy: 0.8889, F1: 0.8891, Precision: 0.8900, Recall: 0.8889
Trial 1 Results - Original Dataset:
Accuracy: 0.9068, F1: 0.9069, Precision: 0.9076, Recall: 0.9068

Trial 2/10


                                                                                

Trial 2 Results - Test Set:
Accuracy: 0.8949, F1: 0.8954, Precision: 0.8986, Recall: 0.8949
Trial 2 Results - Original Dataset:
Accuracy: 0.9118, F1: 0.9122, Precision: 0.9148, Recall: 0.9118

Trial 3/10


                                                                                

Trial 3 Results - Test Set:
Accuracy: 0.8857, F1: 0.8857, Precision: 0.8858, Recall: 0.8857
Trial 3 Results - Original Dataset:
Accuracy: 0.9076, F1: 0.9075, Precision: 0.9077, Recall: 0.9076

Trial 4/10


                                                                                

Trial 4 Results - Test Set:
Accuracy: 0.8803, F1: 0.8802, Precision: 0.8825, Recall: 0.8803
Trial 4 Results - Original Dataset:
Accuracy: 0.9043, F1: 0.9043, Precision: 0.9068, Recall: 0.9043

Trial 5/10


                                                                                

Trial 5 Results - Test Set:
Accuracy: 0.8814, F1: 0.8815, Precision: 0.8833, Recall: 0.8814
Trial 5 Results - Original Dataset:
Accuracy: 0.9043, F1: 0.9044, Precision: 0.9056, Recall: 0.9043

Trial 6/10


                                                                                

Trial 6 Results - Test Set:
Accuracy: 0.8879, F1: 0.8876, Precision: 0.8907, Recall: 0.8879
Trial 6 Results - Original Dataset:
Accuracy: 0.9102, F1: 0.9102, Precision: 0.9126, Recall: 0.9102

Trial 7/10


                                                                                

Trial 7 Results - Test Set:
Accuracy: 0.8863, F1: 0.8864, Precision: 0.8887, Recall: 0.8863
Trial 7 Results - Original Dataset:
Accuracy: 0.9068, F1: 0.9071, Precision: 0.9083, Recall: 0.9068

Trial 8/10


                                                                                

Trial 8 Results - Test Set:
Accuracy: 0.8863, F1: 0.8867, Precision: 0.8886, Recall: 0.8863
Trial 8 Results - Original Dataset:
Accuracy: 0.9110, F1: 0.9113, Precision: 0.9126, Recall: 0.9110

Trial 9/10


                                                                                

Trial 9 Results - Test Set:
Accuracy: 0.8787, F1: 0.8790, Precision: 0.8805, Recall: 0.8787
Trial 9 Results - Original Dataset:
Accuracy: 0.9076, F1: 0.9080, Precision: 0.9089, Recall: 0.9076

Trial 10/10


                                                                                

Trial 10 Results - Test Set:
Accuracy: 0.8868, F1: 0.8873, Precision: 0.8897, Recall: 0.8868
Trial 10 Results - Original Dataset:
Accuracy: 0.9093, F1: 0.9097, Precision: 0.9111, Recall: 0.9093

Aggregated Results on Test Dataset:
Accuracy - Mean: 0.8857, Std Dev: 0.0044
F1 - Mean: 0.8859, Std Dev: 0.0045
Precision - Mean: 0.8878, Std Dev: 0.0049
Recall - Mean: 0.8857, Std Dev: 0.0044

Aggregated Results on Original Dataset:
Accuracy - Mean: 0.9080, Std Dev: 0.0025
F1 - Mean: 0.9082, Std Dev: 0.0025
Precision - Mean: 0.9096, Std Dev: 0.0029
Recall - Mean: 0.9080, Std Dev: 0.0025
