In [19]:
import os
import pandas as pd
import numpy as np
import stellargraph as sg
from stellargraph.mapper import GraphSAGENodeGenerator
from stellargraph.layer import GraphSAGE
from tensorflow.keras import layers, models, optimizers, losses
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Path to the folders containing the data
dyslexia_folder = "Data/Dyslexic"
control_folder = "Data/Control"

# Function to load data from folder
def load_data_from_folder(folder, label):
    data_list = []
    for filename in os.listdir(folder):
        if filename.endswith(".csv"):
            data = pd.read_csv(os.path.join(folder, filename))
            # Create a unique identifier for each patient based on CSV file name
            data["patient_id"] = filename.split(".")[0]  # Extract patient ID from filename
            data["label"] = label  # Add label column
            data_list.append(data)
    return pd.concat(data_list, ignore_index=True)

# Load dyslexia samples
D_data = load_data_from_folder(dyslexia_folder, "dyslexia")

# Load control samples
C_data = load_data_from_folder(control_folder, "control")

# Combine all data into a single DataFrame
data = pd.concat([D_data, C_data], ignore_index=True)

# Extract features (assuming 'LX', 'LY', 'RX', 'RY' are gaze coordinates)
X = data[['LX', 'LY', 'RX', 'RY']].values

# Scale the features (standardization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Extract labels
y = (data['label'] == 'dyslexia').astype(int)  # Binary labels (0 for control, 1 for dyslexia)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Create a StellarGraph object from the training data with node features
# Ensure that the index used for nodes matches the size of X_train
graph_train = sg.StellarGraph(nodes=pd.DataFrame(X_train, index=data.loc[y_train.index].index), edges=None)

# Define the GraphSAGE node generator
generator = GraphSAGENodeGenerator(graph_train, batch_size=50, num_samples=[5, 5])

# Define the GraphSAGE model architecture
graphsage_model = GraphSAGE(
    layer_sizes=[32, 32], generator=generator, bias=True, dropout=0.5
)

# Build the Keras model
x_inp, x_out = graphsage_model.in_out_tensors()
prediction = layers.Dense(units=1, activation="sigmoid")(x_out)
model = models.Model(inputs=x_inp, outputs=prediction)

# Compile the model
model.compile(
    optimizer=optimizers.Adam(lr=0.01),
    loss=losses.binary_crossentropy,
    metrics=["acc"]
)

# Train the model
train_node_ids = data.loc[y_train.index].index.tolist()  # Get node IDs for training
test_node_ids = data.loc[y_test.index].index.tolist()  # Get node IDs for testing

# Filter out node IDs that do not exist in the graph
valid_train_node_ids = [node_id for node_id in train_node_ids if node_id in graph_train.nodes()]
valid_test_node_ids = [node_id for node_id in test_node_ids if node_id in graph_train.nodes()]

# Train the model using valid node IDs
history = model.fit(generator.flow(valid_train_node_ids),  # Use valid node IDs for training
                    y_train,
                    epochs=10,
                    validation_data=(generator.flow(valid_test_node_ids), y_test))  # Use valid node IDs for testing

# Evaluate the model
loss, accuracy = model.evaluate(generator.flow(data.loc[y_test.index]), y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")




ValueError: `y` argument is not supported when using `keras.utils.Sequence` as input.

In [24]:
# Get the set of valid node IDs that exist in the graph
valid_graph_node_ids = set(graph_train.nodes())

# Filter valid train and test node IDs
valid_train_node_ids = [node_id for node_id in train_node_ids if node_id in valid_graph_node_ids]
valid_test_node_ids = [node_id for node_id in test_node_ids if node_id in valid_graph_node_ids]

# Prepare targets separately
# For example, if y_train and y_test are already extracted
# y_train = data.loc[y_train.index]['target_column'].values
# y_test = data.loc[y_test.index]['target_column'].values

# Now create the generators with valid node IDs
train_gen = generator.flow(valid_train_node_ids, shuffle=True)
test_gen = generator.flow(valid_test_node_ids, shuffle=False)

# Train the model using the generators
history = model.fit(train_gen, epochs=10, validation_data=(test_gen, None))

Epoch 1/10


ValueError: in user code:

    File "c:\users\sumantha\appdata\local\programs\python\python38\lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "c:\users\sumantha\appdata\local\programs\python\python38\lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\users\sumantha\appdata\local\programs\python\python38\lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "c:\users\sumantha\appdata\local\programs\python\python38\lib\site-packages\keras\src\engine\training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "c:\users\sumantha\appdata\local\programs\python\python38\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\users\sumantha\appdata\local\programs\python\python38\lib\site-packages\keras\src\engine\input_spec.py", line 219, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "model_2" expects 3 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None, None) dtype=float32>]


In [27]:
# Example model with appropriate input layer
from keras.layers import Input, Dense
from keras.models import Model

# Define input layer with appropriate shape
input_layer = Input(shape=(num_features,))  # num_features should match the number of features in your data

# Define model architecture
dense_layer = Dense(units=64, activation='relu')(input_layer)
output_layer = Dense(units=num_classes, activation='softmax')(dense_layer)  # num_classes is the number of output classes

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Assuming train_gen and test_gen are correctly configured generators
history = model.fit(train_gen, epochs=10, validation_data=test_gen)


NameError: name 'num_features' is not defined

In [28]:
from torch_geometric.data import DataLoader

# Assuming pyg_data_list is a list of PyTorch Geometric Data objects
# Ensure that pyg_data_list is defined and populated by calling graphs_to_pyg_data

# Create DataLoader
dataloader = DataLoader(pyg_data_list, batch_size=1, shuffle=True)

# Call train_model function with the DataLoader
train_model(model, dataloader, criterion, optimizer)


NameError: name 'pyg_data_list' is not defined