In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from spektral.utils.convolution import gcn_filter
import matplotlib.pyplot as plt

# Basic configuration
BATCH_SIZE = 32
dataset_path = 'fer2013plus'
img_size = 48           # FER2013 images are 48x48
patch_size = 6          # Using 6x6 patches → 8x8 grid = 64 nodes
color_mode = 'grayscale'
VALIDATION_SPLIT = 0.2  # Fraction of the train/ directory held out for validation

# Augmenting generator: used for the training subset only
train_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    fill_mode='constant',
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT
)

# Validation generator: same rescaling and same split fraction, but NO augmentation.
# Validation metrics drive EarlyStopping / ReduceLROnPlateau, so they must be
# measured on unmodified images. Keras selects the held-out files from the
# directory listing and the split fraction (not from the augmentation settings),
# so using the same fraction here keeps the two subsets disjoint.
validation_data_generator = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

# Training subset
train_generator = train_data_generator.flow_from_directory(
    f'{dataset_path}/train',
    target_size=(img_size, img_size),
    batch_size=BATCH_SIZE,
    shuffle=True,
    color_mode=color_mode,
    class_mode='categorical',
    subset='training'
)

# Validation subset (no augmentation, fixed order for reproducible metrics)
validation_generator = validation_data_generator.flow_from_directory(
    f'{dataset_path}/train',
    target_size=(img_size, img_size),
    batch_size=BATCH_SIZE,
    shuffle=False,
    color_mode=color_mode,
    class_mode='categorical',
    subset='validation'
)

# Test generator (no augmentation, just rescaling)
test_data_generator = ImageDataGenerator(rescale=1./255)
test_generator = test_data_generator.flow_from_directory(
    f'{dataset_path}/test',
    target_size=(img_size, img_size),
    batch_size=BATCH_SIZE,
    shuffle=False,
    color_mode=color_mode,
    class_mode='categorical'
)

# Build the adjacency matrix of a 2-D grid in which diagonal cells also count as neighbours
def create_grid_adj_matrix(n_rows, n_cols, self_loop=True):
    """
    Return the dense float32 adjacency matrix of an n_rows x n_cols grid.

    Nodes are numbered row by row (index = row * n_cols + col). Every node is
    linked to each of its up-to-8 surrounding cells that lies inside the grid;
    when self_loop is truthy the diagonal entries are set to 1 as well.
    """
    total = n_rows * n_cols
    adjacency = np.zeros((total, total), dtype=np.float32)

    # Relative (row, col) steps to the surrounding cells. The (0, 0) step stands
    # for the node itself and is only kept when self-loops are requested.
    steps = [
        (d_row, d_col)
        for d_row in (-1, 0, 1)
        for d_col in (-1, 0, 1)
        if self_loop or (d_row, d_col) != (0, 0)
    ]

    for row in range(n_rows):
        for col in range(n_cols):
            src = row * n_cols + col
            for d_row, d_col in steps:
                nb_row = row + d_row
                nb_col = col + d_col
                # Skip steps that would leave the grid
                if nb_row < 0 or nb_row >= n_rows or nb_col < 0 or nb_col >= n_cols:
                    continue
                adjacency[src, nb_row * n_cols + nb_col] = 1.0
    return adjacency

# Convert an image into per-patch node features enriched with patch coordinates
def image_to_graph(image, patch_size):
    """
    Split an image into non-overlapping patches and build one feature row per patch.

    Each row holds the flattened patch pixels (row-major inside the patch,
    channels last) followed by the patch centre as (x, y), normalised by the
    image width and height respectively.

    Args:
        image: Array-like of shape (H, W, C), or (H, W) for a single-channel image.
        patch_size: Side length of the square patches, in pixels. Rows/columns at
            the bottom/right edge that do not fill a whole patch are dropped.

    Returns:
        numpy array of shape (n_rows * n_cols, patch_size * patch_size * C + 2)
        with n_rows = H // patch_size and n_cols = W // patch_size. Patches are
        ordered row by row. The dtype is float32 unless the input dtype needs
        more precision (e.g. float64 stays float64).

    Raises:
        ValueError: If patch_size is not positive or the image is not 2-D/3-D.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be positive, got {patch_size}")

    pixels = np.asarray(image)
    if pixels.ndim == 2:
        # Treat a plain (H, W) image as single-channel
        pixels = pixels[..., np.newaxis]
    if pixels.ndim != 3:
        raise ValueError(
            f"image must have shape (H, W) or (H, W, C), got {pixels.shape}"
        )

    h, w, channels = pixels.shape
    n_rows = h // patch_size
    n_cols = w // patch_size

    # Keep the features in float32 (the dtype the tf.data pipeline declares)
    # instead of letting float64 coordinates silently upcast everything.
    feature_dtype = np.result_type(pixels.dtype, np.float32)

    # Pure-NumPy patch extraction: crop to a whole number of patches, split both
    # spatial axes into (patch index, offset inside patch), then bring the two
    # patch indices to the front. This yields the same (row, col, channel)
    # ordering per patch as tf.image.extract_patches with stride == patch size
    # and 'VALID' padding, without launching a TensorFlow op per image.
    cropped = pixels[:n_rows * patch_size, :n_cols * patch_size]
    patches = (
        cropped.reshape(n_rows, patch_size, n_cols, patch_size, channels)
        .transpose(0, 2, 1, 3, 4)
        .reshape(n_rows * n_cols, patch_size * patch_size * channels)
    )

    # Patch-centre coordinates, normalised by the full image width/height
    x_centers = (np.arange(n_cols) * patch_size + patch_size / 2) / w
    y_centers = (np.arange(n_rows) * patch_size + patch_size / 2) / h
    # Row-by-row patch order: x cycles fastest, y changes once per grid row
    coords = np.stack(
        [np.tile(x_centers, n_rows), np.repeat(y_centers, n_cols)], axis=1
    )

    # Node feature dimension = patch_size*patch_size*C + 2
    return np.concatenate(
        [patches.astype(feature_dtype, copy=False), coords.astype(feature_dtype)],
        axis=1,
    )

# Wrap a Keras image iterator so that every batch comes out as graph data
def graph_generator(original_generator, patch_size, img_size):
    """
    Endlessly yield ((node_features, adjacency), labels) batches.

    For every batch pulled from original_generator:
      - each image is turned into node features by image_to_graph
        (patch pixels plus patch-centre coordinates);
      - the same grid adjacency matrix (8-neighbourhood with self-loops,
        passed through gcn_filter) is repeated once per sample.
    """
    # Square grid: img_size // patch_size patches per side, e.g. 48 // 6 = 8 → 64 nodes
    grid_side = img_size // patch_size
    # The topology is identical for all images, so build and filter it only once
    filtered_adj = gcn_filter(
        create_grid_adj_matrix(grid_side, grid_side, self_loop=True)
    )

    while True:
        images, labels = next(original_generator)
        n_samples = images.shape[0]
        # One feature matrix per image, stacked along a new leading batch axis
        X_batch = np.array(
            [image_to_graph(images[k], patch_size) for k in range(n_samples)]
        )
        # Same adjacency for every sample in the batch
        A_batch = np.array([filtered_adj] * n_samples)
        yield ((X_batch, A_batch), labels)

# Define output signature for tf.data.Dataset.
# Every dimension is derived from the configuration rather than hard-coded, so
# changing img_size / patch_size / color_mode keeps the signature in sync with
# what graph_generator yields.
grid_side = img_size // patch_size                      # 48 // 6 = 8 patches per side
graph_num_nodes = grid_side * grid_side                 # 8 * 8 = 64 nodes
num_channels = {'grayscale': 1, 'rgb': 3, 'rgba': 4}[color_mode]
# For grayscale images: 6*6*1 = 36 pixel features, then +2 for coordinates → 38.
node_feature_dim = patch_size * patch_size * num_channels + 2
# One-hot label width = number of class sub-directories found by the generator
num_label_classes = train_generator.num_classes

output_signature = (
    (tf.TensorSpec(shape=(None, graph_num_nodes, node_feature_dim), dtype=tf.float32),
     tf.TensorSpec(shape=(None, graph_num_nodes, graph_num_nodes), dtype=tf.float32)),
    tf.TensorSpec(shape=(None, num_label_classes), dtype=tf.float32)
)

train_dataset = tf.data.Dataset.from_generator(
    lambda: graph_generator(train_generator, patch_size, img_size),
    output_signature=output_signature
)

validation_dataset = tf.data.Dataset.from_generator(
    lambda: graph_generator(validation_generator, patch_size, img_size),
    output_signature=output_signature
)

test_dataset = tf.data.Dataset.from_generator(
    lambda: graph_generator(test_generator, patch_size, img_size),
    output_signature=output_signature
)

# Test sample prints: pull one batch from each dataset and show its shapes.
# Expected: node features (batch, graph_num_nodes, node_feature_dim),
#           adjacency (batch, graph_num_nodes, graph_num_nodes),
#           labels (batch, num_label_classes).
for header, dataset in (
    ("Train dataset sample:", train_dataset),
    ("\nValidation dataset sample:", validation_dataset),
    ("\nTest dataset sample:", test_dataset),
):
    print(header)
    for (X, y) in dataset.take(1):
        node_features, adj_matrices = X
        print("  Node features shape:", node_features.shape)
        print("  Adjacency shape:", adj_matrices.shape)
        print("  Labels shape:", y.shape)

# Optionally, retrieve one example image for visualization
# (note: this consumes one batch from train_generator)
example_images, _ = next(train_generator)
example_img = example_images[0].squeeze()


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Concatenate, Add
from tensorflow.keras.models import Model
from spektral.layers import GlobalAvgPool, GlobalMaxPool, GCNConv

# GCNConv variant that neither consumes nor forwards a Keras mask
class NoMaskGCNConv(GCNConv):
    """GCNConv that discards any incoming mask and emits none."""

    def call(self, inputs, mask=None):
        """Run the parent layer with masking disabled, whatever `mask` was given."""
        disabled_mask = None
        return super().call(inputs, mask=disabled_mask)

    def compute_mask(self, inputs, mask=None):
        """Always report that this layer's output carries no mask."""
        return None

def build_gcn_model(n_node_features, n_classes, num_nodes):
    """
    Assemble the two-input graph classifier.

    Args:
        n_node_features: Length of each node's feature vector.
        n_classes: Number of output classes (width of the softmax layer).
        num_nodes: Number of nodes per graph.

    Returns:
        An uncompiled Keras Model taking [node_features, adjacency_matrix]
        and producing class probabilities.
    """
    # Two inputs: per-node features and the (num_nodes x num_nodes) adjacency
    x_in = tf.keras.Input(shape=(num_nodes, n_node_features), name="node_features")
    a_in = tf.keras.Input(shape=(num_nodes, num_nodes), name="adjacency_matrix")

    # Stack of mask-free GCN layers: 32 channels, then 64, each with ReLU
    hidden = x_in
    for channels in (32, 64):
        hidden = NoMaskGCNConv(channels, activation='relu')([hidden, a_in])

    # Summarise the node dimension with both max and average pooling,
    # then join the two summaries into one graph-level vector
    pooled_max = GlobalMaxPool()(hidden)
    pooled_avg = GlobalAvgPool(name='global_avg_pool')(hidden)
    graph_vector = Concatenate()([pooled_max, pooled_avg])

    # Classification head: one hidden dense layer followed by softmax
    dense_out = Dense(256, activation='relu')(graph_vector)
    probabilities = Dense(n_classes, activation='softmax')(dense_out)

    return Model(inputs=[x_in, a_in], outputs=probabilities)

# Parameters derived from earlier cells:
# With patch_size=6 on a 48x48 image, we have an 8x8 grid => 64 nodes.
img_size = 48
patch_size = 6
num_nodes = (img_size // patch_size) ** 2  # 8^2 = 64
# Each node carries the flattened grayscale patch (6x6 = 36 values) plus the
# normalized (x, y) patch-center coordinates appended by image_to_graph, so the
# model input width must be 36 + 2 = 38 to match the dataset's output_signature.
n_node_features = patch_size * patch_size + 2
n_classes = 8  # Must match the label width declared in output_signature

model = build_gcn_model(n_node_features=n_node_features, n_classes=n_classes, num_nodes=num_nodes)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# NOTE(review): ActivationLogger is not defined in this part of the notebook;
# presumably it is a callback defined in another cell — confirm it is run first.
activation_logger = ActivationLogger(train_dataset, "global_avg_pool")

# ----------------- Training -----------------
# Callbacks for robust training
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

# The datasets wrap endless generators, so the number of batches per epoch and
# per validation pass has to be given explicitly.
history = model.fit(
    train_dataset,
    steps_per_epoch=len(train_generator),
    validation_data=validation_dataset,
    validation_steps=len(validation_generator),
    epochs=20,
    callbacks=[early_stop, reduce_lr, activation_logger]
)
