In [1]:
import json
from pathlib import Path
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm 

def flatten(list_of_list):
    """Return one flat list holding the items of every sub-iterable, in order.

    Only one level of nesting is removed.
    """
    flat = []
    for sublist in list_of_list:
        flat.extend(sublist)
    return flat

# Base directories of the training and test splits, relative to the working
# directory. Files inside them are addressed by transcription id
# (e.g. path_to_training / "<id>.txt").
path_to_training = Path("data/training")
path_to_test = Path("data/test")

In [2]:
#####
# training and test sets of transcription ids
#####
# A transcription id is a meeting id followed by one of the suffixes 'a'..'d'.
# Three ids are left out of the training set.
training_set = [
    meeting_id + suffix
    for meeting_id in (
        'ES2002', 'ES2005', 'ES2006', 'ES2007', 'ES2008', 'ES2009', 'ES2010',
        'ES2012', 'ES2013', 'ES2015', 'ES2016',
        'IS1000', 'IS1001', 'IS1002', 'IS1003', 'IS1004', 'IS1005', 'IS1006', 'IS1007',
        'TS3005', 'TS3008', 'TS3009', 'TS3010', 'TS3011', 'TS3012',
    )
    for suffix in 'abcd'
    if meeting_id + suffix not in {'IS1002a', 'IS1005d', 'TS3012c'}
]

# The test set keeps all four suffixes of every listed meeting.
test_set = [
    meeting_id + suffix
    for meeting_id in (
        'ES2003', 'ES2004', 'ES2011', 'ES2014',
        'IS1008', 'IS1009',
        'TS3003', 'TS3004', 'TS3006', 'TS3007',
    )
    for suffix in 'abcd'
]

In [3]:
# Collect every distinct link label found in the training "<id>.txt" files.
# Each line is split on whitespace and the label is its second field.
graph_links_labels = set()
for transcription_id in training_set:
    with open(path_to_training / f"{transcription_id}.txt", "r") as graphe:
        for line in graphe:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline): nothing to read.
                continue
            graph_links_labels.add(fields[1])

# Sort before enumerating: the iteration order of a set of strings changes
# between interpreter sessions (string hash randomization), which would make
# the label <-> integer mapping differ from one kernel restart to the next.
L = sorted(graph_links_labels)
int2label = dict(enumerate(L))
label2int = {valeur: indice for indice, valeur in enumerate(L)}
N_vocab_links = len(L)

In [4]:
def feature_extract(transcription_id, is_test=False):
    """Build the directed graph of one transcription, with BERT node features.

    Edges are read from ``data/<split>/<transcription_id>.txt``. Every
    non-blank line is split on whitespace into ``source label target``; the
    two node ids are converted to ``int`` and the label is stored on the edge
    as ``edge_type`` after encoding it with the module-level ``label2int``.
    Each node then receives, under the ``feature`` attribute, the row of
    ``feature-extraction/bert/<split>/<transcription_id>.npy`` indexed by its
    node id.

    Parameters
    ----------
    transcription_id : str
        Identifier used to build the input file names.
    is_test : bool, default False
        Read from the ``test`` split instead of the ``training`` split.

    Returns
    -------
    networkx.DiGraph
        Graph with an ``edge_type`` attribute on every edge and a ``feature``
        attribute on every node.

    Raises
    ------
    KeyError
        If an edge label is missing from ``label2int``.
    IndexError
        If a node id is beyond the last row of the BERT array.
    """
    split = "test" if is_test else "training"
    edges_path = Path("data") / split / f"{transcription_id}.txt"
    bert_path = Path("feature-extraction/bert") / split / f"{transcription_id}.npy"

    bert_array = np.load(bert_path)

    G = nx.DiGraph()
    with open(edges_path, "r") as graphe:
        for line in graphe:
            fields = line.split()
            if not fields:
                # Blank line: no edge to add.
                continue
            G.add_edge(int(fields[0]), int(fields[2]), edge_type=label2int[fields[1]])

    # Index the BERT rows by the actual node ids rather than by 0..n-1, so a
    # graph whose ids are not contiguous does not fail on a missing node.
    for node in G.nodes:
        G.nodes[node]['feature'] = bert_array[node]

    return G

def get_labels(transcription_id):
    """Return the entry stored under ``transcription_id`` in data/training_labels.json.

    The JSON file is read again on every call. A ``KeyError`` is raised when
    the id is not a key of the file's top-level object.
    """
    labels_by_transcription = json.loads(Path("data/training_labels.json").read_text())
    return labels_by_transcription[transcription_id]

In [62]:
import tensorflow as tf
from spektral.layers import GCNConv

def create_model(nb_canaux, dim_embedding, dim_post_conv, nb_nodes=None):
    """Build a Keras model that applies one GCN layer per edge channel.

    The model has two inputs:

    * ``node_features`` with per-sample shape ``(nb_nodes, dim_embedding)``;
    * ``edge_features`` with per-sample shape ``(nb_canaux, nb_nodes, nb_nodes)``,
      i.e. one ``nb_nodes x nb_nodes`` matrix per channel.

    Each channel's matrix is passed, together with the node features, to its
    own ``GCNConv(dim_post_conv, activation='relu')`` layer. The per-channel
    outputs are concatenated along axis 2 and fed through ``Dense(200)`` and
    then ``Dense(1)``; neither Dense layer is given an activation.

    Parameters
    ----------
    nb_canaux : int
        Number of edge channels, hence of GCN layers.
    dim_embedding : int
        Size of each node feature vector.
    dim_post_conv : int
        Number of output channels of every GCN layer.
    nb_nodes : int or None, default None
        Number of nodes per graph; ``None`` leaves that dimension unspecified.

    Returns
    -------
    tf.keras.Model
        Uncompiled functional model taking ``[node_features, edge_features]``.
    """
    node_features = tf.keras.Input(shape=(nb_nodes, dim_embedding), name="node_features")
    edge_features = tf.keras.Input(shape=(nb_canaux, nb_nodes, nb_nodes), name="edge_features")

    # One independent GCN layer per channel, each fed that channel's matrix.
    # NOTE(review): Spektral documents GCNConv as expecting a normalized
    # adjacency matrix - confirm that callers preprocess edge_features.
    graph_conv_outputs = []
    for i in range(nb_canaux):
        adjacency_matrix = edge_features[:, i, :, :]
        GCN = GCNConv(dim_post_conv, activation='relu', name="GCN_" + str(i))
        graph_conv_outputs.append(GCN([node_features, adjacency_matrix]))

    concatenated = tf.keras.layers.Concatenate(axis=2, name='concat')(graph_conv_outputs)
    end = tf.keras.layers.Dense(200)(concatenated)
    output = tf.keras.layers.Dense(1)(end)

    # Assemble the functional model.
    model = tf.keras.Model(inputs=[node_features, edge_features], outputs=output)
    return model

# Instantiate the model once; as the cell's last expression its repr is displayed.
create_model(nb_canaux=16, dim_embedding=200, dim_post_conv=100, nb_nodes=None)

<keras.src.engine.functional.Functional at 0x19501291f40>