#### Dataset Creation

In [1]:
import random
import torch
import numpy as np

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [2]:
from data_loading.models_dataset import EcoreModelDataset

config_params = dict(
    timeout = 120,
    min_enr = 1.2,
    min_edges = 10
)
model_dataset = EcoreModelDataset('ecore_555', reload=False, **config_params)
# dataset = EcoreModelDataset('modelset', reload=False, remove_duplicates=True, **config_params)
# dataset = EcoreModelDataset('mar-ecore-github', reload=True, **config_params)

Loading ecore_555 from pickle
Loaded ecore_555 with 281 graphs
Loaded ecore_555 with 281 graphs
Graphs: 281


In [4]:
from data_loading.models_dataset import ArchiMateModelDataset

config_params = dict(
    timeout = 120,
    min_edges = 10
)

model_dataset = ArchiMateModelDataset('eamodelset', reload=True, **config_params)

Loading Eamodelset:   0%|          | 0/979 [00:00<?, ?it/s]

Saving eamodelset to pickle
Saved eamodelset to pickle
Loaded eamodelset with 936 graphs
Graphs: 936


In [62]:
import json
import os

ea_dir = 'datasets/eamodelset/processed-models'

ea_models_json = list()
for ea_models_folder in os.listdir(ea_dir):
    ea_models_folder_path = os.path.join(ea_dir, ea_models_folder)
    if os.path.isdir(ea_models_folder_path):
        model_file = os.path.join(ea_models_folder_path, 'model.json')
        if os.path.exists(model_file):
            model = json.load(open(model_file))
            ea_models_json.append(model)

print(f'Found {len(ea_models_json)} EA models')

Found 978 EA models


In [64]:
archi_model = ea_models_json[0]
import json

print(json.dumps(archi_model, indent=4))

{
    "identifier": "https://me.big.tuwien.ac.at/EAModelSet/id-48fb3807bfa249a9bae607b6a92cc390",
    "archimateId": "id-48fb3807bfa249a9bae607b6a92cc390",
    "name": "LAE",
    "description": "",
    "formats": [
        "XML",
        "CSV",
        "JSON",
        "ARCHIMATE"
    ],
    "source": "GitHub",
    "repository": "",
    "license": "",
    "sourceFile": "raw-data/github/archimate/lae.archimate",
    "sourceFormat": "ARCHIMATE",
    "timestamp": "2023-07-06T09:29:17",
    "tags": [],
    "duplicates": [],
    "language": "fr",
    "elements": [
        {
            "id": "id-c3add5029d384f06971fc1ebbc20820f",
            "name": "Assitant Comptable",
            "type": "BusinessActor",
            "layer": "business"
        },
        {
            "id": "id-a077349feda8451ca3ceb3f85ec5a24d",
            "name": "Commercial",
            "type": "BusinessActor",
            "layer": "business"
        },
        {
            "id": "id-054503b05e3640a095663713605a6a22"

In [6]:
import torch

torch.tensor([[1, 3, 4, 5, 6], [1, 3, 4, 5, 6]]).t().shape

torch.Size([5, 2])

In [66]:
import networkx as nx

def create_graph(model_json):
    G = nx.DiGraph()
    for node in model_json['elements']:
        G.add_node(node['id'], **node)
    for edge in model_json['relationships']:
        G.add_edge(edge['sourceId'], edge['targetId'], **edge)
    return G

G = create_graph(archi_model)
G.number_of_nodes(), G.number_of_edges()

(142, 295)