In [4]:
import json

# Load the JSON file
with open('concatenated_edges.json', 'r') as file:
    data = json.load(file)

# Iterate over edges and add the inverse edges
new_edges = []
for edge in data['edges']:
    # Add the original edge
    new_edges.append(edge)
    
    # Create the inverse edge
    inverse_edge = {
        "source": edge["target"],
        "target": edge["source"],
        "relationship": f"inverse_of_{edge['relationship']}"
    }
    new_edges.append(inverse_edge)

# Update the edges in the data
data['edges'] = new_edges

# Save the updated JSON file
with open('edges_with_inverses.json', 'w') as file:
    json.dump(data, file, indent=4)

print("Inverse edges added and saved to 'edges_with_inverses.json'")

Inverse edges added and saved to 'edges_with_inverses.json'


In [5]:
print(len(data['edges']))

357726


In [21]:
import json
import glob

# Specify the pattern for the files to concatenate (e.g., all JSON files in a folder)
file_pattern = "edges/*.json"  # Replace with your directory path containing JSON files
output_file = "concatenated_edges.json"

# Initialize a dictionary to store all edges
concatenated_data = {"edges": []}

# Iterate over all files matching the pattern
for file_name in glob.glob(file_pattern):
    try:
        # Open each file and load the JSON content
        with open(file_name, 'r') as file:
            data = json.load(file)
            # Append the edges to the main concatenated_data dictionary
            if "edges" in data:
                concatenated_data["edges"].extend(data["edges"])
    except Exception as e:
        print(f"Error processing file {file_name}: {e}")

# Write the concatenated data to the output file
with open(output_file, 'w') as output:
    json.dump(concatenated_data, output, indent=4)

print(f"Concatenation completed. Output written to {output_file}")


Concatenation completed. Output written to concatenated_edges.json


In [2]:
import json
import glob
import pickle

# Specify the pattern for the files to concatenate (e.g., all JSON and pickle files in a folder)
file_pattern = "nodes/*.*"  # Replace with your directory path containing JSON and pickle files
output_file = "concatenated_nodes.pkl"

# Initialize a dictionary to store all nodes
concatenated_data = {"nodes": []}

# Iterate over all files matching the pattern
for file_name in glob.glob(file_pattern):
    try:
        if file_name.endswith('.json'):
            # Open each JSON file and load the content
            with open(file_name, 'r') as file:
                data = json.load(file)
                # Append the nodes to the main concatenated_data dictionary
                if "nodes" in data:
                    concatenated_data["nodes"].extend(data["nodes"])
        elif file_name.endswith('.pkl'):
            # Open each pickle file and load the content
            with open(file_name, 'rb') as file:
                data = pickle.load(file)
                # Append the nodes to the main concatenated_data dictionary
                if "nodes" in data:
                    concatenated_data["nodes"].extend(data["nodes"])
    except Exception as e:
        print(f"Error processing file {file_name}: {e}")

# Write the concatenated data to the output file
with open(output_file, 'wb') as output:
    pickle.dump(concatenated_data, output)

print(f"Concatenation completed. Output written to {output_file}")


Concatenation completed. Output written to concatenated_nodes.pkl


In [3]:
print(concatenated_data["nodes"][-1])

{'id': 'T1574.012', 'type': 'TECHNIQUE', 'features': {'description_embedding': [-0.3422375023365021, -0.11290660500526428, 0.29790374636650085, -0.08616546541452408, 0.3600657284259796, -0.20797811448574066, 0.19554269313812256, 0.1933588683605194, 0.04602975398302078, -0.014048360288143158, -0.18186335265636444, -0.32796433568000793, -0.41597482562065125, 0.25697869062423706, -0.07092352956533432, 0.2613074779510498, 0.2174340933561325, 0.06485998630523682, -0.19480665028095245, 0.1316951960325241, 0.2315562665462494, 0.013660009950399399, -0.24110430479049683, 0.4650406241416931, 0.3810196816921234, 0.08466695994138718, 0.0993027314543724, 0.005710443947464228, -0.5502739548683167, 0.003987912088632584, 0.4400099515914917, 0.13323229551315308, 0.05454646050930023, -0.36378034949302673, 0.044982656836509705, 0.048827867954969406, 0.01869632489979267, -0.22190521657466888, 0.10164187848567963, 0.3677087128162384, -0.36943891644477844, -0.5144397616386414, 0.11307573318481445, 0.2942290

In [None]:
import json
import pickle

# Specify the input files
nodes_file = "concatenated_nodes.pkl"  # Path to the nodes pickle file
edges_file = "edges_with_inverses.json"  # Path to the edges JSON file
output_file = "gnn_input.pkl"  # Output pickle file

# Load nodes data from the pickle file
try:
    with open(nodes_file, 'rb') as file:
        nodes_data = pickle.load(file)
except Exception as e:
    print(f"Error loading nodes file {nodes_file}: {e}")
    nodes_data = {"nodes": []}

# Load edges data from the JSON file
try:
    with open(edges_file, 'r') as file:
        edges_data = json.load(file)
except Exception as e:
    print(f"Error loading edges file {edges_file}: {e}")
    edges_data = {"edges": []}

# Combine nodes and edges into a single dictionary
combined_data = {
    "nodes": nodes_data.get("nodes", []),
    "edges": edges_data.get("edges", [])
}

# Write the combined data to the output pickle file
with open(output_file, 'wb') as output:
    pickle.dump(combined_data, output)

print(f"Combination completed. Output written to {output_file}")
print(f"Number of nodes: {len(combined_data['nodes'])}")
print(f"Number of edges: {len(combined_data['edges'])}")


Combination completed. Output written to gnn_input.pkl
Number of nodes: 254173
Number of edges: 357726


In [7]:
print(combined_data["nodes"][0])

{'id': 'CAPEC-1', 'type': 'CAPEC', 'features': {'description_embedding': [-0.4108079671859741, 0.20447945594787598, 0.042592622339725494, 0.03443080559372902, 0.46285662055015564, -0.23435857892036438, 0.2093401700258255, 0.22783984243869781, 0.05502431467175484, 0.09157264977693558, -0.25912344455718994, -0.21151165664196014, -0.40648066997528076, 0.3665499687194824, -0.2204386591911316, 0.30305221676826477, 0.19241440296173096, 0.1712101846933365, -0.18042705953121185, 0.15306463837623596, 0.13122877478599548, 0.12804502248764038, -0.2897849380970001, 0.3431718945503235, 0.4257776737213135, -0.11790458112955093, 0.04072613641619682, 0.09462146461009979, -0.4299430847167969, -0.13277709484100342, 0.44143640995025635, 0.1273777186870575, -0.0767902359366417, -0.33949580788612366, -0.14485298097133636, 0.1259547919034958, -0.001567040104418993, -0.3194776773452759, -0.12886208295822144, 0.11825007945299149, -0.605018675327301, -0.29551005363464355, 0.23721405863761902, 0.219008609652519