In [19]:
import json
from itertools import combinations
from collections import defaultdict

In [20]:
def find_groups_with_unique_software(json_path):
    """
    Analyze groups to find those that use globally unique software.

    A software ID counts as "globally unique" when exactly one group lists
    it under its ``"items"`` key. A human-readable summary is printed to
    stdout in addition to the returned data.

    Parameters
    ----------
    json_path : str
        Path to JSON file containing group-to-software mapping. The
        top-level object maps a group ID to a record; only the record's
        ``"items"`` (software IDs) and ``"name"`` keys are read, and both
        are optional.

    Returns
    -------
    dict
        ``"groups_with_unique_software"`` maps each group ID that owns at
        least one globally unique software ID to the list of those IDs
        (listing order, each ID once). ``"globally_unique_software"`` is
        the set of all software IDs listed by exactly one group.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Step 1: Build software-to-groups map
    software_to_groups = defaultdict(set)
    for group_id, info in data.items():
        for sw in info.get("items", []):
            software_to_groups[sw].add(group_id)

    # Step 2: Identify globally unique software
    globally_unique_software = {sw for sw, groups in software_to_groups.items() if len(groups) == 1}

    # Step 3: Identify groups using any unique software
    groups_with_unique_software = defaultdict(list)
    for group_id, info in data.items():
        # dict.fromkeys drops repeated IDs while keeping listing order, so a
        # software ID listed twice by the same group is reported only once.
        for sw in dict.fromkeys(info.get("items", [])):
            if sw in globally_unique_software:
                groups_with_unique_software[group_id].append(sw)

    # Step 4: Summary
    print(f"Total groups with globally unique software: {len(groups_with_unique_software)}")
    print(f"Total globally unique software: {len(globally_unique_software)}\n")

    for group_id, sw_list in groups_with_unique_software.items():
        # "name" is optional, mirroring how "items" is read above; a record
        # without it must not abort the report with a KeyError.
        group_name = data[group_id].get("name", "")
        print(f"Group: {group_id} ({group_name})")
        for sw in sw_list:
            print(f"  - {sw}")
        print()

    return {
        "groups_with_unique_software": groups_with_unique_software,
        "globally_unique_software": globally_unique_software
    }


In [21]:
# Example usage: run the globally-unique-software analysis on the mapping file.
# `json_path` is also read by the later pair and triplet analysis cells, so
# this cell has to run before them.
json_path = "mitre_software_map.json"  # Replace with actual path
groups_with_uniques = find_groups_with_unique_software(json_path)

Total groups with globally unique software: 111
Total globally unique software: 423

Group: G0001 (Axiom)
  - S0009
  - S0672

Group: G0003 (Cleaver)
  - S0056
  - S0004

Group: G0004 (Ke3chang)
  - S0280
  - S0691
  - S0439
  - S0227

Group: G0005 (APT12)
  - S0015
  - S0003

Group: G0006 (APT1)
  - S0017
  - S0025
  - S0119
  - S0026
  - S0121
  - S0122
  - S0345
  - S0109
  - S0123

Group: G0007 (APT28)
  - S0045
  - S0023
  - S0137
  - S0351
  - S0243
  - S0134
  - S0502
  - S0193
  - S0410
  - S0135
  - S0044
  - S0162
  - S0397
  - S0138
  - S0136
  - S0161
  - S0117
  - S0251
  - S0314

Group: G0009 (Deep Panda)
  - S0080
  - S0074
  - S0142

Group: G0010 (Turla)
  - S0335
  - S0126
  - S0538
  - S0091
  - S0168
  - S0537
  - S0581
  - S1075
  - S0265
  - S0395
  - S0256
  - S0587
  - S0393
  - S0668
  - S0022
  - S0102

Group: G0011 (PittyTiger)
  - S0010

Group: G0013 (APT30)
  - S0031
  - S0036
  - S0034
  - S0028
  - S0035

Group: G0016 (APT29)
  - S0677
  - S0635
  - S0054


In [22]:
def analyze_unique_software_pairs(json_path):
    """
    Analyze groups to find globally unique software, then identify unique pairs of software
    (after removing global uniques), and find new groups with only unique software pairs.

    A software ID is "globally unique" when exactly one group lists it. A
    pair is "unique" when exactly one group lists both of its members.
    Pairs are built from each group's distinct, non-globally-unique
    software IDs and are stored as sorted 2-tuples. A summary is printed to
    stdout.

    Parameters
    ----------
    json_path : str
        Path to JSON file containing group-to-software mapping. Per group
        record the keys ``"items"`` (software IDs), ``"software name"``
        (names positionally matching ``"items"``) and ``"name"`` are read;
        all three are optional.

    Returns
    -------
    dict
        Summary of globally unique software, unique software pairs, and new groups:

        - ``"groups_with_global_uniques"``: set of group IDs owning at
          least one globally unique software ID.
        - ``"groups_with_unique_pairs"``: group ID -> list of its unique
          pairs (only groups with at least one appear).
        - ``"new_unique_pair_groups"``: set of group IDs that have unique
          pairs but no globally unique software.
        - ``"globally_unique_software"``: set of software IDs.
        - ``"all_unique_pairs"``: set of all unique pairs.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Step 1: Build software-to-groups mapping
    software_to_groups = defaultdict(set)
    for group_id, info in data.items():
        for soft in info.get("items", []):
            software_to_groups[soft].add(group_id)

    # Step 2: Find globally unique software
    globally_unique_software = {soft for soft, groups in software_to_groups.items() if len(groups) == 1}

    # Step 3: Build software-pair-to-groups and group-to-pairs mappings
    pair_to_groups = defaultdict(set)
    group_to_pairs = dict()

    for group_id, info in data.items():
        software_ids = info.get("items", [])
        software_names = info.get("software name", [])  # optional
        soft_map = dict(zip(software_ids, software_names)) if software_names else {}

        # Use a set so an ID listed twice by the same group cannot produce a
        # degenerate (s, s) pair or the same pair several times; such a
        # self-pair would otherwise look "unique" and falsely flag the group.
        filtered_software = {s for s in software_ids if s not in globally_unique_software}
        pairs = list(combinations(sorted(filtered_software), 2))

        group_to_pairs[group_id] = {
            "pairs": pairs,
            "soft_map": soft_map,
            "name": info.get("name", "")
        }

        for pair in pairs:
            pair_to_groups[pair].add(group_id)

    # Step 4: Identify unique software pairs (in only one group)
    unique_pairs = {pair for pair, groups in pair_to_groups.items() if len(groups) == 1}

    # Step 5: Find groups with unique pairs
    groups_with_unique_pairs = defaultdict(list)
    for group_id, pair_info in group_to_pairs.items():
        for pair in pair_info["pairs"]:
            if pair in unique_pairs:
                groups_with_unique_pairs[group_id].append(pair)

    # Step 6: Get groups with globally unique software
    groups_with_global_uniques = {
        group_id for group_id, info in data.items()
        if any(s in globally_unique_software for s in info.get("items", []))
    }

    # Step 7: Identify new groups with only unique software pairs
    new_unique_pair_groups = set(groups_with_unique_pairs) - groups_with_global_uniques

    # Step 8: Print summary
    print(f"\nTotal groups with globally unique software: {len(groups_with_global_uniques)}")
    print(f"Total groups with unique software pairs (excluding globals): {len(groups_with_unique_pairs)}")
    print(f"New groups with unique software pairs only: {len(new_unique_pair_groups)}\n")

    if new_unique_pair_groups:
        print("Groups with unique software pairs only (no global unique software):")
        for group_id in sorted(new_unique_pair_groups):
            group_info = group_to_pairs[group_id]
            group_name = group_info["name"]
            soft_map = group_info.get("soft_map", {})
            print(f"\n{group_id}: {group_name}")
            for s1, s2 in groups_with_unique_pairs[group_id]:
                # Fall back to the raw ID when no display name is available.
                name1 = soft_map.get(s1, s1)
                name2 = soft_map.get(s2, s2)
                print(f"  - ({s1}, {s2}) → \"{name1}\" + \"{name2}\"")

    return {
        "groups_with_global_uniques": groups_with_global_uniques,
        "groups_with_unique_pairs": groups_with_unique_pairs,
        "new_unique_pair_groups": new_unique_pair_groups,
        "globally_unique_software": globally_unique_software,
        "all_unique_pairs": unique_pairs
    }


In [23]:
# Run the pair-level analysis on the same mapping file; `json_path` comes from
# the example-usage cell, so that cell must have been executed first.
groups_with_uniques_pair = analyze_unique_software_pairs(json_path)


Total groups with globally unique software: 111
Total groups with unique software pairs (excluding globals): 60
New groups with unique software pairs only: 11

Groups with unique software pairs only (no global unique software):

G0008: Carbanak
  - (S0029, S0030) → "S0029" + "S0030"
  - (S0030, S0108) → "S0030" + "S0108"

G0078: Gorgon Group
  - (S0262, S0336) → "S0262" + "S0336"
  - (S0332, S0336) → "S0332" + "S0336"

G0082: APT38
  - (S0002, S0334) → "S0002" + "S0334"
  - (S0002, S0376) → "S0002" + "S0376"
  - (S0002, S0593) → "S0002" + "S0593"
  - (S0039, S0334) → "S0039" + "S0334"
  - (S0039, S0376) → "S0039" + "S0376"
  - (S0039, S0593) → "S0039" + "S0593"
  - (S0334, S0376) → "S0334" + "S0376"
  - (S0334, S0593) → "S0334" + "S0593"
  - (S0334, S0607) → "S0334" + "S0607"
  - (S0376, S0607) → "S0376" + "S0607"
  - (S0593, S0607) → "S0593" + "S0607"

G0091: Silence
  - (S0191, S0195) → "S0191" + "S0195"
  - (S0191, S0363) → "S0191" + "S0363"
  - (S0195, S0363) → "S0195" + "S0363"



In [24]:
def analyze_unique_software_triplets(json_path):
    """
    Analyze groups to find unique triplets of software after removing
    globally unique software and software involved in unique pairs.

    A software ID is "globally unique" when exactly one group lists it; a
    pair or triplet is "unique" when exactly one group lists all of its
    members. Pairs and triplets are built from each group's distinct
    software IDs and stored as sorted tuples. A summary is printed to
    stdout.

    Parameters
    ----------
    json_path : str
        Path to JSON file containing group-to-software mapping. Per group
        record the keys ``"items"`` (software IDs) and ``"name"`` are read;
        both are optional.

    Returns
    -------
    dict
        Summary of globally unique software, unique pairs, and unique triplets:

        - ``"globally_unique_software"``: set of software IDs.
        - ``"unique_pairs"``: set of unique 2-tuples.
        - ``"software_in_unique_pairs"``: set of software IDs appearing in
          any unique pair.
        - ``"unique_triplets"``: set of unique 3-tuples.
        - ``"groups_with_unique_triplets"``: group ID -> list of its unique
          triplets (only groups with at least one appear).
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Step 1: Build software-to-groups mapping
    software_to_groups = defaultdict(set)
    for group_id, info in data.items():
        for soft in info.get("items", []):
            software_to_groups[soft].add(group_id)

    # Step 2: Find globally unique software
    globally_unique_software = {soft for soft, groups in software_to_groups.items() if len(groups) == 1}

    # Step 3: Build pair-to-groups mapping for all non-global software
    pair_to_groups = defaultdict(set)
    group_to_pairs = dict()

    for group_id, info in data.items():
        # A set is used so an ID listed twice by one group cannot yield a
        # degenerate (s, s) pair; that pair would look "unique" and wrongly
        # remove `s` from the triplet stage below.
        filtered_software = {s for s in info.get("items", []) if s not in globally_unique_software}
        pairs = list(combinations(sorted(filtered_software), 2))
        group_to_pairs[group_id] = pairs
        for pair in pairs:
            pair_to_groups[pair].add(group_id)

    # Step 4: Identify unique pairs
    unique_pairs = {pair for pair, groups in pair_to_groups.items() if len(groups) == 1}

    # Step 5: Flatten all software that appears in unique pairs
    software_in_unique_pairs = {soft for pair in unique_pairs for soft in pair}

    # Step 6: Build triplet-to-groups mapping (remove globals + unique-pair software)
    triplet_to_groups = defaultdict(set)
    group_to_triplets = dict()

    for group_id, info in data.items():
        filtered_software = {
            s for s in info.get("items", [])
            if s not in globally_unique_software and s not in software_in_unique_pairs
        }
        triplets = list(combinations(sorted(filtered_software), 3))
        group_to_triplets[group_id] = triplets
        for triplet in triplets:
            triplet_to_groups[triplet].add(group_id)

    # Step 7: Identify unique triplets (in only one group)
    unique_triplets = {triplet for triplet, groups in triplet_to_groups.items() if len(groups) == 1}

    # Step 8: Map groups to their unique triplets
    groups_with_unique_triplets = defaultdict(list)
    for group_id, triplets in group_to_triplets.items():
        for triplet in triplets:
            if triplet in unique_triplets:
                groups_with_unique_triplets[group_id].append(triplet)

    # Step 9: Summary
    # "items" is optional everywhere else in this function, so it is read
    # with .get() here too; indexing it directly raised KeyError for a group
    # record that has no software list.
    groups_with_global_uniques = {
        group_id for group_id, info in data.items()
        if any(s in globally_unique_software for s in info.get("items", []))
    }
    groups_with_unique_pairs = {
        group_id for group_id, pairs in group_to_pairs.items()
        if any(pair in unique_pairs for pair in pairs)
    }
    print(f"\nTotal groups with globally unique software: {len(groups_with_global_uniques)}")
    print(f"Total groups with unique software pairs: {len(groups_with_unique_pairs)}")
    print(f"Total groups with unique triplets (excluding above): {len(groups_with_unique_triplets)}\n")

    if groups_with_unique_triplets:
        print("Groups with unique triplets (no globals, no unique-pair software):")
        for group_id in sorted(groups_with_unique_triplets):
            group_name = data[group_id].get("name", "")
            print(f"\n{group_id}: {group_name}")
            for t1, t2, t3 in groups_with_unique_triplets[group_id]:
                print(f"  - ({t1}, {t2}, {t3})")

    return {
        "globally_unique_software": globally_unique_software,
        "unique_pairs": unique_pairs,
        "software_in_unique_pairs": software_in_unique_pairs,
        "unique_triplets": unique_triplets,
        "groups_with_unique_triplets": groups_with_unique_triplets
    }


In [25]:
# Run the triplet-level analysis on the same mapping file; `json_path` comes
# from the example-usage cell, so that cell must have been executed first.
result = analyze_unique_software_triplets(json_path)



Total groups with globally unique software: 111
Total groups with unique software pairs: 60
Total groups with unique triplets (excluding above): 0

