In [None]:
# Path of the per-group CVE analysis JSON, located under the project root.
cve_map_file = os.path.join(
    project_root,
    "group_analysis_json_outputs",
    "MITRE_cve_group_analysis.json",
)
# NOTE(review): the variable name is spelled "metdata"; it is kept as-is
# because other cells may reference it.
cve_map_metdata = load_json_metadata(cve_map_file)


In [None]:
def load_cves_from_json(json_file):
    """
    Load per-group CVE data from a JSON file.

    The top-level JSON value is read as an object keyed by group ID. For each
    group, every entry of its 'hashes' list is inspected and the contents of
    the entry's 'cves' list are merged into a single de-duplicated set.

    Parameters
    ----------
    json_file : str
        Path to the JSON file containing APT group CVE data.

    Returns
    -------
    dict
        A dictionary with group IDs as keys and a ``set`` of CVEs as values.
        Groups without any CVE entries map to an empty set.
    """
    # JSON is UTF-8 by specification, so do not rely on the platform default.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    cve_map = {}
    for group_id, details in data.items():
        # Flatten the CVEs associated with all hashes for each group.
        cves = set()
        # `or ()` tolerates keys that are present but set to null.
        for item in details.get('hashes') or ():
            cves.update(item.get('cves') or ())
        cve_map[group_id] = cves

    return cve_map

In [None]:
def _summarize_group_sets(sets_by_group):
    """Return (common, unique, union, jaccard) for a mapping of group ID -> set."""
    group_sets = list(sets_by_group.values())
    if not group_sets:
        return set(), {}, set(), 0

    # Both calls build NEW sets, so none of the per-group sets is mutated.
    common = set.intersection(*group_sets)
    union = set().union(*group_sets)

    # Unique items are computed against the FINAL intersection, not a partial one.
    unique = {group_id: items - common for group_id, items in sets_by_group.items()}
    jaccard = len(common) / len(union) if union else 0
    return common, unique, union, jaccard


def compare_apt_groups_detailed(techniques_map, software_map, cve_map, group_ids):
    """
    Compare APT groups based on their techniques, software, and CVEs.
    Provides detailed differences including common, unique, and union, as well as the Jaccard Index.

    The input mappings are never modified: every per-group collection is
    copied into a fresh set before any set arithmetic is performed.

    Parameters:
    ----------
    techniques_map : dict
        A mapping of APT group IDs to a dict whose 'items' entry lists the techniques used.
    software_map : dict
        A mapping of APT group IDs to a dict whose 'items' entry lists the software used.
    cve_map : dict
        A mapping of APT group IDs to an iterable (set or list) of CVEs.
        Groups missing from this mapping are treated as having no CVEs.
    group_ids : list
        A list of APT group IDs to compare. An empty list yields empty
        results and Jaccard indices of 0.

    Returns:
    -------
    dict
        A dictionary containing:
        - common_techniques: Common techniques used by all the selected groups (list).
        - common_software: Common software used by all the selected groups (list).
        - common_cves: Common CVEs between all the selected groups (list).
        - unique_techniques: Per group, the techniques not shared by all groups (dict of sets).
        - unique_software: Per group, the software not shared by all groups (dict of sets).
        - unique_cves: Per group, the CVEs not shared by all groups (dict of sets).
        - union_techniques: Union of all techniques used by the selected groups (list).
        - union_software: Union of all software used by the selected groups (list).
        - union_cves: Union of all CVEs used by the selected groups (list).
        - jaccard_index_techniques: Jaccard Index for techniques.
        - jaccard_index_software: Jaccard Index for software.
        - jaccard_index_cves: Jaccard Index for CVEs.
        Each Jaccard Index is len(common) / len(union), or 0 when the union is empty.

    Raises:
    ------
    ValueError
        If a group ID is missing from techniques_map or software_map.
    """
    techniques_by_group = {}
    software_by_group = {}
    cves_by_group = {}

    for group_id in group_ids:
        if group_id not in techniques_map or group_id not in software_map:
            raise ValueError(f"Group ID {group_id} not found in techniques_map or software_map")

        # set(...) always creates a copy, so later set operations cannot leak
        # back into the caller's mappings (notably cve_map, whose values may
        # already be sets).
        techniques_by_group[group_id] = set(techniques_map[group_id].get('items', []))
        software_by_group[group_id] = set(software_map[group_id].get('items', []))
        cves_by_group[group_id] = set(cve_map.get(group_id, ()))

    (common_techniques, unique_techniques,
     union_techniques, jaccard_index_techniques) = _summarize_group_sets(techniques_by_group)
    (common_software, unique_software,
     union_software, jaccard_index_software) = _summarize_group_sets(software_by_group)
    (common_cves, unique_cves,
     union_cves, jaccard_index_cves) = _summarize_group_sets(cves_by_group)

    return {
        'common_techniques': list(common_techniques),
        'common_software': list(common_software),
        'common_cves': list(common_cves),
        'unique_techniques': unique_techniques,
        'unique_software': unique_software,
        'unique_cves': unique_cves,
        'union_techniques': list(union_techniques),
        'union_software': list(union_software),
        'union_cves': list(union_cves),
        'jaccard_index_techniques': jaccard_index_techniques,
        'jaccard_index_software': jaccard_index_software,
        'jaccard_index_cves': jaccard_index_cves,
    }


In [None]:
# Actor name -> list of group IDs that are compared against each other
# for that actor.
# NOTE(review): presumably the keys are Malpedia actor names and the values
# are MITRE ATT&CK group IDs -- confirm against the data sources.
malpedia_mitre_map = {
    "apt17": ["G0025", "G0001"],
    "apt19": ["G0073", "G0009"],
    "apt30": ["G0013", "G0030"],
    "lazarus": ["G0082", "G0138", "G0032"],
    "apt41": ["G0096", "G0044"],
    "earth lusca": ["G0143", "G1006"],
    "fin7": ["G0008", "G0046"],
    "dragonok": ["G0017", "G0002"],
    "mustang panda": ["G1014", "G0129"],
}

In [None]:
# NOTE: techniques_map, software_map and cve_map must already be defined when
# this cell runs. techniques_map and software_map are to be added by combining
# the group profile with the data read from the MITRE Excel datasheet.

# Print a short comparison report per actor; actors whose comparison raises
# ValueError are reported and skipped.
for actor, group_ids in malpedia_mitre_map.items():
    try:
        result = compare_apt_groups_detailed(techniques_map, software_map, cve_map, group_ids)

        # One print call; sep="\n" yields the same line-by-line output.
        print(
            f"Comparison for {actor} ({', '.join(group_ids)})",
            f"  - Common Techniques: {len(result['common_techniques'])}",
            f"  - Common Software: {len(result['common_software'])}",
            f"  - Common CVEs: {len(result['common_cves'])}",
            f"  - Jaccard (Techniques): {result['jaccard_index_techniques']:.2f}",
            f"  - Jaccard (Software): {result['jaccard_index_software']:.2f}",
            f"  - Jaccard (CVEs): {result['jaccard_index_cves']:.2f}",
            sep="\n",
        )

    except ValueError as e:
        print(f"Skipping {actor} due to error: {e}")