In [1]:
import pandas as pd
import json
import os
from statistics import mode
from collections import Counter
import requests
import re
import copy
import pprint

In [2]:
def load_config():
    # Get the absolute path of the project root (one directory up)
    project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))

    # Normalize the project_root to ensure it's correctly formatted
    project_root = os.path.normpath(project_root)
    
    config_path = os.path.join(project_root, 'config.json')

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config file not found at expected location: {config_path}")

    with open(config_path, 'r') as f:
        config = json.load(f)

    return config, project_root


In [3]:
config, project_root = load_config()

group_path_enterprise = os.path.normpath(os.path.join(project_root, config["data_directory"], config["file_paths_groups_v15"]["enterprise"]))
group_path_mobile = os.path.normpath(os.path.join(project_root, config["data_directory"], config["file_paths_groups_v15"]["mobile"]))
group_path_ics = os.path.normpath(os.path.join(project_root, config["data_directory"], config["file_paths_groups_v15"]["ics"]))

In [4]:
# Collect file paths into a list
group_file_paths = [group_path_enterprise, group_path_mobile, group_path_ics]

In [5]:
def apt_group_mapping_from_config(groups_file_paths, mapping_type='software'):
    """
    Reads multiple Excel files for APT groups and their software or technique mappings.
    Merges all mappings and provides usage statistics.

    Parameters
    ----------
    groups_file_paths : dict
        Dictionary of attack type -> Excel file path.
    mapping_type : str
        Either 'software' or 'techniques' to select the appropriate sheet.

    Returns
    -------
    dict
        Mapping of APT group IDs to names and software/techniques used.
    int
        Number of unique APT groups.
    int
        Number of unique software or techniques used.
    dict
        Statistical summary: mean, median, mode, max/min counts and groups.
    """
    # Select sheet based on mapping_type
    if mapping_type == 'software':
        sheet_name = 'associated software'
    elif mapping_type == 'techniques':
        sheet_name = 'techniques used'
    else:
        raise ValueError("Invalid mapping_type. Choose either 'software' or 'techniques'.")

    all_rows = []

    # Iterate over files and load data
    for file_path in groups_file_paths:
        xls = pd.ExcelFile(file_path)
        try:
            df = pd.read_excel(xls, sheet_name=sheet_name)
        except ValueError:
            raise ValueError(f"Sheet '{sheet_name}' not found in {file_path}")

        required_columns = {'source name', 'source ID', 'target ID'}
        if not required_columns.issubset(df.columns):
            raise ValueError(f"Missing required columns in {file_path}: {df.columns.tolist()}")

        # For techniques, also require the target name
        if mapping_type == 'techniques' and 'target name' not in df.columns:
            raise ValueError(f"Missing 'target name' column in {file_path} for techniques mapping")

        # Add the target name column if mapping_type is 'techniques'
        all_rows.append(df[['source name', 'source ID', 'target ID', 'target name', 'target ref']] 
                         if mapping_type == 'software' else df[['source name', 'source ID', 'target ID', 'target name']])

    # Combine all rows from different files
    merged_df = pd.concat(all_rows, ignore_index=True)

    # Build the mapping: group ID -> {name, targets, technique names}
    apt_map = {}
    grouped = merged_df.groupby('source ID')

    for group_id, group_df in grouped:
        group_name = group_df['source name'].iloc[0]
        targets = group_df['target ID'].dropna().unique().tolist()

        # For techniques mapping, we need to include the technique names as well
        if mapping_type == 'techniques':
            technique_names = group_df['target name'].fillna('').unique().tolist()
            apt_map[group_id] = {
                'name': group_name,
                'items': targets,
                'technique name': technique_names
            }
        elif mapping_type == 'software':
            # For software, we continue with the existing logic
            types = group_df['target ref'].apply(lambda x: 'Malware' if 'malware' in str(x).lower() else 'Tool')
            type_mapping = types.value_counts().to_dict()

            apt_map[group_id] = {
                'name': group_name,
                'items': targets,
                'type': type_mapping
            }

    # Count per group (number of unique targets)
    item_counts = merged_df.groupby('source ID')['target ID'].nunique()

    # Stats
    mean_count = item_counts.mean()
    median_count = item_counts.median()
    try:
        mode_count = mode(item_counts)
    except:
        mode_count = 'No unique mode'

    max_apt_group = item_counts.idxmax()
    max_count = item_counts[max_apt_group]

    min_apt_group = item_counts.idxmin()
    min_count = item_counts[min_apt_group]

    stats = {
        'mean_count': mean_count,
        'median_count': median_count,
        'mode_count': mode_count,
        'max_count': max_count,
        'max_apt_group': max_apt_group,
        'min_count': min_count,
        'min_apt_group': min_apt_group
    }

    return apt_map, merged_df['source ID'].nunique(), merged_df['target ID'].nunique(), stats


In [6]:
# Example call to the function for techniques mapping
techniques_map, unique_groups_technique, unique_techniques, stats_technique = apt_group_mapping_from_config(
    group_file_paths, mapping_type='techniques'
)

# Example call to the function for software mapping
software_map, unique_groups_software, unique_software, stats_software = apt_group_mapping_from_config(
    group_file_paths, mapping_type='software'
)


In [7]:
#techniques_map.get("G0138
unique_groups_software, unique_software, stats_software

(138,
 509,
 {'mean_count': np.float64(6.753623188405797),
  'median_count': np.float64(4.0),
  'mode_count': 2,
  'max_count': np.int64(48),
  'max_apt_group': 'G0016',
  'min_count': np.int64(1),
  'min_apt_group': 'G0002'})

In [8]:
def groups_with_unique_techniques(technique_map):
    """
    Counts how many APT groups have at least one unique technique 
    (i.e., not used by any other group).

    Parameters
    ----------
    technique_map : dict
        Mapping of group ID to their techniques.

    Returns
    -------
    dict
        A summary including:
        - number of groups with unique techniques
        - group-to-unique-techniques map (up to 10 sample groups)
        - top 10 most common techniques (optional)
    """
    from collections import defaultdict, Counter

    technique_to_groups = defaultdict(set)
    group_to_techs = {}

    # Build reverse map: technique -> groups
    for group_id, data in technique_map.items():
        techniques = data.get("items", [])
        group_to_techs[group_id] = set(techniques)
        for tech in techniques:
            technique_to_groups[tech].add(group_id)

    # Identify unique techniques per group
    unique_per_group = {}
    for group_id, techs in group_to_techs.items():
        unique_techs = {t for t in techs if len(technique_to_groups[t]) == 1}
        if unique_techs:
            unique_per_group[group_id] = list(unique_techs)

    # Top 10 most used techniques (optional)
    technique_usage = Counter()
    for tech, groups in technique_to_groups.items():
        technique_usage[tech] = len(groups)
    top_10_common_techniques = technique_usage.most_common(10)

    # Print summary
    print(f"\nNumber of groups with unique techniques: {len(unique_per_group)}")
    print("\nExample groups with unique techniques:")
    for group_id, uniq_techs in list(unique_per_group.items())[:5]:  # limit to 5
        print(f"  Group {group_id} ({technique_map[group_id]['name']}):")
        for tech in uniq_techs[:5]:  # show up to 5 techniques
            print(f"    - {tech}")
        if len(uniq_techs) > 5:
            print(f"    ... and {len(uniq_techs) - 5} more")

    return {
        "num_groups_with_unique_techniques": len(unique_per_group),
        "unique_techniques_per_group": unique_per_group,
        "top_10_common_techniques": top_10_common_techniques
    }

In [9]:
#technique_stats = groups_with_unique_techniques(techniques_map)
software_stats = groups_with_unique_techniques(software_map)


Number of groups with unique techniques: 111

Example groups with unique techniques:
  Group G0001 (Axiom):
    - S0672
    - S0009
  Group G0003 (Cleaver):
    - S0056
    - S0004
  Group G0004 (Ke3chang):
    - S0691
    - S0227
    - S0439
    - S0280
  Group G0005 (APT12):
    - S0003
    - S0015
  Group G0006 (APT1):
    - S0026
    - S0109
    - S0122
    - S0121
    - S0119
    ... and 4 more


In [10]:
software_stats["top_10_common_techniques"]

[('S0002', 46),
 ('S0029', 31),
 ('S0039', 30),
 ('S0154', 24),
 ('S0363', 15),
 ('S0012', 14),
 ('S0013', 13),
 ('S0097', 13),
 ('S0100', 13),
 ('S0349', 12)]

In [11]:
def load_json_metadata(metadata_path):
    """
    Loads the group metadata JSON file.

    Parameters
    ----------
    metadata_path : str
        Path to the JSON file containing group metadata.

    Returns
    -------
    dict
        Group metadata loaded from the file.
    """
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    return metadata

In [12]:
# Path to group_metadata.json
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))  # move up from notebooks/
file_mitre_ttp = os.path.join(project_root, "group_analysis_json_outputs", "MITRE_ttp_group_analysis.json")
file_malpedia_ttp = os.path.join(project_root, "group_analysis_json_outputs", "Malpedia_ttp_group_analysis.json")
file_malpedia_software = os.path.join(project_root, "group_analysis_json_outputs", "malpedia_actor_to_software_map.json")

# Load the file
mitre_ttp_metadata = load_json_metadata(file_mitre_ttp)
malpedia_ttp_metadata = load_json_metadata(file_malpedia_ttp)
malpedia_software_metadata = load_json_metadata(file_malpedia_software)



In [13]:
def compare_group_sets(techniques_map, ttp_data):
    """
    Compare group sets between techniques_map (MITRE) and ttp_data (Threat Reports).
    
    Prints:
    - Total groups in techniques_map
    - Total groups in ttp_data
    - Intersection (common groups)
    - Groups only in techniques_map
    - Groups only in ttp_data
    """
    techniques_groups = set(techniques_map.keys())
    ttp_groups = set(ttp_data.keys())

    intersection = techniques_groups & ttp_groups
    only_in_techniques = techniques_groups - ttp_groups
    only_in_ttp_data = ttp_groups - techniques_groups

    print(f"Total groups in techniques_map: {len(techniques_groups)}")
    print(f"Total groups in ttp_data (Threat Reports): {len(ttp_groups)}")
    print(f"Groups present in both (intersection): {len(intersection)}\n")

    print(f"Groups only in techniques_map (not observed in reports): {len(only_in_techniques)}")
    print(f"Groups only in ttp_data (not in MITRE mapping): {len(only_in_ttp_data)}\n")

    return {
        "techniques_groups": techniques_groups,
        "ttp_groups": ttp_groups,
        "intersection": intersection,
        "only_in_techniques": only_in_techniques,
        "only_in_ttp_data": only_in_ttp_data
    }


In [14]:
group_stats = compare_group_sets(techniques_map, mitre_ttp_metadata)

Total groups in techniques_map: 148
Total groups in ttp_data (Threat Reports): 63
Groups present in both (intersection): 62

Groups only in techniques_map (not observed in reports): 86
Groups only in ttp_data (not in MITRE mapping): 1



In [15]:
def enhance_techniques_map_with_observed_ttps(techniques_map, ttp_data):
    """
    Enhances the techniques_map by adding new TTPs from ttp_data.
    Renames 'items' key to 'techniques' for better clarity.
    
    Parameters
    ----------
    techniques_map : dict
        Original techniques mapping from MITRE ATT&CK Excel sheets.
    ttp_data : dict
        Observed TTPs from threat report hashes.

    Returns
    -------
    tuple
        - Updated techniques_map with observed TTPs added.
        - A diff dictionary showing newly added TTPs per group.
    """
    enhanced_map = {}

    diff_per_group = {}

    for group_id, group_info in techniques_map.items():
        # Rename 'items' to 'techniques'
        enhanced_map[group_id] = {
            "name": group_info.get("name", ""),
            "techniques": group_info.get("items", []),  # use existing items under new key
            "technique name": group_info.get("technique name", [])
        }

    for group_id, threat_data in ttp_data.items():
        observed_ttps = set()
        for hash_entry in threat_data.get('hashes', []):
            observed_ttps.update(t.upper().strip() for t in hash_entry.get('ttps', []))

        if group_id in enhanced_map:
            official_ttps = set(t.upper().strip() for t in enhanced_map[group_id].get('techniques', []))
            new_ttps = observed_ttps - official_ttps

            if new_ttps:
                enhanced_map[group_id]["techniques"].extend(new_ttps)
                diff_per_group[group_id] = list(new_ttps)

        else:
            # Group missing in MITRE, create new entry
            enhanced_map[group_id] = {
                "name": "",
                "techniques": list(observed_ttps),
                "technique name": []
            }
            diff_per_group[group_id] = list(observed_ttps)

    return enhanced_map, diff_per_group


In [16]:
def print_diff_summary(diff_per_group, techniques_map):
    """
    Prints a summary of newly added techniques per group.

    Parameters
    ----------
    diff_per_group : dict
        Mapping of group ID to list of newly added TTPs.
    techniques_map : dict
        Mapping to get the group names.
    """
    if not diff_per_group:
        print("No new TTPs were added.")
        return

    print(f"New TTPs added to {len(diff_per_group)} groups:\n")
    for group_id, new_ttps in diff_per_group.items():
        group_name = techniques_map.get(group_id, {}).get("name", "Unknown Group")
        print(f"Group {group_id} ({group_name}): {len(new_ttps)} new TTPs")
        for ttp in new_ttps[:5]:  # show up to 5 per group
            print(f"  - {ttp}")
        if len(new_ttps) > 5:
            print(f"  ... and {len(new_ttps) - 5} more")
        print()


In [17]:
enhanced_techniques_map, diff_per_group = enhance_techniques_map_with_observed_ttps(techniques_map, mitre_ttp_metadata)

#print_diff_summary(diff_per_group, enhanced_techniques_map)


In [18]:
# Function to normalize group names
def normalize_group_name(name):
    
    if not isinstance(name, str):
        return ''
    
    # Convert to lowercase for case-insensitive comparison
    name = name.lower().strip()

    # Remove 'team' from names like 'Sandworm Team'
    if name.endswith(' team'):
        name = name.replace(' team', '')

    # Replace 'threat group-' with 'tg-' (e.g., 'Threat Group-1314' -> 'TG-1314')
    name = re.sub(r'threat group[- ]', 'tg-', name)

    # Remove 'temp.' or similar prefixes (e.g., 'Temp.Pittytiger' -> 'Pittytiger')
    name = re.sub(r'^temp[\. ]+', '', name)

    # Normalize spaces and dots (e.g., 'pitty tiger' == 'pitty.tiger')
    name = re.sub(r'[\. ]+', ' ', name)

    # Remove common suffixes like 'framework' or 'group' (e.g., 'Inception Framework' -> 'Inception')
    name = re.sub(r' (framework|group)$', '', name)

    # Standardize 'Confucius' and 'Confucious' to 'confucius'
    name = re.sub(r'confucious', 'confucius', name)

    # Normalize specific known prefixes (apt, unc, g)
    name = re.sub(
        r'\b(apt|unc|g)[\s\.-]*([a-z]*)[\s\.-]*(\d{1,4})\b',
        lambda m: m.group(1) + m.group(2) + m.group(3),
        name
    )
    #re.sub(r'([a-z])[\s\.-]?(\d{2,4})', r'\1\2', name)


    return name

In [19]:
def enhanced_techniques_software_map_with_group_aliases(techniques_map, group_metadata, software_map):
    """
    Builds an enhanced techniques map with MITRE and Malpedia metadata, and software data.

    Handles the following cases:
    - Group has techniques and software → merge both.
    - Group has techniques but no software → add empty attack_software key.
    - Group only in software_map → create entry with techniques empty.

    Parameters
    ----------
    techniques_map : dict
        Technique mappings per group.
    group_metadata : dict
        Metadata like names and aliases for groups.
    software_map : dict
        Software used by each group ID.

    Returns
    -------
    dict
        Enhanced mapping containing MITRE & Malpedia metadata, techniques, and software.
    """
    enhanced_map = {}
    all_group_ids = set(techniques_map.keys()).union(set(software_map.keys()))
    missing_metadata_groups = []

    for group_id in all_group_ids:
        technique_data = techniques_map.get(group_id, {})
        software_data = software_map.get(group_id, {})
        metadata = group_metadata.get(group_id, {})

        if not metadata:
            missing_metadata_groups.append(group_id)

        enhanced_map[group_id] = {
            'mitre name': metadata.get('MITRE Group Name', ''),
            'mitre alias': metadata.get('MITRE Associated Names', []),
            'malpedia name': metadata.get('Malpedia Actor Name', ''),
            'malpedia alias': metadata.get('Malpedia Aliases', []),
            'attack techniques': technique_data.get('techniques', []),
            'technique name': technique_data.get('technique name', []),
            'attack software': software_data.get('items', [])  # empty list if not present
        }

    if missing_metadata_groups:
        print(f"Warning: Metadata not found for {len(missing_metadata_groups)} groups.")
        print(f"Examples: {', '.join(missing_metadata_groups[:10])}")

    return enhanced_map


In [20]:
# Path to group_metadata.json
metadata_file_group_name_map = os.path.join(project_root, "attack_malpedia_intersection", "attack_malpedia_group_mapping.json")

# Load metadata
group_name_metadata = load_json_metadata(metadata_file_group_name_map)

In [21]:
enhanced_techniques_software_alias_map = enhanced_techniques_software_map_with_group_aliases(enhanced_techniques_map, group_name_metadata, software_map)

Examples: G0140, G0114, G0108, G1027, G0124, G0028, G0089


In [22]:
len(enhanced_techniques_software_alias_map.keys())

152

In [23]:
def dump_to_json(data, file_path):
    """
    Dumps data to a JSON file in the specified output folder.

    Parameters
    ----------
    data : dict
        The data to be written to the JSON file.
    output_folder : str
        The folder where the JSON file will be saved.
    filename : str
        The name of the output JSON file (including .json extension).

    Returns
    -------
    bool
        True if the data was successfully written to the file, False otherwise.
    """

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        return True
    except Exception as e:
        print(f"Error writing to file {file_path}: {e}")
        return False


In [24]:
base_dir = os.getcwd()
json_output_data_dir = os.path.join(base_dir, "intermediate_json_outputs")
# Ensure the output folder exists
os.makedirs(json_output_data_dir, exist_ok=True)
filename = "attack_group_software_profile.json"
# Full path to the output file
file_path = os.path.join(json_output_data_dir, filename)

In [25]:
#dump_to_json(enhanced_techniques_software_alias_map, file_path)

In [26]:
def group_ttps_by_threat_group(malpedia_ttp_path: str) -> dict:
    """
    Groups CVEs under threat groups, considering only the URLs with source 'actor' or 'attribution'.
    
    Parameters
    ----------
    malpedia_cve_path : str
        Path to the JSON file containing Malpedia CVE data.
    
    Returns
    -------
    dict
        A dictionary with threat groups as keys and their associated CVEs as values.
    """
    with open(malpedia_ttp_path, 'r') as file:
        data = json.load(file)
    
    grouped_ttps = {}

    # Process the data to group CVEs
    for group in data:
        if group == "Unknown":
            continue  # Skip the "Unknown" group
        # Collect only URLs with 'actor' or 'attribution' source
        relevant_urls = [url['url'] for url in data[group].get('urls', []) 
                         if url['source'] in ['actor', 'attribution']]
        
        if relevant_urls:
            # Collect CVEs from the current group
            ttps = set()
            for hash_entry in data[group].get('hashes', []):
                ttps.update(hash_entry.get('ttps', []))

            # Add the CVEs to the group if there are relevant URLs
            grouped_ttps[group] = sorted(ttps)

    return grouped_ttps

In [27]:
grouped_ttp_malpedia = group_ttps_by_threat_group(file_malpedia_ttp)

In [28]:
def update_with_malpedia_ttps_software(enhanced_techniques_software_alias_map, malpedia_ttps, malpedia_softwares):
    """
    Updates enhanced_techniques_software_alias_map with Malpedia techniques and software per group.

    Parameters
    ----------
    enhanced_techniques_software_alias_map : dict
        Dictionary mapping group IDs to their MITRE and Malpedia metadata.
    malpedia_ttps : dict
        Dictionary containing TTPs (techniques) per actor name from Malpedia.
    malpedia_softwares : dict
        Dictionary containing software per actor name from Malpedia.

    Returns
    -------
    dict
        Updated enhanced_techniques_software_alias_map including Malpedia techniques and software.
    """

    unmatched_malpedia_names = {
        "actor": [],
        "family": [],
        "attribution": []
    }
    unmatched_software_groups = []

    count_technique = 0
    count_software = 0
    count_new_malpedia_software = 0
    count_new_malpedia_technique = 0

    next_mal_id = 1
    def get_next_id():
        nonlocal next_mal_id
        while True:
            gid = f"MAL{next_mal_id:03d}"
            next_mal_id += 1
            if gid not in enhanced_techniques_software_alias_map:
                return gid

    # Build normalized group name index for fast lookups
    group_name_index = {}
    for group_id, group_info in enhanced_techniques_software_alias_map.items():
        names = [
            normalize_group_name(group_info.get('mitre name', '')),
            normalize_group_name(group_info.get('malpedia name', ''))
        ] + [
            normalize_group_name(alias) for alias in group_info.get('mitre alias', []) + group_info.get('malpedia alias', [])
        ]
        for name in names:
            group_name_index[name] = group_id

        # Ensure empty lists exist for keys
        enhanced_techniques_software_alias_map[group_id].setdefault('malpedia techniques', [])
        enhanced_techniques_software_alias_map[group_id].setdefault('malpedia software', [])

    # Process techniques
    for malpedia_group, malpedia_data in malpedia_ttps.items():
        norm_name = normalize_group_name(malpedia_group)
        group_id = group_name_index.get(norm_name)
        sorted_ttps = sorted(malpedia_data)

        if group_id:
            enhanced_techniques_software_alias_map[group_id]['malpedia techniques'] = sorted_ttps
            count_technique += 1
        else:
            
            new_entry = {
                "mitre name": "",
                "mitre alias": [],
                "malpedia name": malpedia_group,
                "malpedia alias": [],
                "attack techniques": [],
                "technique name": [],
                "attack software": [],
                "malpedia techniques": sorted_ttps,
                "malpedia software": []
            }
            new_group_id = get_next_id()
            enhanced_techniques_software_alias_map[new_group_id] = new_entry
            group_name_index[norm_name] = new_group_id
            count_new_malpedia_technique += 1


    # Process software
    for malpedia_group, software_list in malpedia_softwares.items():
        norm_name = normalize_group_name(malpedia_group)
        group_id = group_name_index.get(norm_name)
    
        if group_id:
            enhanced_techniques_software_alias_map[group_id]['malpedia software'] = software_list
            count_software += 1
        else:
            # Add as new entry similar to techniques block
            new_entry = {
                "mitre name": "",
                "mitre alias": [],
                "malpedia name": malpedia_group,
                "malpedia alias": [],
                "attack techniques": [],
                "technique name": [],
                "attack software": [],
                "malpedia techniques": [],
                "malpedia software": software_list
            }
            new_group_id = get_next_id()
            enhanced_techniques_software_alias_map[new_group_id] = new_entry
            group_name_index[norm_name] = new_group_id
            count_new_malpedia_software += 1  # Increment since we're adding a new one now


    # Report unmatched software groups
    #if unmatched_software_groups:
    #    print(f"Warning: {len(unmatched_software_groups)} Malpedia software groups not matched to any MITRE group.")
    #    print(f"Examples: {', '.join(unmatched_software_groups[:20])} ...")
    print(f"Total keys in the main map: {len(enhanced_techniques_software_alias_map)}")
    print(f"Total keys in Malpedia techniques: {len(malpedia_ttps)}")
    print(f"Total matches with Malpedia techniques: {count_technique}")
    print(f"Total keys in Malpedia software/families: {len(malpedia_softwares)}")
    print(f"Total matches with Malpedia software: {count_software}")
    print(f"Total number of newly added Malpedia groups from techniques: {count_new_malpedia_technique}")
    print(f"Total number of newly added Malpedia groups from software: {count_new_malpedia_software}")

    return enhanced_techniques_software_alias_map


In [29]:
complete_technique_map = update_with_malpedia_ttps_software(enhanced_techniques_software_alias_map, grouped_ttp_malpedia, malpedia_software_metadata)

Total keys in the main map: 331
Total keys in Malpedia techniques: 215
Total matches with Malpedia techniques: 122
Total keys in Malpedia software/families: 247
Total matches with Malpedia software: 161
Total number of newly added Malpedia groups from techniques: 93
Total number of newly added Malpedia groups from software: 86


In [30]:
len(complete_technique_map)

331

In [43]:
filename = "malpedia_attack_group_software_profile.json"
# Full path to the output file
file_path = os.path.join(json_output_data_dir, filename)
#dump_to_json(enhanced_techniques_software_alias_map, file_path)

In [31]:
filename = "malpedia_attack_group_software_union_profile.json"
# Full path to the output file
file_path = os.path.join(json_output_data_dir, filename)
dump_to_json(complete_technique_map, file_path)

True

In [18]:
# URL of the MITRE ATT&CK Groups page
#url = "https://attack.mitre.org/groups/"
url = "https://attack.mitre.org/versions/v15/groups/"

# Send a GET request to fetch the page content
response = requests.get(url)
if response.status_code == 200:
    # Use regular expression to find all group IDs (e.g., G0001, G1000)
    group_ids = re.findall(r'G\d{4}', response.text)
    unique_group_ids = sorted(set(group_ids))
    
    print(f"Total groups found: {len(unique_group_ids)}")
    #print("Group IDs:")
    #for gid in unique_group_ids:
    #    print(gid)
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")


Total groups found: 152


In [None]:
# Assuming 'parsed_group_ids' is a set of group IDs from your Excel data
parsed_group_ids = set(techniques_map.keys())

# 'unique_group_ids' is the set obtained from the MITRE website
missing_in_parsed = set(unique_group_ids) - parsed_group_ids
extra_in_parsed = parsed_group_ids - set(unique_group_ids)

print("Group IDs missing in parsed data:", missing_in_parsed)
print("Unexpected group IDs in parsed data:", extra_in_parsed)