In [7]:
import os
import re

def count_files_with_techniques_and_subtechniques(folder_path):
    """Classify the files in a folder by the kinds of TTP IDs they contain.

    A TTP ID is "T" followed by exactly four digits, optionally followed by a
    dot and more digits (e.g. "T1027" or "T1003.004"). An ID without a dot is
    treated as a technique; an ID with a dot is treated as a sub-technique.

    Only regular files directly inside ``folder_path`` are scanned;
    sub-directories are skipped and not descended into.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        A tuple ``(only_techniques_count, techniques_and_subtechniques_count)``:
        the number of files containing technique IDs but no sub-technique IDs,
        and the number of files containing both. Files that contain only
        sub-technique IDs, or no IDs at all, are counted in neither.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
    """
    only_techniques_count = 0
    techniques_and_subtechniques_count = 0

    # Non-capturing group: the optional ".NNN" suffix is part of the overall match.
    ttp_pattern = re.compile(r'\bT\d{4}(?:\.\d+)?\b')

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        # Skip directories and anything else that is not a regular file.
        if not os.path.isfile(file_path):
            continue

        has_technique = False
        has_subtechnique = False

        # Decode explicitly as UTF-8 and replace undecodable bytes so one badly
        # encoded file cannot abort the scan. The IDs themselves are plain ASCII.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            for line in file:
                # Inspect every ID on the line, not just the first one: a line may
                # mention a technique and a sub-technique side by side.
                for match in ttp_pattern.finditer(line):
                    if "." in match.group(0):
                        has_subtechnique = True
                    else:
                        has_technique = True

                # Both kinds seen: the classification cannot change any more.
                if has_technique and has_subtechnique:
                    break

        if has_technique and not has_subtechnique:
            only_techniques_count += 1
        elif has_technique and has_subtechnique:
            techniques_and_subtechniques_count += 1

    return only_techniques_count, techniques_and_subtechniques_count


In [8]:
# Folder whose files are scanned for TTP IDs. The commented-out line below is an
# alternative folder; uncomment it (and comment out the first) to scan that one instead.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where it exists.
folder_path = 'C:/Users/Aakanksha Saha/Documents/CTI_downloads/malpedia_20220718/malpedia_20220718/iocs'
#folder_path = 'C:/Users/Aakanksha Saha/Documents/CTI_downloads/downloads/20241008_downloads/iocs2'

# The function returns two counts, in this order: files with technique IDs only, then
# files with both technique and sub-technique IDs. Files that contain only sub-technique
# IDs (or no IDs) are included in neither number.
only_techniques, both_techniques_and_subtechniques = count_files_with_techniques_and_subtechniques(folder_path)
print(f"Files with only techniques: {only_techniques}")
print(f"Files with both techniques and sub-techniques: {both_techniques_and_subtechniques}")

Files with only techniques: 472
Files with both techniques and sub-techniques: 523


In [10]:
import os
import re

def count_cumulative_unique_ttps(folder_path):
    """Count the distinct TTP IDs that appear across all files in a folder.

    A TTP ID is "T" followed by exactly four digits, optionally followed by a
    dot and more digits (e.g. "T1027" or "T1003.004"). Technique IDs and
    sub-technique IDs are collected into the same set, so "T1003" and
    "T1003.004" count as two different IDs.

    Only regular files directly inside ``folder_path`` are scanned;
    sub-directories are skipped and not descended into.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        The number of distinct TTP ID strings found in all scanned files.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
    """
    cumulative_unique_ttps = set()

    # Non-capturing group so findall() returns the full ID, not just the suffix.
    ttp_pattern = re.compile(r'\bT\d{4}(?:\.\d+)?\b')

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        # Skip directories and anything else that is not a regular file.
        if not os.path.isfile(file_path):
            continue

        # Decode explicitly as UTF-8 and replace undecodable bytes so one badly
        # encoded file cannot abort the scan. The IDs themselves are plain ASCII.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            for line in file:
                # The set de-duplicates IDs across lines and across files.
                cumulative_unique_ttps.update(ttp_pattern.findall(line))

    return len(cumulative_unique_ttps)

# Usage example:
# folder_path = 'your/folder/path'
# num_ttps = count_cumulative_unique_ttps(folder_path)
# print(f"Total unique TTP IDs across all files: {num_ttps}")


In [11]:
# Folder whose files are scanned for TTP IDs. The commented-out line below is an
# alternative folder; uncomment it (and comment out the first) to scan that one instead.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where it exists.
folder_path = 'C:/Users/Aakanksha Saha/Documents/CTI_downloads/malpedia_20220718/malpedia_20220718/iocs'
#folder_path = 'C:/Users/Aakanksha Saha/Documents/CTI_downloads/downloads/20241008_downloads/iocs2'
# NOTE(review): count_cumulative_unique_ttps counts every distinct TTP ID, including
# dotted sub-technique IDs such as "T1003.004", so the "Unique techniques" figure printed
# below covers both kinds of ID.
num_techniques = count_cumulative_unique_ttps(folder_path)
print(f"Unique techniques: {num_techniques}")
# Left disabled: num_subtechniques is not defined in the cells shown here.
#print(f"Unique sub-techniques: {num_subtechniques}")

Unique techniques: 752
