In [14]:
import os
import json
import csv

# Step 1: Read the notebook settings from config.json in the working directory
with open("config.json", "r") as config_file:
    config = json.load(config_file)

# Step 2: Resolve every groups-file entry of the configuration against the
# data directory, keeping the configuration's keys as lookup keys
data_directory = config["data_directory"]
groups_file_paths = dict(
    (key, os.path.join(data_directory, value))
    for key, value in config["file_paths_groups_v15"].items()
)

In [15]:
import pandas as pd
import json

# Function to load groups from an Excel file and create a dictionary
def load_groups_from_excel(file_path, sheet_name):
    """Build an ID-keyed dictionary of groups from one sheet of an Excel workbook.

    Args:
        file_path: Path of the workbook, forwarded to ``pandas.read_excel``.
        sheet_name: Sheet to read, forwarded to ``pandas.read_excel``.

    Returns:
        dict mapping each value of the 'ID' column to
        ``{'name': <'name' cell>, 'associated_groups': <'associated groups' cell>}``.
        A missing 'associated groups' cell is returned as ``None`` instead of
        pandas' float NaN, so the mapping serializes to valid JSON (``null``
        rather than the non-standard ``NaN`` token). If an ID occurs in
        several rows, the last row wins.

    Raises:
        KeyError: if the sheet lacks one of the columns 'ID', 'name' or
            'associated groups'.
    """
    # Read the Excel sheet
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    # Keep only the columns used below: 'ID', 'name', 'associated groups'
    df_subset = df[['ID', 'name', 'associated groups']]

    group_dict = {}
    for _, row in df_subset.iterrows():
        associated_groups = row['associated groups']
        # Empty cells arrive as NaN; store them as None so that consumers and
        # json.dump see an explicit "no value" instead of a float.
        if pd.isna(associated_groups):
            associated_groups = None
        group_dict[row['ID']] = {
            'name': row['name'],
            'associated_groups': associated_groups,
        }
    return group_dict

In [16]:
# Step 4: Look up the workbook configured for an attack type and load its groups
def load_attack_groups(attack_type, sheet_name="groups"):
    """Load the group mapping for ``attack_type`` from its configured workbook.

    The workbook path is taken from the module-level ``groups_file_paths``
    dictionary and handed to ``load_groups_from_excel`` together with
    ``sheet_name``.

    Raises:
        ValueError: if ``groups_file_paths`` has no (non-empty) path for
            ``attack_type``.
    """
    workbook_path = groups_file_paths.get(attack_type)
    if not workbook_path:
        raise ValueError(f"Invalid attack type: {attack_type}")
    return load_groups_from_excel(workbook_path, sheet_name=sheet_name)

# Step 6: Load one group mapping per configured attack type
# (each call reads the default "groups" sheet of the corresponding workbook)
enterprise_groups = load_attack_groups(attack_type="enterprise")
ics_groups = load_attack_groups(attack_type="ics")
mobile_groups = load_attack_groups(attack_type="mobile")

In [17]:
# Step 4: Fold the ICS and Mobile groups into the Enterprise mapping.
# setdefault only inserts IDs that are not present yet, so entries already in
# enterprise_groups are never overwritten.
for group_id, group_info in ics_groups.items():
    enterprise_groups.setdefault(group_id, group_info)

for group_id, group_info in mobile_groups.items():
    enterprise_groups.setdefault(group_id, group_info)

# Step 6: Report the size of the merged mapping and preview its first five entries
print(f"\nTotal groups in updated mapping: {len(enterprise_groups)}")
print("\nSome example group mappings:")
for group_id, group_info in list(enterprise_groups.items())[:5]:
    print(f"ID: {group_id}, Name: {group_info['name']}, Associated Groups: {group_info['associated_groups']}")



Total groups in updated mapping: 152

Some example group mappings:
ID: G1028, Name: APT-C-23, Associated Groups: Arid Viper, Big Bang APT, Desert Falcon, Grey Karkadann, Mantis, TAG-63, Two-tailed Scorpion
ID: G0099, Name: APT-C-36, Associated Groups: Blind Eagle
ID: G0006, Name: APT1, Associated Groups: Comment Crew, Comment Group, Comment Panda
ID: G0005, Name: APT12, Associated Groups: DNSCALC, DynCalc, IXESHE, Numbered Panda
ID: G0023, Name: APT16, Associated Groups: nan


In [18]:
# # Step 5: Export the merged group mapping to 'group_mapping_MITRE.json' -- the file that read_group_mappings_mitre() loads below; uncomment to (re)generate it
# with open('group_mapping_MITRE.json', 'w') as f:
#     json.dump(enterprise_groups, f, indent=4)

In [19]:
# Function to read the actors data from the JSON file
def read_actors_from_file(filename="actors_data.json"):
    """Load the actors data from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` (after printing a message) when
        the file does not exist or does not contain valid JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding, which would mangle non-ASCII names elsewhere.
        with open(filename, "r", encoding="utf-8") as json_file:
            return json.load(json_file)
    except FileNotFoundError:
        print(f"File {filename} not found.")
        return None
    except json.JSONDecodeError:
        print("Error decoding the JSON data.")
        return None



In [20]:
# Load the MITRE group mappings (the JSON export of the mapping created from the Excel files)
def read_group_mappings_mitre(group_mappings_file_path='group_mapping_MITRE.json'):
    """Load the MITRE group mapping from a JSON file.

    Args:
        group_mappings_file_path: Path of the JSON file to read. Defaults to
            'group_mapping_MITRE.json' in the working directory.

    Returns:
        The decoded JSON content, or ``None`` (after printing a message) when
        the file does not exist or does not contain valid JSON.
    """
    try:
        with open(group_mappings_file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # Report the path that was actually tried (the previous message
        # referenced an undefined name and raised NameError instead).
        print(f"File {group_mappings_file_path} not found.")
        return None
    except json.JSONDecodeError:
        print("Error decoding the JSON data.")
        return None


In [21]:
import re

# Function to normalize group names
def normalize_group_name(name):
    """Reduce a group/actor name to a canonical form for comparison.

    Steps, in order:
      1. lowercase and strip surrounding whitespace;
      2. drop a trailing ' team' (only the suffix, e.g. 'Sandworm Team');
      3. rewrite 'threat group-' / 'threat group ' to 'tg-';
      4. drop a leading 'temp' followed by dots/spaces ('Temp.Pittytiger');
      5. collapse runs of dots and spaces into one space and strip the ends,
         so 'pitty.tiger' equals 'pitty tiger' and a trailing dot leaves no
         dangling space;
      6. drop a trailing ' framework' or ' group';
      7. map the misspellings 'confucious' and 'conficious' to 'confucius'.

    Args:
        name: The name to normalize (a string).

    Returns:
        The normalized, lowercase name.
    """
    # Convert to lowercase for case-insensitive comparison
    name = name.lower().strip()

    # Remove only the trailing ' team'; str.replace would also delete any
    # earlier ' team' inside the name.
    team_suffix = ' team'
    if name.endswith(team_suffix):
        name = name[:-len(team_suffix)]

    # Replace 'threat group-' with 'tg-' (e.g., 'Threat Group-1314' -> 'tg-1314')
    name = re.sub(r'threat group[- ]', 'tg-', name)

    # Remove a leading 'temp.' prefix (e.g., 'Temp.Pittytiger' -> 'pittytiger')
    name = re.sub(r'^temp[\. ]+', '', name)

    # Normalize spaces and dots, then strip so that a leading/trailing dot does
    # not leave whitespace that would defeat the suffix check below
    name = re.sub(r'[\. ]+', ' ', name).strip()

    # Remove common suffixes (e.g., 'Inception Framework' -> 'inception')
    name = re.sub(r' (framework|group)$', '', name)

    # Standardize the misspellings 'confucious' / 'conficious' to 'confucius'
    name = re.sub(r'conf[iu]cious', 'confucius', name)

    return name


In [22]:
# Compare MITRE group names and synonyms with Malpedia actors
def _normalized_associated_groups(raw_associated_groups):
    """Turn a raw 'associated_groups' value into a list of normalized, non-empty names."""
    # Missing values show up as None or as float NaN: treat both as "no aliases"
    if raw_associated_groups is None or isinstance(raw_associated_groups, float):
        raw_associated_groups = ''
    if isinstance(raw_associated_groups, str):
        raw_associated_groups = raw_associated_groups.split(',')
    normalized = (normalize_group_name(alias) for alias in raw_associated_groups)
    # ''.split(',') yields [''], so drop empty names: an empty alias would
    # otherwise "match" any actor that has an empty synonym or name.
    return [alias for alias in normalized if alias]


def find_intersection_with_malpedia_actors(malpedia_actors_data, mitre_group_mappings, output_file="group_intersection_output.json"):
    """Match MITRE groups against Malpedia actors by normalized name or alias.

    A MITRE group matches the first Malpedia actor (in iteration order) for
    which the group's normalized name, or any of its normalized associated
    group names, equals the actor's normalized name or one of its normalized
    synonyms. All names are normalized with ``normalize_group_name``.

    Args:
        malpedia_actors_data: dict of actor id -> actor info; each info has a
            'value' (actor name) and optionally ``meta['synonyms']`` (list).
        mitre_group_mappings: dict of group id -> info with 'name' and
            optionally 'associated_groups' (comma-separated string, list,
            ``None`` or NaN).
        output_file: Path of the JSON file that receives the matches, keyed by
            group id.

    Returns:
        Tuple ``(intersection, unique_group_ids, mitre_groups_not_in_malpedia)``:
        a list of ``(group_name, actor_name, group_id)`` for matched groups, the
        set of matched group ids, and a list of ``(group_name, group_id)`` for
        groups without a match. Names in the results are the normalized forms.
    """
    intersection = []  # MITRE groups that intersect with Malpedia actors
    unique_group_ids = set()  # Group IDs that were matched
    mitre_groups_not_in_malpedia = []  # MITRE groups not found in Malpedia
    intersection_data = {}  # Payload written to the JSON output file

    # Normalize every Malpedia actor once instead of once per MITRE group
    normalized_actors = [
        (
            normalize_group_name(actor_info['value']),
            [normalize_group_name(synonym) for synonym in actor_info.get('meta', {}).get('synonyms', [])],
        )
        for actor_info in malpedia_actors_data.values()
    ]

    for group_id, group_info in mitre_group_mappings.items():
        group_name = normalize_group_name(group_info['name'])
        associated_groups = _normalized_associated_groups(group_info.get('associated_groups', ''))

        found_match = False
        for actor_name, synonyms in normalized_actors:
            matches_by_name = group_name == actor_name or group_name in synonyms
            if matches_by_name or any(ag == actor_name or ag in synonyms for ag in associated_groups):
                intersection.append((group_name, actor_name, group_id))
                unique_group_ids.add(group_id)
                intersection_data[group_id] = {
                    'MITRE Group Name': group_name,
                    'MITRE Associated Names': associated_groups,
                    'Malpedia Actor Name': actor_name,
                    'Malpedia Aliases': synonyms
                }
                found_match = True
                break  # The first matching actor wins

        # If no match was found for this MITRE group, add it to the "not in Malpedia" list
        if not found_match:
            mitre_groups_not_in_malpedia.append((group_name, group_id))

    # Write the intersection data to a JSON file
    with open(output_file, 'w') as json_file:
        json.dump(intersection_data, json_file, indent=4)

    return intersection, unique_group_ids, mitre_groups_not_in_malpedia

In [23]:
# Load both inputs from their default JSON files
malpedia_actors_data = read_actors_from_file()
mitre_group_mappings = read_group_mappings_mitre()
# Match MITRE groups against Malpedia actors; the matches are also written to output_file
intersection, unique_group_ids, mitre_groups_not_in_malpedia = find_intersection_with_malpedia_actors(
    malpedia_actors_data,
    mitre_group_mappings,
    output_file="group_intersection_output.json",
)


In [24]:
# # Print the intersection results
# print(f"Total intersection based on unique group IDs: {len(unique_group_ids)}")
# if intersection:
#     print("\nIntersection found between MITRE groups and Malpedia actors:")
#     for group_name, actor_name, group_id in intersection:
#         print(f"MITRE Group: {group_name} | Malpedia Actor: {actor_name} | Group ID: {group_id}")
# else:
#     print("No intersection found between MITRE groups and Malpedia actors.")

# # Print the MITRE groups not present in Malpedia
# if mitre_groups_not_in_malpedia:
#     print(f"\nTotal MITRE groups not present in Malpedia: {len(mitre_groups_not_in_malpedia)}")
#     for group_name, group_id in mitre_groups_not_in_malpedia:
#         print(f"MITRE Group: {group_name} | Group ID: {group_id}")

In [25]:
# Function to read a JSON file and count its top-level entries
def count_json_keys(json_file_path):
    """Return the length of the JSON document stored at ``json_file_path``.

    For a JSON object (decoded as a dict) this is the number of top-level keys.
    """
    with open(json_file_path, 'r') as handle:
        return len(json.load(handle))

# Count the entries of the intersection file written by the matching step
json_file_path = 'group_intersection_output.json'  # adjust if the output was written elsewhere
num_keys = count_json_keys(json_file_path)

# Report the result
print(f"Number of keys in the JSON file: {num_keys}")


Number of keys in the JSON file: 144


In [26]:
# Load the MITRE group mappings (this should be the mappings created from the Excel files)
# mitre_group_mappings = read_group_mappings_mitre()
    
# # Open file in append mode (or write mode, if needed) with utf-8 encoding
# with open('group_name_alias_normalization.csv', 'a', newline='', encoding='utf-8') as csvfile:
#         csvwriter = csv.writer(csvfile)

#         # Check if the file is empty and write the header if necessary
#         try:
#             # Try to move to the beginning of the file
#             csvfile.seek(0, 2)  # Go to the end of the file
#             if csvfile.tell() == 0:  # If file size is 0, write header
#                 csvwriter.writerow(['Group Name', 'Aliases', 'Normalized Name'])
#         except Exception as e:
#             print(f"Error checking file for header: {e}")


        
#                 # Loop through each MITRE group in mitre_group_mappings
#         for group_id, group_info in mitre_group_mappings.items():
#             original_group_name = group_info['name']
#             normalized_group_name = normalize_group_name(group_info['name'])  # Normalize group name
#             associated_groups = group_info.get('associated_groups', '')
#             if isinstance(associated_groups, float) or associated_groups is None:
#                 associated_groups = ''

#             associated_groups = [(ag) for ag in associated_groups] if isinstance(associated_groups, list) else [(associated_groups)]

#              # Write MITRE group name, aliases (comma-separated), and normalized name to file
#             csvwriter.writerow([f"mitre_{original_group_name}", ', '.join(associated_groups), normalized_group_name])


#                 # Loop through each actor in Malpedia actors data
#         for actor_id, actor_info in malpedia_actors_data.items():
#                 actor_name = actor_info['value']  # Use the original actor name
#                 aliases = actor_info.get('meta', {}).get('synonyms', [])  # Get synonyms (aliases)
                
#                 # Ensure aliases is always a list
#                 if not isinstance(aliases, list):
#                     aliases = []

#                 normalized_actor_name = normalize_group_name(actor_name)  # Normalize actor name
#                 aliases = [(alias) for alias in aliases]  

#                 # Write Malpedia group name, aliases (comma-separated), and normalized name to file
#                 csvwriter.writerow([f"malpedia_{actor_name}", ', '.join(aliases), normalized_actor_name])
