In [34]:
# Function to read a .bib file and count its entries
def count_bib_entries(file_path):
    """Count the bibliography entries in a BibTeX file.

    A line counts as an entry when its first non-blank character is '@',
    unless it is one of the BibTeX directives ``@comment``, ``@string`` or
    ``@preamble``, which share the prefix but do not define a reference.

    Args:
        file_path: Path of the .bib file to scan.

    Returns:
        int: Number of entry header lines found.
    """
    # Directives that start with '@' but are not citable entries.
    non_entry_directives = ('comment', 'string', 'preamble')
    count = 0
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            header = line.strip()
            if not header.startswith('@'):
                continue
            # The type name sits between '@' and the opening '{' (or '(').
            entry_type = header[1:].replace('(', '{').split('{', 1)[0].strip().lower()
            if entry_type not in non_entry_directives:
                count += 1
    return count


# Tally the entries of each snapshot.
# NOTE: file1 and file2 are assigned in a separate cell of this notebook; that cell must have been run first.
count1, count2 = (count_bib_entries(path) for path in (file1, file2))

# Report the per-file counts
print(f"Number of entries in {file1}: {count1}")
print(f"Number of entries in {file2}: {count2}")

# Plain sum of both counts: an entry present in both files is counted twice
total_count = count1 + count2
print(f"Total number of entries combined: {total_count}")


Number of entries in /Users/aakankshahome/Downloads/malpedia-db_2022-07-18.bib: 12047
Number of entries in /Users/aakankshahome/Downloads/malpedia-db_2024-11-11.bib: 15985
Total number of entries combined: 28032


In [35]:
# Locations of the two Malpedia BibTeX snapshot files (2022-07-18 and 2024-11-11) used by this notebook
file1, file2 = (
    '/Users/aakankshahome/Downloads/malpedia-db_2022-07-18.bib',
    '/Users/aakankshahome/Downloads/malpedia-db_2024-11-11.bib',
)

In [36]:
# Function to extract BibTeX entries from a file as a dictionary
def read_bib_entries(file_path):
    """Parse a BibTeX file into a dictionary keyed by citation key.

    The parser is line-oriented: an entry starts on a line that begins with
    '@' and contains '{', every following ``name = value`` line becomes a
    field, and a line consisting solely of '}' closes the entry. The
    directives ``@comment``, ``@string`` and ``@preamble`` are skipped.

    Args:
        file_path: Path of the .bib file to parse.

    Returns:
        dict: Maps each citation key to ``{'key': <key>, 'fields': {...}}``.
        Field names are lower-cased; values have their trailing comma,
        surrounding braces and surrounding double quotes removed. When a key
        occurs more than once, the last occurrence wins.
    """
    # Directives that start with '@' but do not define a citable entry.
    non_entry_directives = ('comment', 'string', 'preamble')
    entries = {}
    entry = None
    # Decode as UTF-8 explicitly so parsing does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if line.startswith('@') and '{' in line:
                # A new header implicitly closes an entry that never reached its '}' line.
                if entry:
                    entries[entry['key']] = entry
                    entry = None
                entry_type, _, remainder = line[1:].partition('{')
                if entry_type.strip().lower() in non_entry_directives:
                    continue
                # The key runs up to the first comma; a header without a comma
                # is tolerated instead of raising ValueError.
                key = remainder.partition(',')[0].strip().rstrip('}').strip()
                entry = {'key': key, 'fields': {}}
            elif '=' in line and entry:
                # Field line such as: url = {...},
                field, value = line.split('=', 1)
                field = field.strip().lower()
                # Remove the field separator first: while the comma is still
                # attached, the closing brace/quote is not the last character
                # and would survive the strip calls below.
                value = value.strip().rstrip(',').rstrip()
                value = value.strip('{}').strip('"')
                entry['fields'][field] = value
            elif line == '}' and entry:
                # End of the current entry
                entries[entry['key']] = entry
                entry = None
    # Keep a final entry whose closing brace is missing from the file.
    if entry:
        entries[entry['key']] = entry
    return entries

In [37]:
# Function to read BibTeX entries from a file and return a set of citation keys
def read_bib_keys(file_path):
    """Collect the citation keys of the entries in a BibTeX file.

    An entry header is a line that begins with '@' and contains '{'; the key
    is the text between that '{' and the first following comma. The
    directives ``@comment``, ``@string`` and ``@preamble`` are skipped.

    Args:
        file_path: Path of the .bib file to scan.

    Returns:
        set: The distinct citation keys found.
    """
    # Directives that start with '@' but do not define a citable entry.
    non_entry_directives = ('comment', 'string', 'preamble')
    keys = set()
    # Decode as UTF-8 explicitly so parsing does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not (line.startswith('@') and '{' in line):
                continue
            entry_type, _, remainder = line[1:].partition('{')
            if entry_type.strip().lower() in non_entry_directives:
                continue
            # A header without a comma is tolerated instead of raising ValueError.
            key = remainder.partition(',')[0].strip().rstrip('}').strip()
            keys.add(key)
    return keys

In [42]:
# Function to extract and dump URLs from a BibTeX file
def extract_and_dump_urls(file_path, output_file):
    """Write the ``url`` field of every entry in a BibTeX file to a text file.

    Args:
        file_path: Path of the .bib file; it is parsed with ``read_bib_entries``.
        output_file: Path of the text file to (over)write, one URL per line.

    Returns:
        list: The extracted URLs in the order the entries were returned by
        ``read_bib_entries``. Duplicates are kept.
    """
    # Read all BibTeX entries from the file
    entries = read_bib_entries(file_path)

    urls = []
    for entry in entries.values():
        raw_url = entry['fields'].get('url')
        if raw_url is None:
            continue
        # A BibTeX field line usually ends in "},". Drop that separator before
        # stripping the braces, otherwise the closing brace is not the last
        # character and "}," would be written along with the URL.
        urls.append(raw_url.strip().rstrip(',').strip().strip('{}'))

    # Dump extracted URLs into the specified output file, one per line
    with open(output_file, 'w', encoding='utf-8') as out_file:
        out_file.writelines(url + '\n' for url in urls)

    print(f"Dumped {len(urls)} URLs to {output_file}")
    return urls

# Dump the URLs of the 2024-11-11 snapshot into a text file
output_file = 'urls.txt'
file_path = '/Users/aakankshahome/Downloads/malpedia-db_2024-11-11.bib'

# NOTE: despite its name, `unique_urls` holds the returned list as-is; this cell does not de-duplicate it
unique_urls = extract_and_dump_urls(file_path=file_path, output_file=output_file)


Dumped 15985 URLs to urls.txt


In [26]:
### Malpedia API interaction

In [1]:
import requests

import os

# Malpedia API base URL
base_url = "https://malpedia.caad.fkie.fraunhofer.de/api"

# API key, read from the MALPEDIA_API_KEY environment variable so the secret
# never lives in the notebook; empty when the variable is unset.
# Any key that was previously committed in this cell should be treated as
# leaked and revoked.
api_key = os.environ.get("MALPEDIA_API_KEY", "")

# Authentication header. Malpedia's token scheme is "apitoken <key>"; the
# header is omitted entirely when no key is configured, so requests are then
# sent unauthenticated.
headers = {"Authorization": f"apitoken {api_key}"} if api_key else {}

# Function to get details of actors from Malpedia
def get_actors(timeout=30):
    """Fetch actor details from the Malpedia ``/get/actors`` endpoint.

    Uses the module-level ``base_url`` and ``headers``.

    Args:
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        The decoded JSON payload when the server answers HTTP 200, otherwise
        ``None`` (the reason is printed).
    """
    endpoint = f"{base_url}/get/actors"
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(endpoint, headers=headers, timeout=timeout)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve actors: {response.status_code}")
            return None
        return response.json()  # Parse JSON response
    except (requests.exceptions.RequestException, ValueError) as e:
        # Connection problems, timeouts, or a body that is not valid JSON.
        print(f"Failed to retrieve actors: {e}")
        return None

# Retrieve the actor payload and report how many actors it contains
actors = get_actors()
if actors:
    print("Number of actors found:", len(actors))
    print("Sample actor details:")
    # The per-actor sample printout below is disabled, so only the header line above is printed.
    #for actor in list(actors.values())[:5]:  # Display sample details of first 5 actors
    #    print("Name:", actor.get("name", "N/A"))
    #    print("Country:", actor.get("country", "N/A"))
    #    print("Description:", actor.get("description", "N/A"))
    #    print("Associated Groups:", actor.get("associated_groups", "N/A"))
    #    print("---")


Number of actors found: 781
Sample actor details:


In [2]:
import requests

import os

# Base URL for Malpedia API
base_url = "https://malpedia.caad.fkie.fraunhofer.de/api"

# API key, taken from the MALPEDIA_API_KEY environment variable instead of
# being stored in the notebook; empty when the variable is unset.
# Any key that was previously committed in this cell should be treated as
# leaked and revoked.
api_key = os.environ.get("MALPEDIA_API_KEY", "")

# Set up headers for authentication. Malpedia's token scheme is
# "apitoken <key>"; with no key configured the header is left out and
# requests are sent unauthenticated.
headers = {"Authorization": f"apitoken {api_key}"} if api_key else {}

# Function to retrieve details about malware actors
def fetch_malware_actors(timeout=30):
    """Fetch actor details from the Malpedia ``/get/actors`` endpoint.

    Uses the module-level ``base_url`` and ``headers``.

    Args:
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        The decoded JSON payload when the server answers HTTP 200, otherwise
        ``None`` (the reason is printed).
    """
    try:
        # Endpoint to get actor details
        endpoint = f"{base_url}/get/actors"
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(endpoint, headers=headers, timeout=timeout)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve actors. Status code: {response.status_code}")
            return None
        return response.json()  # Parse JSON response
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests
        # versions whose JSON error is not a RequestException.
        print(f"Error occurred: {e}")
        return None

# Function to retrieve malware families
def fetch_malware_families(timeout=30):
    """Fetch malware family details from the Malpedia ``/get/families`` endpoint.

    Uses the module-level ``base_url`` and ``headers``.

    Args:
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        The decoded JSON payload when the server answers HTTP 200, otherwise
        ``None`` (the reason is printed).
    """
    try:
        # Endpoint to get malware families
        endpoint = f"{base_url}/get/families"
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(endpoint, headers=headers, timeout=timeout)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve families. Status code: {response.status_code}")
            return None
        return response.json()  # Parse JSON response
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests
        # versions whose JSON error is not a RequestException.
        print(f"Error occurred while fetching families: {e}")
        return None

# Function to retrieve and save the BibTeX references for a specific actor
def fetch_actor_bibtex(actor_id, timeout=30):
    """Download the BibTeX references of one actor and save them as ``<actor_id>.bib``.

    The file is written to the current working directory. Uses the
    module-level ``base_url`` and ``headers``.

    Args:
        actor_id: Actor identifier; used both in the request path and as the
            output file name.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        None. Success or failure is reported with a printed message.
    """
    try:
        # Endpoint to get BibTeX references for the specified actor
        endpoint = f"{base_url}/get/bib/actor/{actor_id}"
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(endpoint, headers=headers, timeout=timeout)

        # Check if the request was successful
        if response.status_code == 200:
            bib_data = response.text  # Get the BibTeX data as plain text

            # Save the .bib data as UTF-8 so non-ASCII characters cannot fail
            # under a narrower platform default encoding.
            filename = f"{actor_id}.bib"
            with open(filename, 'w', encoding='utf-8') as file:
                file.write(bib_data)

            print(f"BibTeX references for actor '{actor_id}' saved to {filename}.")
        else:
            print(f"Failed to retrieve BibTeX for actor {actor_id}. Status code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching BibTeX for actor {actor_id}: {e}")

# Function to display some sample actor details
def display_actor_samples(actors_data):
    """Print a summary of up to five actors and fetch BibTeX for the first one.

    Args:
        actors_data: Mapping of actor id to a dict of actor details. A falsy
            value (``None`` or empty) only prints a notice.

    Side effects:
        Prints to stdout and calls ``fetch_actor_bibtex`` once, for the first
        actor shown.
    """
    if not actors_data:
        print("No actor data to display.")
        return

    print("Number of actors found:", len(actors_data))
    print("Sample actor details:")
    samples = list(actors_data.items())[:5]  # Show sample details for first 5 actors
    for position, (actor_id, actor_info) in enumerate(samples):
        print(f"ID: {actor_id}")
        print("  Name:", actor_info.get("name", "N/A"))
        print("  Country:", actor_info.get("country", "N/A"))
        print("  Description:", actor_info.get("description", "N/A"))
        print("  Associated Groups:", actor_info.get("associated_groups", "N/A"))
        print("---")
        # Fetch BibTeX references for the first actor only, as an example.
        # This is a condition rather than a `break` so the remaining samples
        # are still printed.
        if position == 0:
            fetch_actor_bibtex(actor_id)

# Function to display some sample malware family details
def display_family_samples(families_data):
    """Print a short summary of up to five malware families.

    Args:
        families_data: Mapping of family id to a dict of family details. A
            falsy value (``None`` or empty) only prints a notice.
    """
    # (printed label, key looked up in the family's detail dict)
    shown_fields = (
        ("  Name:", "name"),
        ("  Description:", "description"),
        ("  Associated Actors:", "associated_actors"),
    )

    if families_data:
        print("Number of families found:", len(families_data))
        print("Sample family details:")
        preview = list(families_data.items())[:5]  # first five families only
        for family_id, family_info in preview:
            print(f"ID: {family_id}")
            for label, detail_key in shown_fields:
                # Missing details are shown as "N/A"
                print(label, family_info.get(detail_key, "N/A"))
            print("---")
    else:
        print("No family data to display.")

# Main execution: download both datasets once and keep them in module-level
# variables. Either value is None when the corresponding request failed.
actors_data = fetch_malware_actors()
# Uncomment to display actor details
# display_actor_samples(actors_data)

families_data = fetch_malware_families()
# Uncomment to display family details
#display_family_samples(families_data)


In [3]:
# Show the whole families payload as the cell output; use the commented-out
# `actors_data` line instead to inspect the actors payload.
# NOTE(review): this renders the entire mapping; consider previewing a slice,
# e.g. dict(list(families_data.items())[:5]), to keep the saved notebook small.
#actors_data
families_data

{'aix.fastcash': {'urls': ['https://blog.lexfo.fr/ressources/Lexfo-WhitePaper-The_Lazarus_Constellation.pdf',
   'https://blog.talosintelligence.com/2019/05/10-years-of-virtual-dynamite.html',
   'https://github.com/fboldewin/FastCashMalwareDissected/',
   'https://i.blackhat.com/USA-20/Wednesday/us-20-Perlow-FASTCash-And-INJX_Pure-How-Threat-Actors-Use-Public-Standards-For-Financial-Fraud-wp.pdf',
   'https://i.blackhat.com/USA-20/Wednesday/us-20-Perlow-FASTCash-And-INJX_Pure-How-Threat-Actors-Use-Public-Standards-For-Financial-Fraud.pdf',
   'https://i.blackhat.com/eu-20/Wednesday/eu-20-Rivera-From-Zero-To-Sixty-The-Story-Of-North-Koreas-Rapid-Ascent-To-Becoming-A-Global-Cyber-Superpower.pdf',
   'https://mal-eats.net/en/2021/05/11/campo_new_attack_campaign_targeting_japan/',
   'https://symantec-blogs.broadcom.com/blogs/threat-intelligence/fastcash-lazarus-atm-malware',
   'https://threatrecon.nshc.net/2019/01/23/sectora01-custom-proxy-utility-tool-analysis/',
   'https://www.cisa.g