In [1]:
import re
import pickle
import string
import copy
from pathlib import Path

import pandas as pd
import numpy as np
import spacy
import requests
import feedparser
from bs4 import BeautifulSoup

In [37]:
path = 'https://www.cisa.gov/uscert/ncas/alerts.xml'
data_path = Path("../data")

In [3]:
d = feedparser.parse(path)
d['feed']['title']

'CISA Alerts'

In [4]:
d.feed.title

'CISA Alerts'

In [5]:
d.feed.link

'https://us-cert.cisa.gov/'

In [6]:
d.feed.description

'Alerts warn about vulnerabilities, incidents, and other security issues that pose a significant risk.'

In [7]:
d.entries[0].title

'AA22-321A: #StopRansomware: Hive Ransomware'

In [8]:
d.entries[0].link

'https://us-cert.cisa.gov/ncas/alerts/aa22-321a'

In [9]:
d.entries[0].description[:100]

'Original release date: November 17, 2022 | Last revised: November 25, 2022<br /><h3>Summary</h3><p c'

In [10]:
d.entries[0].published

'Thu, 17 Nov 2022 17:00:00 +0000'

In [11]:
d.entries[0].published_parsed

time.struct_time(tm_year=2022, tm_mon=11, tm_mday=17, tm_hour=17, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=321, tm_isdst=0)

In [12]:
d.entries[0].id

'18160 at https://us-cert.cisa.gov'

# get alerts beautifulsoup

In [13]:
import requests
from bs4 import BeautifulSoup

In [14]:
url='https://www.cisa.gov/uscert/ncas/alerts'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')

In [15]:
soup.title

<title>Alerts | CISA</title>

In [16]:
alert_url='https://www.cisa.gov/uscert/ncas/alerts/aa22-321a'
alert_page = requests.get(alert_url)
alert_soup = BeautifulSoup(alert_page.text, 'html.parser')
soup.title

<title>Alerts | CISA</title>

In [17]:
%%time
#get content for each alert on the first 10 pages of alerts
alerts_new = []
soup_alerts_path = Path('soup_alerts.pkl')
if soup_alerts_path.exists():
    with open(soup_alerts_path, "rb") as sap:
        alerts = pickle.load(sap)
else:
    # I think there's only 8 or so pages
    for p_num in range(10): #look at 10 pages of results
        #get the the content from this page
        url='https://www.cisa.gov/uscert/ncas/alerts?page=' + str(p_num)
        page = requests.get(url)
        soup = BeautifulSoup(page.text, 'html.parser')
        table = soup.find_all(class_="views-field views-field-title")

        for i in range(len(table)):
            #get the alert details from this line in the list
            link = table[i].find('a')['href']
            alert_id = table[i].find(class_="field-content").contents[0]
            title = table[i].find('a').text

            #get alert content
            alert_url='https://www.cisa.gov/uscert' + link
            alert_page = requests.get(alert_url)
            alert_soup = BeautifulSoup(alert_page.text, 'html.parser')
            alert_date = alert_soup.find(id="ncas-header").find(class_="submitted meta-text").text
            alert_text = alert_soup.find(id="ncas-content").text

            #write the content
            alerts_new.append([link, alert_id, title, alert_date, alert_text])

    with open(soup_alerts_path, "wb") as sap:
        pickle.dump(alerts_new, sap)
    alerts = alerts_new
alerts[0]

CPU times: user 39.5 ms, sys: 6.65 ms, total: 46.2 ms
Wall time: 46.1 ms


['/ncas/alerts/aa22-321a',
 'AA22-321A : ',
 '#StopRansomware: Hive Ransomware',
 '\n                                Original release date: November 17, 2022  | Last revised: November 25, 2022\n                                            ',
 '\n\nSummary\nActions to Take Today to Mitigate Cyber Threats from Ransomware:\n• Prioritize remediating known exploited vulnerabilities.\n• Enable and enforce multifactor authentication with strong passwords\n• Close unused ports and remove any application not deemed necessary for day-to-day operations.\nNote: This joint Cybersecurity Advisory (CSA) is part of an ongoing #StopRansomware effort to publish advisories for network defenders that detail various ransomware variants and ransomware threat actors. These #StopRansomware advisories include recently and historically observed tactics, techniques, and procedures (TTPs) and indicators of compromise (IOCs) to help organizations protect against ransomware. Visit stopransomware.gov to see all #Stop

In [18]:
%%time
headers = ['link', 'alert_id', 'title', 'date', 'text']
df = pd.DataFrame(alerts, columns=headers)
df.head()

CPU times: user 772 µs, sys: 0 ns, total: 772 µs
Wall time: 772 µs


Unnamed: 0,link,alert_id,title,date,text
0,/ncas/alerts/aa22-321a,AA22-321A :,#StopRansomware: Hive Ransomware,\n Original rel...,\n\nSummary\nActions to Take Today to Mitigate...
1,/ncas/alerts/aa22-320a,AA22-320A :,Iranian Government-Sponsored APT Actors Compro...,\n Original rel...,\n\nSummary\nFrom mid-June through mid-July 20...
2,/ncas/alerts/aa22-294a,AA22-294A :,#StopRansomware: Daixin Team,\n Original rel...,\n\nSummary\nActions to take today to mitigate...
3,/ncas/alerts/aa22-279a,AA22-279A :,Top CVEs Actively Exploited By People’s Republ...,\n ...,\n\nSummary\nThis joint Cybersecurity Advisory...
4,/ncas/alerts/aa22-277a,AA22-277A :,Impacket and Exfiltration Tool Used to Steal S...,\n Original rel...,\n\nSummary\nActions to Help Protect Against A...


# parse alert content

In [19]:
re_cve = re.compile(r'(CVE-\d{4}-\d+)')
re_cve.findall(alerts[0][4])

['CVE-2020-12812',
 'CVE-2021-31207',
 'CVE-2021-34473',
 'CVE-2021-34523',
 'CVE-2021-34473',
 'CVE-2021-34523',
 'CVE-2021-31207',
 'CVE-2021-42321']

In [20]:
re_ttp = re.compile(r'(\[|\s)(T\d{4})\.?\d*(\]|\s)')
[ttp[1] for ttp in re_ttp.findall(alerts[1][4])]

['T1190',
 'T1059',
 'T1562',
 'T1105',
 'T1070',
 'T1136',
 'T1016',
 'T1053',
 'T1021',
 'T1078',
 'T1136',
 'T1090',
 'T1018',
 'T1098',
 'T1003',
 'T1190',
 'T1059',
 'T1098',
 'T1136',
 'T1136',
 'T1053',
 'T1078',
 'T1562',
 'T1070',
 'T1003',
 'T1555',
 'T1018',
 'T1016',
 'T1021',
 'T1090',
 'T1105']

In [21]:
#regex for cve match
re_cve = re.compile(r'(CVE-\d{4}-\d*)')
cves = []
for i in range(len(alerts)):
    alert_cves = sorted(re_cve.findall(alerts[i][4]))
    cves.append(alert_cves)
cves = [set(cve) for cve in cves]

ttps = []
for i in range(len(alerts)):
    alert_ttps = [ttp[1] for ttp in re_ttp.findall(alerts[i][4])]
    alert_ttps = sorted(alert_ttps)
    ttps.append(alert_ttps)
ttps = [set(ttp) for ttp in ttps]

In [22]:
#add list of CVE & TTP to df
df['CVE_list'] = cves
df['TTP_list'] = ttps
print(df.describe())
df.head()

                          link      alert_id  \
count                      286           286   
unique                     286           286   
top     /ncas/alerts/aa22-321a  AA22-321A :    
freq                         1             1   

                                                 title  \
count                                              286   
unique                                             210   
top     Microsoft Updates for Multiple Vulnerabilities   
freq                                                56   

                                                     date  \
count                                                 286   
unique                                                277   
top     \n                                Original rel...   
freq                                                    2   

                                                     text CVE_list TTP_list  
count                                                 286      286      286  
unique

Unnamed: 0,link,alert_id,title,date,text,CVE_list,TTP_list
0,/ncas/alerts/aa22-321a,AA22-321A :,#StopRansomware: Hive Ransomware,\n Original rel...,\n\nSummary\nActions to Take Today to Mitigate...,"{CVE-2021-34523, CVE-2020-12812, CVE-2021-3447...","{T1486, T1070, T1112, T1537, T1133, T1190, T15..."
1,/ncas/alerts/aa22-320a,AA22-320A :,Iranian Government-Sponsored APT Actors Compro...,\n Original rel...,\n\nSummary\nFrom mid-June through mid-July 20...,{CVE-2021-44228},"{T1016, T1070, T1021, T1003, T1098, T1053, T11..."
2,/ncas/alerts/aa22-294a,AA22-294A :,#StopRansomware: Daixin Team,\n Original rel...,\n\nSummary\nActions to take today to mitigate...,{},"{T1486, T1598, T1550, T1567, T1003, T1098, T11..."
3,/ncas/alerts/aa22-279a,AA22-279A :,Top CVEs Actively Exploited By People’s Republ...,\n ...,\n\nSummary\nThis joint Cybersecurity Advisory...,"{CVE-2021-36260, CVE-2021-44228, CVE-2021-4201...",{}
4,/ncas/alerts/aa22-277a,AA22-277A :,Impacket and Exfiltration Tool Used to Steal S...,\n Original rel...,\n\nSummary\nActions to Help Protect Against A...,"{CVE-2021-26857, CVE-2021-26858, CVE-2021-2685...","{T1082, T1574, T1033, T1039, T1036, T1059, T10..."


# NER

In [23]:
# Some cleaning up of text field before NER
strings_to_replace= ["Summary", ("\n","  "),("\t","  "), re.compile(r"DISCLAIMER.*"), re.compile(r"INFORMATION REQUESTED.*")]
text = copy.deepcopy(df['text'])

def replace_non_printable(raw_text):
    filtered_text = list(filter(lambda x: x in string.printable, raw_text))
    return "".join(filtered_text)

for str_pattern in strings_to_replace:
    new_text = " "
    old_text = str_pattern
    if type(str_pattern) is tuple:
        new_text = str_pattern[1]
        old_text = str_pattern[0]
    
    if isinstance(str_pattern,str):
        text = text.str.replace(old_text,new_text).str.strip()
    else:
        text = text.str.replace(old_text,new_text,regex=True).str.strip()

# Remove Nonprintable, CVE IDs, and TTPs
text = text.apply(lambda raw_x: replace_non_printable(raw_x))
text = text.str.replace(re_cve,"",regex=True)
text = text.str.replace(re_ttp,"",regex=True)

df['text2'] = text
text[0]

'Actions to Take Today to Mitigate Cyber Threats from Ransomware:   Prioritize remediating known exploited vulnerabilities.   Enable and enforce multifactor authentication with strong passwords   Close unused ports and remove any application not deemed necessary for day-to-day operations.  Note: This joint Cybersecurity Advisory (CSA) is part of an ongoing #StopRansomware effort to publish advisories for network defenders that detail various ransomware variants and ransomware threat actors. These #StopRansomware advisories include recently and historically observed tactics, techniques, and procedures (TTPs) and indicators of compromise (IOCs) to help organizations protect against ransomware. Visit stopransomware.gov to see all #StopRansomware advisories and to learn more about other ransomware threats and no-cost resources.  The Federal Bureau of Investigation (FBI), the Cybersecurity and Infrastructure Security Agency (CISA), and the Department of Health and Human Services (HHS) are r

In [26]:
# Clean alert id
df['alert_id'] = df['alert_id'].str.replace(' :','').str.strip()

# Extract date from date field
df['date'] = pd.to_datetime(df['date'].str.replace("Original release date: ","",regex=False).str.replace("\|.*","",regex=True).str.strip())

#convert link to full path
df['full_path'] = 'https://www.cisa.gov' + df['link']


In [24]:
nlp = spacy.load("en_core_web_trf")

In [None]:
%%time
# Extract entity from each text2
ner_pickle_path = data_path / "ner_text2.pkl"
if 'ner2' in df.columns:
    print("Not running the entity recognition, since the data already exists as 'ner' in the dataframe.")
elif ner_pickle_path.exists():
    with open(ner_pickle_path, "rb") as npp:
        df["ner2"] = pickle.load(ner_pickle_path)
else:
    ner2 = []
    for i in range(len(df['text2'])):
        #nlp this doc
        this_doc = nlp(df['text2'][i])

        #select get entities from this doc
        this_ner = []
        for ent in this_doc.ents:
            #choose from list of 
            ent_labels = ['ORG','PRODUCT','GPE','PERSON','NORP']
            if ent.label_ in ent_labels:
                this_ner.append([ent.text, ent.label_])

        #append ner for this doc
        ner2.append(this_ner)
    with open(ner_pickle_path, "wb") as npp:
        pickle.dump(ner2, npp)
    df['ner2'] = ner2
df['ner2']

In [28]:
%%time
#extract entity from each text
ner_pickle_path = Path("alerts") / "ner.pkl"
if 'ner' in df.columns:
    print("Not running the entity recognition, since the data already exists as 'ner' in the dataframe.")
elif ner_pickle_path.exists():
    with open(ner_pickle_path, "rb") as npp:
        df['ner'] = pickle.load(ner_pickle_path)
else:
    ner = []
    for i in range(len(alerts)):
        #nlp this doc
        this_doc = nlp(alerts[i][4])

        #select get entities from this doc
        this_ner = []
        for ent in this_doc.ents:
            #choose from list of 
            ent_labels = ['ORG','PRODUCT','GPE','PERSON','NORP']
            if ent.label_ in ent_labels:
                this_ner.append([ent.text, ent.label_])

        #append ner for this doc
        ner.append(this_ner)
    with open(ner_pickle_path, "wb") as npp:
        pickle.dump(ner, npp)

    # add ner into df. leave duplicates for other opportunity
    df['ner'] = ner

df['ner']

Not running the entity recognition, since the data already exists as 'ner' in the dataframe.
CPU times: user 50 µs, sys: 12 µs, total: 62 µs
Wall time: 71.3 µs


In [None]:
%%time
# Extract entity from each text2
ner_pickle_path = data_path / "ner_text2.pkl"
if 'ner2' in df.columns:
    print("Not running the entity recognition, since the data already exists as 'ner' in the dataframe.")
elif ner_pickle_path.exists():
    with open(ner_pickle_path, "rb") as npp:
        df["ner2"] = pickle.load(ner_pickle_path)
else:
    ner2 = []
    for i in range(len(df['text2'])):
        #nlp this doc
        this_doc = nlp(df['text2'][i])

        #select get entities from this doc
        this_ner = []
        for ent in this_doc.ents:
            #choose from list of 
            ent_labels = ['ORG','PRODUCT','GPE','PERSON','NORP']
            if ent.label_ in ent_labels:
                this_ner.append([ent.text, ent.label_])

        #append ner for this doc
        ner2.append(this_ner)
    with open(ner_pickle_path, "wb") as npp:
        pickle.dump(ner2, npp)
    df['ner2'] = ner2
df['ner2']

In [None]:
# REmove this after running once!
if "ner2" not in locals():
    ner2 = copy.deepcopy(ner)

In [30]:
# Only write pickle if df has values
if df is not None and not df.empty:
    df.to_pickle("./alerts_df.pkl")

In [31]:
# Load in the dataframe if it's not there already
if "df" not in locals():
    df = pd.read_pickle("./alerts_df.pkl")

In [32]:
df.head()

Unnamed: 0,link,alert_id,title,date,text,CVE_list,TTP_list,text2,full_path,ner
0,/ncas/alerts/aa22-321a,AA22-321A,#StopRansomware: Hive Ransomware,2022-11-17,\n\nSummary\nActions to Take Today to Mitigate...,"{CVE-2021-34523, CVE-2020-12812, CVE-2021-3447...","{T1486, T1070, T1112, T1537, T1133, T1190, T15...",Actions to Take Today to Mitigate Cyber Threat...,https://www.cisa.gov/ncas/alerts/aa22-321a,"[[stopransomware.gov, ORG], [The Federal Burea..."
1,/ncas/alerts/aa22-320a,AA22-320A,Iranian Government-Sponsored APT Actors Compro...,2022-11-16,\n\nSummary\nFrom mid-June through mid-July 20...,{CVE-2021-44228},"{T1016, T1070, T1021, T1003, T1098, T1053, T11...","From mid-June through mid-July 2022, CISA cond...",https://www.cisa.gov/ncas/alerts/aa22-320a,"[[CISA, ORG], [Federal Civilian Executive Bran..."
2,/ncas/alerts/aa22-294a,AA22-294A,#StopRansomware: Daixin Team,2022-10-21,\n\nSummary\nActions to take today to mitigate...,{},"{T1486, T1598, T1550, T1567, T1003, T1098, T11...",Actions to take today to mitigate cyber threat...,https://www.cisa.gov/ncas/alerts/aa22-294a,"[[The Federal Bureau of Investigation, ORG], [..."
3,/ncas/alerts/aa22-279a,AA22-279A,Top CVEs Actively Exploited By People’s Republ...,2022-10-06,\n\nSummary\nThis joint Cybersecurity Advisory...,"{CVE-2021-36260, CVE-2021-44228, CVE-2021-4201...",{},This joint Cybersecurity Advisory (CSA) provid...,https://www.cisa.gov/ncas/alerts/aa22-279a,"[[People’s Republic of China, GPE], [PRC, GPE]..."
4,/ncas/alerts/aa22-277a,AA22-277A,Impacket and Exfiltration Tool Used to Steal S...,2022-10-04,\n\nSummary\nActions to Help Protect Against A...,"{CVE-2021-26857, CVE-2021-26858, CVE-2021-2685...","{T1082, T1574, T1033, T1039, T1036, T1059, T10...",Actions to Help Protect Against APT Cyber Acti...,https://www.cisa.gov/ncas/alerts/aa22-277a,[[the Cybersecurity and Infrastructure Securit...


In [33]:
print(df.iloc[[0],[3]])

        date
0 2022-11-17


# alert to CVE edges

In [38]:
#alert to cve edges
alert_cve = df[['alert_id','CVE_list']].explode('CVE_list')
alert_cve = alert_cve.drop_duplicates().dropna().reset_index(drop=True)
alert_cve.to_csv(data_path / 'alert_cve_edge.csv', index=False)
alert_cve

Unnamed: 0,alert_id,CVE_list
0,AA22-321A,CVE-2021-34523
1,AA22-321A,CVE-2020-12812
2,AA22-321A,CVE-2021-34473
3,AA22-321A,CVE-2021-31207
4,AA22-321A,CVE-2021-42321
...,...,...
391,TA09-195A,CVE-2008-0015
392,TA09-195A,CVE-2009-1537
393,TA09-133B,CVE-2009-1492
394,TA09-133B,CVE-2009-1493


# alert nodes

In [40]:
alerts_nodes = df[['alert_id','full_path','title','date']].drop_duplicates()
alerts_nodes.to_csv(data_path / 'alert_nodes.csv')
alerts_nodes

Unnamed: 0,alert_id,full_path,title,date
0,AA22-321A,https://www.cisa.gov/ncas/alerts/aa22-321a,#StopRansomware: Hive Ransomware,2022-11-17
1,AA22-320A,https://www.cisa.gov/ncas/alerts/aa22-320a,Iranian Government-Sponsored APT Actors Compro...,2022-11-16
2,AA22-294A,https://www.cisa.gov/ncas/alerts/aa22-294a,#StopRansomware: Daixin Team,2022-10-21
3,AA22-279A,https://www.cisa.gov/ncas/alerts/aa22-279a,Top CVEs Actively Exploited By People’s Republ...,2022-10-06
4,AA22-277A,https://www.cisa.gov/ncas/alerts/aa22-277a,Impacket and Exfiltration Tool Used to Steal S...,2022-10-04
...,...,...,...,...
281,TA08-352A,https://www.cisa.gov/ncas/alerts/TA08-352A,Microsoft Internet Explorer Data Binding Vulne...,2008-12-17
282,TA08-350A,https://www.cisa.gov/ncas/alerts/TA08-350A,Apple Updates for Multiple Vulnerabilities,2008-12-15
283,TA08-344A,https://www.cisa.gov/ncas/alerts/TA08-344A,Microsoft Updates for Multiple Vulnerabilities,2008-12-09
284,TA08-340A,https://www.cisa.gov/ncas/alerts/TA08-340A,Sun Java Updates for Multiple Vulnerabilities,2008-12-05


# dedupe

In [41]:
import os
import csv
import re
import logging
import optparse

import dedupe
from unidecode import unidecode

In [67]:
#alert to entity edges
df_ner = df[['alert_id','ner']].explode('ner')
df_ner[['label','type']] = pd.DataFrame(df_ner.ner.tolist(), index= df_ner.index)
df_ner=df_ner.reset_index(level=0)
df_ner=df_ner.drop(columns='ner')

In [46]:
ner_dedupe = df_ner[['label', 'type']]
ner_dedupe = ner_dedupe.drop_duplicates()
ner_dedupe = ner_dedupe.reset_index(drop=True)
ner_dedupe.to_csv(data_path / 'alert_ner.csv', index=True, index_label = 'Id', columns=['label','type'])
ner_dedupe

Unnamed: 0,label,type
0,stopransomware.gov,ORG
1,The Federal Bureau of Investigation,ORG
2,FBI,ORG
3,the Cybersecurity and Infrastructure Security ...,ORG
4,CISA,ORG
...,...,...
3632,Mozilla Foundation,ORG
3633,Firefox 3.0.4,PRODUCT
3634,Firefox 2.0.0.18,PRODUCT
3635,SeaMonkey 1.1.13,PRODUCT


In [49]:
#Do a little bit of data cleaning with the help of Unidecode and Regex. Things like casing, extra spaces, quotes and new lines can be ignored.

def preProcess(column):
#
    column = unidecode(column)
    column = re.sub('  +', ' ', column)
    column = re.sub('\n', ' ', column)
    column = column.strip().strip('"').strip("'").lower().strip()
#If data is missing, indicate that by setting the value to None

    if not column:
        column = None
    return column
#Read in our data from a CSV file and create a dictionary of records, where the key is a unique record ID and each value is dict

def readData(filename):
#
    data_d = {}
    with open(filename, encoding="utf8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            clean_row = [(k, preProcess(v)) for (k, v) in row.items()]
            row_id = int(row['Id'])
            data_d[row_id] = dict(clean_row)

    return data_d


In [54]:
input_file = data_path / 'alert_ner.csv'
output_file = data_path / 'ner_cluster.csv'
training_file = data_path / 'ner_training.json'

In [50]:
data_d=readData(input_file)

In [52]:
#define dedupe
fields = [
            {'field': 'label', 'type': 'String'},            {'field': 'type', 'type': 'String'}
            ]
deduper = dedupe.Dedupe(fields)

In [53]:
print('preparing training...')
if os.path.exists(training_file):
    print('reading labeled examples from ', training_file)
    with open(training_file, 'rb') as f:
        deduper.prepare_training(data_d, f)
else:
    deduper.prepare_training(data_d)

preparing training...


In [55]:
print('starting active labeling...')
dedupe.console_label(deduper)

label : the cybersecurity and infrastructure security agency
type : org

label : the cybersecurity and infrastructure security agency's
type : org

0/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished


starting active labeling...


 y


label : the department of the treasury
type : org

label : the department of the treasury's
type : org

1/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : internet explorer
type : product

label : internet explorer 7
type : product

2/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : internet explorer
type : product

label : internet explorer's
type : product

3/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the daixin team
type : org

label : the daixin team's
type : org

4/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows server 2008 r2 sp1
type : product

label : windows server 2008
type : product

5/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the uk government
type : org

label : the uk government.[19][20
type : org

6/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : volume shadow
type : product

label : volume shadow copies
type : product

7/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : flash playeradobe
type : product

label : flash player
type : product

8/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : photoshop cs5
type : product

label : photoshop
type : product

9/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : cert nz[7
type : org

label : cert nz
type : org

10/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : firefox
type : product

label : firefox 34
type : product

11/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : microsoft security
type : org

label : microsoft security blog
type : org

12/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : flash player 10
type : product

label : flash player 10.0.22.87
type : product

13/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows nt 10.0
type : product

label : windows nt
type : product

14/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : sysinternals autoruns
type : product

label : sysinternals
type : product

15/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : onapsis
type : org

label : onapsis inc.
type : org

16/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows server
type : product

label : windows server update services
type : product

17/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : pulse secure
type : product

label : pulse secure vpn vulnerability
type : product

18/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : mac os
type : product

label : mac os x.
type : product

19/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : mac os x
type : product

label : mac os x v10.5.6
type : product

20/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : azure ad
type : product

label : azure
type : product

21/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : sharepoint
type : product

label : sharepoint server 2010 sp2
type : product

22/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : exchange
type : product

label : exchange server 2013
type : product

23/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : apt41
type : product

label : apt
type : product

24/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : java
type : product

label : javascript
type : product

25/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : macos x
type : product

label : macos x program
type : product

25/10 positive, 1/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : cisco
type : org

label : cisco blogs
type : org

26/10 positive, 1/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : u.s
type : gpe

label : u.s.-china
type : gpe

27/10 positive, 1/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : nmap scripting engine
type : product

label : nmap
type : product

27/10 positive, 2/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : win
type : product

label : windowsinternet
type : product

28/10 positive, 2/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : microsoft
type : org

label : microsoftmicrosoft
type : org

28/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : shockwave playerapsb11
type : product

label : shockwave
type : product

29/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows
type : org

label : windowsmicrosoft
type : org

30/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : netsupport
type : product

label : netsupport remote access tool
type : product

31/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : win64
type : product

label : win
type : product

32/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 u


label : mac os x
type : product

label : mac os x v10.5.8 / security update 2009-003
type : product

32/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : office
type : product

label : office compatibility pack sp3
type : product

33/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : mac os
type : product

label : mac os x"/applications/adobe reader 9
type : product

33/10 positive, 4/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : uk
type : org

label : ukgov
type : org

33/10 positive, 5/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : atlassian jira data center
type : org

label : atl
type : org

34/10 positive, 5/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2003
type : product

label : 7windows server 2003windows
type : product

34/10 positive, 6/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : java 7
type : product

label : java se critical patch update advisory update
type : product

35/10 positive, 6/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : e5
type : product

label : windows xp service pack 3windows xp professional x64 edition service pack 2windows server 2003
type : product

35/10 positive, 7/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : exchange server vulnerabilities mitigations
type : product

label : 9
type : product

35/10 positive, 8/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the microsoft enhanced mitigation experience toolkit (emet)emet can be used to help prevent exploitation of the flash vulnerabilities
type : product

label : tiger
type : product

35/10 positive, 9/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : microsoft exchange on-premises mitigation tool
type : org

label : microsift
type : product

35/10 positive, 10/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : u.s
type : gpe

label : u.s. environmental protection agency
type : org

35/10 positive, 11/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : me
type : product

label : mac os x"/applications/adobe reader 9
type : product

35/10 positive, 12/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : 6
type : product

label : forefront unified access gatewaymicrosoft
type : product

35/10 positive, 13/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : win.bat
type : product

label : windows registry editor version 5.00[hkey_classes_root\acroexch.document.7]"editflags"=hex:00,00,00,00disable the display of pdf files
type : product

35/10 positive, 14/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : 3a
type : product

label : adobe reader.app/contents/frameworks/adobe3d.framework"gnu/linux
type : product

35/10 positive, 15/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : cisa 24/7 operations center
type : org

label : cig
type : org

35/10 positive, 16/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : mac os x 10.5
type : product

label : mac os x v10.5.7
type : product

35/10 positive, 17/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : sharepoint services
type : product

label : sharepoint server
type : product

36/10 positive, 17/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : china chopper web shell
type : product

label : china chopper's
type : product

36/10 positive, 18/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 10
type : product

label : windows 8
type : product

37/10 positive, 18/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : flash player 10.0.32.18
type : product

label : flash player 10.0.22.87
type : product

37/10 positive, 19/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows server
type : product

label : windows registry editor version 5.00[hkey_classes_root\acroexch.document.7]"editflags"=hex:00,00,00,00disable the display of pdf files
type : product

38/10 positive, 19/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 10
type : product

label : windows xp)sophoshttps://www.sophos.com
type : product

38/10 positive, 20/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : cobalt strike beacon
type : product

label : cobalt strike c2
type : product

38/10 positive, 21/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 7
type : product

label : windows mail
type : product

39/10 positive, 21/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : gtsss
type : org

label : gtsss
type : product

39/10 positive, 22/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 8
type : product

label : windows mail
type : product

40/10 positive, 22/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : exchange
type : org

label : exchange online
type : product

40/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : macos
type : org

label : macos x
type : product

40/10 positive, 24/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : java 6
type : product

label : java se critical patch update
type : product

41/10 positive, 24/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : triconex
type : org

label : triconex tricon
type : product

41/10 positive, 25/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : mouseisland
type : product

label : mouseisland
type : gpe

42/10 positive, 25/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : shockwave player 11.6.8.638
type : product

label : shockwave player 11.6.0.626
type : product

43/10 positive, 25/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 8
type : product

label : windows installer
type : product

44/10 positive, 25/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows command shell
type : product

label : windows
type : gpe

44/10 positive, 26/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows nt 10.0
type : product

label : windows 7
type : product

44/10 positive, 27/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : ryuk
type : product

label : ryukreadme
type : org

44/10 positive, 28/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 7
type : product

label : windows server 2019
type : product

45/10 positive, 28/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : active directory--
type : product

label : active directory user accounts
type : product

45/10 positive, 29/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : java 7
type : product

label : java se critical patch update advisory -
type : product

45/10 positive, 30/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : shodan
type : product

label : shodan limited liability company
type : org

45/10 positive, 31/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : chinese ministry of state security
type : org

label : the chinese ministry of state security
type : org

46/10 positive, 31/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : acrobat x
type : product

label : acrobat javascript).prevent
type : product

47/10 positive, 31/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 7
type : product

label : windows operating system
type : product

47/10 positive, 32/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 7
type : product

label : windows live mailmicrosoft
type : product

47/10 positive, 33/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows operating systems (oss
type : product

label : windows 7
type : product

47/10 positive, 34/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2008 r2
type : product

label : windows server 2008 sp2
type : product

47/10 positive, 35/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows operating systems (oss
type : product

label : windows xp
type : product

48/10 positive, 35/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 8
type : product

label : windows server semi-annual branch
type : product

48/10 positive, 36/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the united states
type : gpe

label : the united states secret service
type : org

48/10 positive, 37/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : national cybersecurity and communications integration center
type : org

label : the dhs national cybersecurity and communications integration center
type : org

48/10 positive, 38/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows netlogon remote protocol
type : product

label : windows 7
type : product

49/10 positive, 38/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 7
type : product

label : windows embedded open type (eot)
type : product

49/10 positive, 39/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : vnc
type : product

label : vnclogs
type : org

49/10 positive, 40/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : office 2013
type : product

label : office 2003
type : product

49/10 positive, 41/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows
type : org

label : windows backup manager
type : product

49/10 positive, 42/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : office 2013
type : product

label : office 2003 web components sp3
type : product

49/10 positive, 43/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : adobe security bulletin apsb13-
type : product

label : adobe security bulletin apsb10-22
type : org

49/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : samsam
type : product

label : samsam
type : person

50/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows
type : gpe

label : windows rt
type : product

51/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows
type : gpe

label : windows nt 5.1
type : product

52/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the united states
type : org

label : the united states[1][2][3
type : gpe

53/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : palo alto networks
type : org

label : palo alto
type : gpe

54/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 8
type : product

label : windows xp)heimdalhttp://goz.heimdalsecurity.com/ (microsoft windows xp
type : product

55/10 positive, 44/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 7
type : product

label : windows nt directory services
type : product

55/10 positive, 45/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : macintoshadobe reader 9.5
type : product

label : macintoshadobe reader 8.2.6
type : product

55/10 positive, 46/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : win7elevate
type : product

label : win7elevatve
type : product

56/10 positive, 46/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 7
type : product

label : windows server 2003.[3
type : product

57/10 positive, 46/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the united states.[4][5
type : gpe

label : the united states,[3
type : gpe

57/10 positive, 47/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the united kingdom's
type : gpe

label : the united kingdom[7][8
type : gpe

58/10 positive, 47/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows command shell
type : product

label : windows 7
type : product

59/10 positive, 47/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2016
type : product

label : windows server 2008
type : product

59/10 positive, 48/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the united kingdom[5
type : gpe

label : the united kingdom,[4
type : gpe

59/10 positive, 49/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the united kingdom's
type : gpe

label : the united kingdom,[4
type : gpe

60/10 positive, 49/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the cybersecurity and infrastructure security agency
type : org

label : the cybersecurity and infrastructure agency
type : org

61/10 positive, 49/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : internetexplorer
type : product

label : internet explorer's
type : product

62/10 positive, 49/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 8
type : product

label : windows event logs
type : product

63/10 positive, 49/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 8
type : product

label : windows 10.disable
type : product

63/10 positive, 50/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 11
type : product

label : windows xp)heimdalhttp://goz.heimdalsecurity.com/ (microsoft windows xp
type : product

63/10 positive, 51/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows operating system
type : org

label : windows xp
type : product

63/10 positive, 52/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 7
type : product

label : windows server update services
type : org

63/10 positive, 53/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : u.s.
type : gpe

label : u.s. government
type : org

63/10 positive, 54/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : exchange servers
type : org

label : exchange server 2013
type : product

64/10 positive, 54/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows operating system
type : org

label : windows 7
type : product

65/10 positive, 54/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the united kingdom's
type : gpe

label : the united kingdom national cyber security centre
type : org

65/10 positive, 55/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 7
type : product

label : windows nt 6.1
type : product

65/10 positive, 56/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : u.s
type : gpe

label : u.s. doj
type : org

65/10 positive, 57/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows remote
type : product

label : windows 7
type : product

65/10 positive, 58/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : adobe security bulletin apsb11-17
type : org

label : adobe security bulletin apsb09
type : product

65/10 positive, 59/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : central scientific institute of chemistry and mechanics
type : org

label : central scientific research institute of chemistry and mechanics
type : org

66/10 positive, 59/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows event
type : product

label : windows 7
type : product

67/10 positive, 59/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : securityweek
type : org

label : security week
type : org

67/10 positive, 60/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : crashoverride
type : product

label : crashoveride
type : product

68/10 positive, 60/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the multi-state information sharing & analysis center
type : org

label : multi-state information sharing and analysis center
type : org

69/10 positive, 60/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows systems
type : product

label : windows registry editor version 5.00[hkey_classes_root\acroexch.document.7]"editflags"=hex:00,00,00,00disable the display of pdf files
type : product

70/10 positive, 60/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the u.s. doj
type : org

label : the u.s. department of labor
type : org

70/10 positive, 61/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : java 5
type : product

label : java 7 update 11
type : product

70/10 positive, 62/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : us-cert's
type : org

label : us-cert technical cyber security alert
type : org

70/10 positive, 63/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 8.1
type : product

label : windows registry editor version 5.00[hkey_classes_root\acroexch
type : product

70/10 positive, 64/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : java 5
type : product

label : java 7 update 7
type : product

70/10 positive, 65/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2003's end of support
type : product

label : windowsgr
type : product

70/10 positive, 66/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server message block 1.0
type : product

label : windows 8)
type : product

70/10 positive, 67/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : flash player 10.1.95.1
type : product

label : flash player 10.1.53.64
type : product

70/10 positive, 68/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : national security agency
type : org

label : the national security agency's
type : org

71/10 positive, 68/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 11
type : product

label : windows xp)mcafeewww.mcafee.com
type : product

72/10 positive, 68/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : java 7
type : product

label : java web start
type : product

72/10 positive, 69/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows nt 5.2
type : product

label : windows nt 6.1
type : product

72/10 positive, 70/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 11
type : product

label : windows defender exploit guard
type : product

73/10 positive, 70/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 95
type : product

label : windows server update services
type : product

73/10 positive, 71/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows xp)heimdalhttp://goz.heimdalsecurity.com/ (microsoft windows xp
type : product

label : windows mail
type : product

73/10 positive, 72/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows hardware compatibility program
type : org

label : windows nt
type : product

73/10 positive, 73/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : nvidia corporation
type : org

label : cn=nvidia corporation
type : org

73/10 positive, 74/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : the department of the treasury
type : org

label : u.s. department of the treasury
type : org

74/10 positive, 74/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : mitre att&ck(r)
type : product

label : mitre att&ck version 11: software -
type : product

75/10 positive, 74/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows 10
type : product

label : windows active directory
type : product

76/10 positive, 74/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : virus total
type : product

label : virustotal
type : product

76/10 positive, 75/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows management instrumentation
type : org

label : windows 95
type : product

77/10 positive, 75/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : microsoft security response center
type : org

label : microsoft security blog
type : org

77/10 positive, 76/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2003
type : product

label : windows xp service pack 3windows xp professional x64 edition service pack 2windows server 2003
type : product

77/10 positive, 77/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : java 5
type : product

label : java 6.use
type : product

77/10 positive, 78/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows server 2012 r2
type : product

label : windows 8)
type : product

78/10 positive, 78/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : 8
type : product

label : versionsadobe air 1.5.2
type : product

78/10 positive, 79/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows print spooler
type : product

label : windows 95
type : product

78/10 positive, 80/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows print spooler
type : product

label : windows 95
type : product

78/10 positive, 81/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows print spooler
type : product

label : windows nt
type : product

78/10 positive, 82/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : flash player 10.3.185.25
type : product

label : flash player 10.0.45.2
type : product

78/10 positive, 83/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows defenderthe
type : product

label : windows registry editor version 5.00[hkey_classes_root\acroexch.document.7]"editflags"=hex:00,00,00,00disable the display of pdf files
type : product

79/10 positive, 83/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : the united kingdom.[8
type : gpe

label : the united kingdom national cyber security centre
type : org

79/10 positive, 84/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2016
type : product

label : windows 8)
type : product

79/10 positive, 85/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows defenderthe
type : product

label : windows 95
type : product

79/10 positive, 86/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows 2000windows
type : product

label : windows 95
type : product

79/10 positive, 87/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : enhanced mitigation experience toolkit 5.2
type : product

label : the enhanced mitigation experience toolkit
type : product

79/10 positive, 88/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows rt
type : product

label : windows 10.disable
type : product

80/10 positive, 88/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : dragonflybsd
type : org

label : dragos
type : product

80/10 positive, 89/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2003
type : product

label : windowsgr
type : product

80/10 positive, 90/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows server 2016
type : product

label : windows server semi-annual branch
type : product

80/10 positive, 91/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : java se
type : product

label : java 5
type : product

80/10 positive, 92/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows defender(r) exploit guard
type : product

label : windows 8.1
type : product

81/10 positive, 92/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : security bulletin ms13
type : product

label : security bulletin apsb10-
type : product

81/10 positive, 93/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : firefox 34
type : product

label : firefox 2.0.0.18
type : product

81/10 positive, 94/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : windows defender
type : product

label : windows rt
type : product

82/10 positive, 94/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : windows defender
type : product

label : windows 95
type : product

82/10 positive, 95/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : atlassian data center
type : org

label : atlassian jira data center
type : org

82/10 positive, 96/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : confluencelogs
type : org

label : cnn
type : org

83/10 positive, 96/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : server 2008
type : product

label : server 2012
type : product

83/10 positive, 97/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 n


label : u.s. department of justice
type : org

label : the department of justice
type : org

83/10 positive, 98/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 y


label : arkansas
type : gpe

label : argentinabelgiumbrazilcambodiachinacolombiaegyptindiairanjordanpakistansaudi
type : gpe

84/10 positive, 98/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


 f


Finished labeling


In [56]:
deduper.train()

with open(training_file, 'w') as tf:
        deduper.write_training(tf)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [57]:
print('clustering...')
clustered_dupes = deduper.partition(data_d, 0.5)

print('# duplicate sets', len(clustered_dupes))

clustering...
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid dead

In [58]:
cluster_membership = {}
for cluster_id, (records, scores) in enumerate(clustered_dupes):
    for record_id, score in zip(records, scores):
        cluster_membership[record_id] = {
            "Cluster ID": cluster_id,
            "confidence_score": score
        }

with open(output_file, 'w') as f_output, open(input_file, encoding="utf8") as f_input:

    reader = csv.DictReader(f_input)
    fieldnames = ['Cluster ID', 'confidence_score'] + reader.fieldnames

    writer = csv.DictWriter(f_output, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        row_id = int(row['Id'])
        row.update(cluster_membership[row_id])
        writer.writerow(row)

# alert to ner edge

In [66]:
df_clusters = pd.read_csv(data_path / 'ner_cluster.csv')

** TODO **
1. Remove punctuation from labels
2. Remove any labels that have only < 2 characters once punctuation is removed
3. Plot the confidence scores and check accuracy around the chosen threshold
4. Figure out why so many matches are not correct. "stopransomware" matching with CISA, FBI doesn't make sense, same with "MS Windows and Firefox, Thunderbird, SeaMonkey.
**5. Match "PRODUCT" type entities to CPE list**



In [68]:
df_ner_clusters = df_ner.merge(df_clusters, left_on='index', right_on='Id', suffixes=["_x","_y"]).drop(columns="index")
df_ner_clusters

Unnamed: 0,alert_id,label_x,type_x,Cluster ID,confidence_score,Id,label_y,type_y
0,AA22-321A,stopransomware.gov,ORG,1,0.800972,0,stopransomware.gov,ORG
1,AA22-321A,The Federal Bureau of Investigation,ORG,1,0.800972,0,stopransomware.gov,ORG
2,AA22-321A,FBI,ORG,1,0.800972,0,stopransomware.gov,ORG
3,AA22-321A,the Cybersecurity and Infrastructure Security ...,ORG,1,0.800972,0,stopransomware.gov,ORG
4,AA22-321A,CISA,ORG,1,0.800972,0,stopransomware.gov,ORG
...,...,...,...,...,...,...,...,...
18405,TA08-319A,Thunderbird,PRODUCT,794,1.000000,285,MS Windows,PRODUCT
18406,TA08-319A,SeaMonkey 1.1.13,PRODUCT,794,1.000000,285,MS Windows,PRODUCT
18407,TA08-319A,Mozilla Foundation,ORG,794,1.000000,285,MS Windows,PRODUCT
18408,TA08-319A,Mozilla,ORG,794,1.000000,285,MS Windows,PRODUCT


In [76]:
alert_ner_edge = pd.DataFrame(df_ner_clusters.groupby(by=['alert_id','label_x']).size())
alert_ner_edge = alert_ner_edge.reset_index()
alert_ner_edge.to_csv(data_path / 'alert_ner_edge.csv', index=False, header=['alert_id','ner','weight'])
alert_ner_edge

Unnamed: 0,alert_id,label_x,0
0,AA18-284A,ACSC,2
1,AA18-284A,APT19,2
2,AA18-284A,Active Directory,1
3,AA18-284A,Adobe,1
4,AA18-284A,Adwind RAT,2
...,...,...,...
7230,TA18-331A,Windows,1
7231,TA18-331A,YARA,1
7232,TA18-331A,the Department of Homeland Security,1
7233,TA18-331A,the Federal Bureau of Investigation,1


# ner node

In [74]:
ner_node = df_ner_clusters.groupby(by=['label_x','Cluster ID']).size()
ner_node = ner_node.reset_index()
ner_node.to_csv(data_path / 'ner_node.csv', index=False, header=['ner','cluster id','weight'])
ner_node

Unnamed: 0,label_x,Cluster ID,0
0,!,744,1
1,%AppData% folder,712,1
2,-,7,2
3,-,47,1
4,-,75,1
...,...,...,...
7097,workstation,58,1
7098,x64 Edition Service Pack,751,2
7099,x86,62,1
7100,yum,44,1
