# Title: HW vs SW security issues
### Purpose: Compare issues from NVD & CWE findings
### Author: @mjz


## Background
https://github.com/cve-search/cve-search

https://api.mongodb.com/python/current/tutorial.html





## CWE - Common Weakness Enumeration

### Load the cve-search MongoDB collections

In [1]:
import pymongo 
import math
import pprint
import pandas as pd

In [2]:
from pymongo import MongoClient
# Connect to the local MongoDB server and list the databases it hosts.
client = MongoClient(host='localhost', port=27017)
print(client.list_database_names())

# Open the `cvedb` database and list its collections.
cvedb = client.get_database('cvedb')
print(cvedb.list_collection_names())

['admin', 'config', 'cvedb', 'local']
['cwe', 'capec', 'mgmt_blacklist', 'cpe', 'via4', 'mgmt_whitelist', 'cves', 'info', 'cpeother']


In [3]:
# Bind each collection of `cvedb` to a notebook-level name.
cves = cvedb.cves
cpe = cvedb.cpe
cwe = cvedb.cwe
via4 = cvedb.via4
info = cvedb.info
capec = cvedb.capec
cpeother = cvedb.cpeother

# Print the document count of every collection, in the order bound above.
# The final entry counts `cpeother` itself rather than repeating `info`.
for collection in (cves, cpe, cwe, via4, info, capec, cpeother):
    print(collection.count_documents({}))

140341
300209
1005
117666
5
0
5


In [4]:
# Load every CVE document into a DataFrame indexed by CVE id; drop=False
# keeps `id` available as an ordinary column as well.
df = (
    pd.DataFrame(list(cves.find()))
    .set_index('id', drop=False)
)

In [153]:
# Load every document of the `cpe` collection into a DataFrame.
df_cpe = pd.DataFrame(list(cpe.find()))

In [None]:
# Summary statistics of the CPE frame.
df_cpe.describe()

In [5]:
# Load every document of the `cwe` collection into a DataFrame.
df_cwe = pd.DataFrame(list(cwe.find()))

In [11]:
# Column dtypes of the CWE frame.
df_cwe.dtypes

_id              object
id               object
Description      object
name             object
status           object
weaknessabs      object
relationships    object
dtype: object

In [12]:
# Load every document of the `via4` collection into a DataFrame.
df_via4 = pd.DataFrame(list(via4.find()))

In [13]:
# Preview the first rows of the VIA4 frame.
df_via4.head()

Unnamed: 0,_id,id,refmap,redhat,oval,saint,statements,d2sec
0,5e6ecbc70690a655a387b35f,CVE-2006-7056,{'bugtraq': ['20060605 [MajorSecurity #9]HostA...,,,,,
1,5e6ecbc70690a655a387b360,CVE-2015-8878,{'confirm': ['http://www.php.net/ChangeLog-5.p...,,,,,
2,5e6ecbc70690a655a387b361,CVE-2018-6233,{'confirm': ['https://esupport.trendmicro.com/...,,,,,
3,5e6ecbc70690a655a387b362,CVE-2010-3590,"{'sectrack': ['1024972'], 'confirm': ['http://...",,,,,
4,5e6ecbc70690a655a387b363,CVE-2013-6411,{'confirm': ['http://bugs.openttd.org/task/582...,,,,,


In [118]:
# Column dtypes of the CVE frame.
df.dtypes

_id                                         object
id                                          object
assigner                                    object
Published                           datetime64[ns]
Modified                            datetime64[ns]
summary                                     object
access                                      object
impact                                      object
cvss                                       float64
cvss-time                           datetime64[ns]
cvss-vector                                 object
references                                  object
vulnerable_configuration                    object
vulnerable_product                          object
cwe                                         object
vulnerable_configuration_cpe_2_2            object
dtype: object

In [110]:
# Column labels of the CVE frame.
df.columns

Index(['_id', 'id', 'assigner', 'Published', 'Modified', 'summary', 'access',
       'impact', 'cvss', 'cvss-time', 'cvss-vector', 'references',
       'vulnerable_configuration', 'vulnerable_product', 'cwe',
       'vulnerable_configuration_cpe_2_2'],
      dtype='object')

In [120]:
 g = df.groupby([df['Published'].dt.year])

In [133]:
import numpy as np

# Per publication year: number of CVEs with a CVSS score, and the average
# score computed as sum / count over those scored CVEs.
yearly_cvss = g.cvss
scored_per_year = yearly_cvss.count()
average_per_year = yearly_cvss.sum() / scored_per_year
print((scored_per_year, average_per_year))

(Published
1988        2
1989        3
1990       11
1991       15
1992       14
1993       13
1994       26
1995       25
1996       75
1997      253
1998      247
1999      923
2000     1020
2001     1679
2002     2170
2003     1548
2004     2479
2005     5010
2006     6659
2007     6596
2008     5664
2009     5778
2010     4667
2011     4172
2012     5351
2013     5324
2014     8008
2015     6595
2016     6517
2017    18113
2018    18154
2019    18938
2020     4266
Name: cvss, dtype: int64, Published
1988    10.000000
1989     7.266667
1990     6.354545
1991     7.406667
1992     7.192857
1993     6.438462
1994     6.288462
1995     7.520000
1996     6.270667
1997     6.401186
1998     6.275304
1999     6.169231
2000     6.179608
2001     6.020608
2002     6.140829
2003     6.146835
2004     5.940702
2005     5.863932
2006     6.091035
2007     6.629776
2008     6.670410
2009     6.568588
2010     6.518642
2011     6.462560
2012     6.123715
2013     6.075470
2014     5.780969
2015 

## ATT&CK, STIX, TAXII, etc.



From [CTI github](https://github.com/mitre/cti/blob/master/USAGE.md): *In ATT&CK, there are three main concepts (excluding Tactics for now): Techniques, Groups, and Software. Most techniques also have Mitigations. STIX 2.0 describes these as objects and uses different terminology to describe them. The following table is a mapping of ATT&CK concepts to STIX 2.0 objects:*

```
ATT&CK concept    STIX object
--------------    ----------------
Technique         attack-pattern
Group             intrusion-set
Software          malware or tool
Mitigation        course-of-action
Tactic            x-mitre-tactic
Matrix            x-mitre-matrix
```





## Import and load STIX datasets

In [13]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from itertools import chain

# from owlready2 import *
from stix2 import *
from stix2 import FileSystemSource as fs
from stix2 import Filter
from stix2.utils import get_type_from_id

# Root of the local mitre/cti checkout (relative to the notebook's working
# directory) and the path of each dataset directory beneath it.
cti_home = '../mitre/cti/'
enterprise = cti_home + 'enterprise-attack'
mobile = cti_home + 'mobile-attack'
pre = cti_home + 'pre-attack'
# NOTE(review): this rebinds `capec`, which the MongoDB cell earlier in the
# notebook bound to the `cvedb.capec` collection.
capec = cti_home + 'capec'

# One FileSystemSource per dataset directory.
e = fs(enterprise)
c = fs(capec)
p = fs(pre)
m = fs(mobile)

# Individual sources, for per-dataset queries.
sources = [e, c, p, m]

# A single composite source that queries all four datasets at once.
# NOTE(review): the order here (e, m, p, c) differs from `sources` (e, c, p, m);
# confirm whether that is intentional.
all_ds = CompositeDataSource()
all_ds.add_data_sources([e, m, p, c])

# IPython magic: show the notebook's working directory.
%pwd


'/opt/projects/diss/jupyter_nbs/mine'

## Functions for querying STIX data

In [6]:
def get_all_software(src):
    """Return every malware and tool object in ``src`` as one list.

    One query is issued per STIX type; malware results come first,
    followed by tool results.
    """
    software = []
    for stix_type in ('malware', 'tool'):
        software.extend(src.query([Filter('type', '=', stix_type)]))
    return software


def get_all_techniques(src):
    """Return the result of querying ``src`` for all ``attack-pattern`` objects."""
    return src.query([Filter('type', '=', 'attack-pattern')])
    
def get_technique_by_name(src, name):
    """Return the ``attack-pattern`` objects in ``src`` whose name equals ``name``."""
    is_technique = Filter('type', '=', 'attack-pattern')
    has_name = Filter('name', '=', name)
    return src.query([is_technique, has_name])

def get_techniques_by_content(src, content):
    """Return the techniques whose description contains ``content``.

    The match is a case-insensitive substring test.

    Args:
        src: data source handed to ``get_all_techniques``.
        content: text to look for in each technique's description.

    Returns:
        A list of the matching technique objects, in query order.
    """
    needle = content.lower()  # lowered once instead of once per technique
    matches = []
    for tech in get_all_techniques(src):
        # `description` is an optional property; a technique without one is
        # treated as having an empty description instead of raising
        # AttributeError.
        description = getattr(tech, 'description', None) or ''
        if needle in description.lower():
            matches.append(tech)
    return matches

def get_techniques_since_time(src, timestamp):
    """Return the ``attack-pattern`` objects created strictly after ``timestamp``."""
    is_technique = Filter('type', '=', 'attack-pattern')
    created_after = Filter('created', '>', timestamp)
    return src.query([is_technique, created_after])

def get_object_by_attack_id(src, typ, attack_id):
    """Return objects of STIX type ``typ`` having an external reference
    whose ``external_id`` equals ``attack_id``."""
    of_type = Filter('type', '=', typ)
    with_external_id = Filter('external_references.external_id', '=', attack_id)
    return src.query([of_type, with_external_id])

def get_group_by_alias(src, alias):
    """Return the ``intrusion-set`` objects in ``src`` matching ``alias``
    on their ``aliases`` property."""
    is_group = Filter('type', '=', 'intrusion-set')
    has_alias = Filter('aliases', '=', alias)
    return src.query([is_group, has_alias])

def get_technique_by_group(src, stix_id):
    """Return the techniques that ``stix_id`` is the source of a ``uses``
    relationship to."""
    # Targets of every `uses` relationship that starts at stix_id.
    used_ids = [
        rel.target_ref
        for rel in src.relationships(stix_id, 'uses', source_only=True)
    ]
    is_technique = Filter('type', '=', 'attack-pattern')
    is_used = Filter('id', 'in', used_ids)
    return src.query([is_technique, is_used])

def get_techniques_by_group_software(src, group_stix_id):
    """Return the techniques used by the software that a group uses.

    Args:
        src: data source supporting ``relationships`` and ``query``.
        group_stix_id: STIX id of the group.

    Returns:
        The result of querying ``src`` for the ``attack-pattern`` objects
        targeted by ``uses`` relationships of the group's malware and tools.
    """
    # STIX ids of the malware and tools the group uses. Because the lookup
    # is source_only, `source_ref` of each relationship is the group itself;
    # the software is the relationship's *target*.
    software_ids = [
        r.target_ref
        for r in src.relationships(group_stix_id, 'uses', source_only=True)
        if get_type_from_id(r.target_ref) in ['malware', 'tool']
    ]

    # `uses` relationships that start at that software (not at the group).
    software_uses = src.query([
        Filter('type', '=', 'relationship'),
        Filter('relationship_type', '=', 'uses'),
        Filter('source_ref', 'in', software_ids)
    ])

    # The techniques those relationships point to.
    return src.query([
        Filter('type', '=', 'attack-pattern'),
        Filter('id', 'in', [r.target_ref for r in software_uses])
    ])

def get_technique_users(src, tech_stix_id):
    """Return the groups and software that use a technique.

    Args:
        src: data source supporting ``relationships`` and ``query``.
        tech_stix_id: STIX id of the technique.

    Returns:
        The result of querying ``src`` for the ``intrusion-set``,
        ``malware`` and ``tool`` objects that are the source of a ``uses``
        relationship targeting the technique.
    """
    user_types = ['intrusion-set', 'malware', 'tool']

    # A single relationship lookup covers both groups and software; the
    # source type decides whether a relationship counts as a "user".
    user_ids = [
        r.source_ref
        for r in src.relationships(tech_stix_id, 'uses', target_only=True)
        if get_type_from_id(r.source_ref) in user_types
    ]

    return src.query([
        Filter('type', 'in', user_types),
        Filter('id', 'in', user_ids)
    ])

def get_techniques_by_platform(src, platform):
    """Return the ``attack-pattern`` objects in ``src`` matching ``platform``
    on their ``x_mitre_platforms`` property."""
    is_technique = Filter('type', '=', 'attack-pattern')
    on_platform = Filter('x_mitre_platforms', '=', platform)
    return src.query([is_technique, on_platform])

def get_tactic_techniques(src, tactic):
    """Return the techniques belonging to ``tactic`` in the ``mitre-attack``
    kill chain."""
    candidates = src.query([
        Filter('type', '=', 'attack-pattern'),
        Filter('kill_chain_phases.phase_name', '=', tactic)
    ])

    # The query matches on phase name only, so keep just the techniques whose
    # matching phase is in the MITRE ATT&CK kill chain.
    wanted_phase = {
        'kill_chain_name' : 'mitre-attack',
        'phase_name' : tactic,
    }
    return [tech for tech in candidates if wanted_phase in tech.kill_chain_phases]

def get_mitigations_by_technique(src, tech_stix_id):
    """Return the ``course-of-action`` objects that are the source of a
    ``mitigates`` relationship targeting ``tech_stix_id``."""
    mitigation_ids = [
        rel.source_ref
        for rel in src.relationships(tech_stix_id, 'mitigates', target_only=True)
    ]
    is_mitigation = Filter('type', '=', 'course-of-action')
    is_linked = Filter('id', 'in', mitigation_ids)
    return src.query([is_mitigation, is_linked])

def getTacticsByMatrix(src):
    """Map each matrix name in ``src`` to the list of its tactic objects.

    Tactics appear in the order given by the matrix's ``tactic_refs``; each
    reference is resolved with its own query and the first hit is taken.
    """
    tactics = {}
    for matrix in src.query([Filter('type', '=', 'x-mitre-matrix')]):
        tactics[matrix['name']] = [
            src.query([Filter('id', '=', tactic_id)])[0]
            for tactic_id in matrix['tactic_refs']
        ]
    return tactics

def getRevokedBy(stix_id, src):
    """Return the non-revoked object that replaced ``stix_id``, if any.

    Args:
        stix_id: STIX id of the (revoked) object.
        src: data source supporting ``relationships`` and ``query``.

    Returns:
        The first non-revoked target of a ``revoked-by`` relationship that
        starts at ``stix_id``, or ``None`` when there is no such object.
    """
    relations = src.relationships(stix_id, 'revoked-by', source_only=True)
    replacements = src.query([
        Filter('id', 'in', [r.target_ref for r in relations]),
        Filter('revoked', '=', False)
    ])
    # The query result is a (possibly empty) list, so test for emptiness
    # rather than None; indexing an empty result would raise IndexError.
    return replacements[0] if replacements else None



In [7]:
# Smoke test: for each individual source, show the source object and the
# first technique it returns.
for s in sources:
    print(s)
    techniques = get_all_techniques(s)
    print(techniques[0])

<stix2.datastore.filesystem.FileSystemSource object at 0x7f2f2dfc6518>
{
    "type": "attack-pattern",
    "id": "attack-pattern--be2dcee9-a7a7-4e38-afd6-21b31ecc3d63",
    "created_by_ref": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
    "created": "2018-04-18T17:59:24.739Z",
    "modified": "2019-07-24T17:35:40.934Z",
    "name": "Exploitation for Client Execution",
    "description": "Vulnerabilities can exist in software due to unsecure coding practices that can lead to unanticipated behavior. Adversaries can take advantage of certain vulnerabilities through targeted exploitation for the purpose of arbitrary code execution. Oftentimes the most valuable exploits to an offensive toolkit are those that can be used to obtain code execution on a remote system because they can be used to gain access to that system. Users will expect to see files related to the applications they commonly used to do work, so they are a useful target for exploit research and development because of the

In [8]:
# Print every malware and tool object found across the composite data source.
print(get_all_software(all_ds))

[Malware(type='malware', id='malware--00c3bfcb-99bd-4767-8c03-b08f585f5c8a', created_by_ref='identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5', created='2017-05-31T21:33:19.746Z', modified='2019-04-22T22:31:38.093Z', name='PowerDuke', description='[PowerDuke](https://attack.mitre.org/software/S0139) is a backdoor that was used by [APT29](https://attack.mitre.org/groups/G0016) in 2016. It has primarily been delivered through Microsoft Word or Excel attachments containing malicious macros. (Citation: Volexity PowerDuke November 2016)', labels=['malware'], external_references=[ExternalReference(source_name='mitre-attack', url='https://attack.mitre.org/software/S0139', external_id='S0139'), ExternalReference(source_name='PowerDuke', description='(Citation: Volexity PowerDuke November 2016)'), ExternalReference(source_name='Volexity PowerDuke November 2016', description='Adair, S.. (2016, November 9). PowerDuke: Widespread Post-Election Spear Phishing Campaigns Targeting Think Tanks and NGOs.

In [9]:
# Display the matrix-name -> tactics mapping for the composite data source.
getTacticsByMatrix(all_ds)

{'Enterprise ATT&CK': [{'type': 'x-mitre-tactic',
   'name': 'Initial Access',
   'description': 'The adversary is trying to get into your network.\n\nInitial Access consists of techniques that use various entry vectors to gain their initial foothold within a network. Techniques used to gain a foothold include targeted spearphishing and exploiting weaknesses on public-facing web servers. Footholds gained through initial access may allow for continued access, like valid accounts and use of external remote services, or may be limited-use due to changing passwords.',
   'created_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5',
   'created': '2018-10-17T00:14:20.652Z',
   'id': 'x-mitre-tactic--ffd5bcee-6e16-4dd2-8eca-7b3beedf33ca',
   'x_mitre_shortname': 'initial-access',
   'modified': '2019-07-19T17:41:41.425Z',
   'object_marking_refs': ['marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168'],
   'external_references': [{'source_name': 'mitre-attack',
     'external_id':

In [10]:
# Walk every technique in the composite data source.
ts = get_all_techniques(all_ds)
for t in ts:
    # Print the name of any technique that carries both the ATT&CK
    # system-requirements field and the CAPEC prerequisites field.
    has_both = all(
        key in t.keys()
        for key in ("x_mitre_system_requirements", "x_capec_prerequisites")
    )
    if has_both:
        print(t["name"])
    # NOTE: this prints the full object of *every* technique; the recorded
    # output of this cell shows the notebook's IOPub data rate limit being hit.
    print(t)

{
    "type": "attack-pattern",
    "id": "attack-pattern--be2dcee9-a7a7-4e38-afd6-21b31ecc3d63",
    "created_by_ref": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
    "created": "2018-04-18T17:59:24.739Z",
    "modified": "2019-07-24T17:35:40.934Z",
    "name": "Exploitation for Client Execution",
    "description": "Vulnerabilities can exist in software due to unsecure coding practices that can lead to unanticipated behavior. Adversaries can take advantage of certain vulnerabilities through targeted exploitation for the purpose of arbitrary code execution. Oftentimes the most valuable exploits to an offensive toolkit are those that can be used to obtain code execution on a remote system because they can be used to gain access to that system. Users will expect to see files related to the applications they commonly used to do work, so they are a useful target for exploit research and development because of their high utility.\n\nSeveral types exist:\n\n### Browser-based Exploitat

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [11]:

filt = Filter('type', '=', 'attack-pattern')

# Query each dataset separately for its techniques
# (enterprise, capec, pre, mobile).
e_techniques, c_techniques, p_techniques, m_techniques = (
    source.query([filt]) for source in (e, c, p, m)
)

# Report the container type and number of techniques per dataset.
for techniques in (e_techniques, c_techniques, p_techniques, m_techniques):
    print(type(techniques), len(techniques))

# List the name of every technique in the composite data source.
ts = get_all_techniques(all_ds)
for t in ts:
    print(t['name'])


<class 'list'> 266
<class 'list'> 570
<class 'list'> 174
<class 'list'> 95
Exploitation for Client Execution
Windows Management Instrumentation
Data from Local System
Component Firmware
Fallback Channels
Account Discovery
Data from Cloud Storage Object
Clear Command History
Dynamic Data Exchange
Standard Cryptographic Protocol
Internal Spearphishing
Credential Dumping
Registry Run Keys / Startup Folder
Logon Scripts
File and Directory Permissions Modification
System Network Connections Discovery
Disk Structure Wipe
Steal Application Access Token
New Service
CMSTP
Indicator Removal on Host
Input Prompt
Implant Container Image
Hardware Additions
Network Service Scanning
Unused/Unsupported Cloud Regions
LC_MAIN Hijacking
Browser Bookmark Discovery
Browser Extensions
Remote File Copy
Automated Exfiltration
Clipboard Data
Application Access Token
LC_LOAD_DYLIB Addition
Server Software Component
Port Knocking
Spearphishing Link
Uncommonly Used Port
Plist Modification
Template Injection
Permi

## Sec1

and some more text

In [12]:
# Load the Unified Cybersecurity Ontology from a local directory and list its classes.
# NOTE(review): `onto_path` and `get_ontology` are not defined anywhere in the
# visible notebook, hence the recorded NameError. Presumably they come from
# the commented-out `from owlready2 import *` in the STIX import cell —
# restore that import before running this cell.
# onto_path.append("/path/to/your/local/ontology/repository")
onto_path.append("/opt/projects/diss/jupyter_nbs/Unified-Cybersecurity-Ontology")
# onto = get_ontology("http://www.lesfleursdunormal.fr/static/_downloads/pizza_onto.owl")
onto = get_ontology("uco_1_5_rdf.owl")
onto.load()
# Print the ontology's classes, then how many there are.
print(list(onto.classes()))
print(len(list(onto.classes())))

NameError: name 'onto_path' is not defined

## Sec2

and some more text

## Sec3

and some more text

## Sec4

and some more text

## Refs

and some more text