#### Import the mdeasm library (easiest if this notebook is running out of the same directory as the mdeasm.py file) and set the details for your workspace, tenant, subscription, and service principal.

In [None]:
import base64

import mdeasm

# Connection details: fill these in for your own environment before running.
# NOTE: avoid saving or sharing this notebook with a real client_secret filled in.
workspace_name = ''
tenant_id = ''
subscription_id = ''
client_id = ''
client_secret = ''

# the client object that every later cell in this notebook calls
my_easm_workspace = mdeasm.Workspaces(
    workspace_name=workspace_name,
    tenant_id=tenant_id,
    subscription_id=subscription_id,
    client_id=client_id,
    client_secret=client_secret,
)

#### Interacting with an already-created workspace and retrieving its details.

In [None]:
# name of the workspace the client currently treats as its default
print("current default workspace:", my_easm_workspace._default_workspace_name)

# details for the workspace named in the configuration cell
print(
    "\ncurrent workspace details\n",
    my_easm_workspace.get_workspaces(workspace_name=workspace_name),
)

# discovery groups, looked up without naming a workspace explicitly
print(
    "\ncurrent workspace disco group details\n",
    my_easm_workspace.get_discovery_groups(),
)

#### Creating a new workspace and submitting custom seeds for initial discovery.

In [3]:
# name and placement of the workspace to create; fill in the blanks before running
new_workspace_name = 'JustSomeEmptyWoodenHorses'
resource_group_name = ''
# hint: supported EASM resource locations can be found in my_easm_workspace._locations
easm_region = ''

print(
    "\ncreate a new workspace and show its details\n",
    my_easm_workspace.create_workspace(
        workspace_name=new_workspace_name,
        resource_group_name=resource_group_name,
        location=easm_region,
    ),
)

print("\nnew default workspace\n", my_easm_workspace._default_workspace_name)

# custom discovery definition: a name plus seeds grouped by seed type
# uncomment and edit the other seed types below as needed
custom_discovery_template = {
    'name': new_workspace_name,
    'seeds': {
        'domain': ['trojanhorses.biz'],
        #'ipBlock':['10.10.0.0/16', '192.168.123.234/32'],
        #'host':['www.mydomain.com', 'mail.mydomain.com'],
        #'contact':['admin@mydomain.com', 'user@mydomain.com'],
        #'as':['ASN1234', '987654'],
        #'attribute':['WhoisOrganization:MY ORG NAME 1', 'WhoisOrganization:MY ORG NAME 2']
    },
}

print(
    "\nSubmit a custom discovery template and show its initial run details\n",
    my_easm_workspace.create_discovery_group(disco_custom=custom_discovery_template),
)

#### Creating a new workspace and searching for pre-built discovery templates.

In [2]:
# reuses resource_group_name and easm_region from the previous workspace-creation cell
new_workspace_name = 'RiskIQ'

print(
    "\ncreate a new workspace and show its details\n",
    my_easm_workspace.create_workspace(
        workspace_name=new_workspace_name,
        resource_group_name=resource_group_name,
        location=easm_region,
    ),
)

print("\nnew default workspace\n", my_easm_workspace._default_workspace_name)

# it's best to be generic with this lookup value
# the UI will generally be a better option for this step
print("\nfound pre-built discovery templates\n")
my_easm_workspace.get_discovery_templates(
    org_name='RiskIQ',
)

# we can display details of the pre-built template using the template id number
my_easm_workspace.get_discovery_template_by_id(
    template_id='88256',
)

#### Submitting a pre-built discovery template for a discovery run.

In [3]:
# pick one template from the search results (or refine the search and try again)
# the template string must be submitted **exactly** as printed in the .get_discovery_templates() output
my_easm_workspace.create_discovery_group(
    disco_template='RiskIQ, Inc---88256',
)

Switching back to the original workspace

In [2]:
# point the client back at the workspace named in the configuration cell, if necessary
my_easm_workspace._default_workspace_name = workspace_name
print(
    "current default workspace:",
    my_easm_workspace._default_workspace_name,
)

#### query the workspace assets

refer to the MDEASM documentation (https://learn.microsoft.com/en-us/azure/external-attack-surface-management/inventory-filters) for valid query options and operators **or** run a query in MDEASM UI with browser developer tools (F12 --> Network tab) open and copy the query from the `filter` parameter

e.g.:

      'state in ("candidate", "associatedThirdparty") AND kind !in ("as", "contact")'

      'createdAt between ("2022-12-25T07:00:00.000Z", "2023-01-01T07:00:00.000Z")'
      
      'state = "confirmed" AND webComponentName !empty'

depending on the query, the size of the workspace, whether you choose to retrieve every asset identified by your query (get_all=True), and even your internet speed...this may take some time

In [5]:
# all domains in the 'Approved' state; results are stored under the name 'all_domains'
domain_query = 'state = "confirmed" AND kind = "domain"'
my_easm_workspace.get_workspace_assets(
    query_filter=domain_query,
    asset_list_name='all_domains',
    get_all=True,
    max_page_size=50,
)

In [4]:
# all 'Approved' assets with any webComponent values; stored under 'all_web_components'
web_component_query = 'state = "confirmed" AND webComponentName !empty'
my_easm_workspace.get_workspace_assets(
    query_filter=web_component_query,
    asset_list_name='all_web_components',
    get_all=True,
    max_page_size=100,
)

In [3]:
# all 'Approved' wildcard assets
# no asset_list_name is passed, so the results land in the default 'assetList'
wildcard_query = 'state = "confirmed" AND wildcard = "true"'
my_easm_workspace.get_workspace_assets(
    query_filter=wildcard_query,
)

a query to return EVERY asset in a workspace (i.e.: all asset states)

In [2]:
# 'max_page_count' can cap this (and any other get_workspace_assets() call):
#   first 500 assets -> max_page_size=100 and max_page_count=5
#   first 175 assets -> max_page_size=25 and max_page_count=7
all_asset_query = 'state !empty'
my_easm_workspace.get_workspace_assets(
    query_filter=all_asset_query,
    asset_list_name='all_assets',
    max_page_size=100,
    max_page_count=5,
)

#### Exploring some of the details available in the returned assets using the custom and default named `AssetLists`

assets are accessible within the `Workspace.AssetList.assets` list

if the asset_list_name argument is passed in `get_workspace_assets()`, then you will use that name

if the asset_list_name argument is not passed in `get_workspace_assets()`, such as the wildcard="true" query above, then you will use the default of 'assetList'

In [8]:
# asset attributes are read with asset.<attribute_name>
print('assetList has this many assets:', len(my_easm_workspace.assetList.assets))
# slice to the first asset only, to keep the output short
for asset in my_easm_workspace.assetList.assets[:1]:
    print("\tasset name:", asset.name)
    print("\tasset wildcard:", asset.wildcard)
    print("\tasset firstSeen:", asset.firstSeen)
    print("\tasset lastSeen:", asset.lastSeen)

In [9]:
# some attributes are common and present in every asset type
print('all_domains has this many assets:', len(my_easm_workspace.all_domains.assets))
# slice to the first asset only, to keep the output short
for asset in my_easm_workspace.all_domains.assets[:1]:
    print("\tasset id:", asset.id)
    print("\tasset uuid:", asset.uuid)
    print("\tasset nameServers:", asset.nameServers)

In [10]:
# some attributes are only present in certain asset types
# accessing a missing one raises AttributeError, so each is checked with hasattr() first
print('all_web_components has this many assets:', len(my_easm_workspace.all_web_components.assets))

# (attribute name, label printed in front of its value), in display order
optional_fields = (
    ('attributes', "\tasset attributes:"),
    ('cnames', "\tasset cnames:"),
    ('headers', "\tasset headers:"),
    ('webComponents', "\tasset webComponents:"),
)
for asset in my_easm_workspace.all_web_components.assets:
    for field_name, label in optional_fields:
        if hasattr(asset, field_name):
            print(label, getattr(asset, field_name))
    # only the first asset is inspected
    break

In [13]:
# individual assets can be accessed by their location (index value) within the list
# each asset has .to_dict() and .pretty() methods to format/print the entire object
print('all_assets has this many assets:', len(my_easm_workspace.all_assets.assets))

# locate the first sslCert asset; list_index stays None when the list holds none
list_index = None
for index, asset in enumerate(my_easm_workspace.all_assets.assets):
    if asset.kind == 'sslCert':
        list_index = index
        break

if list_index is None:
    # nothing matched: do not mislabel asset 0 as an sslCert (or index into an empty list)
    print("\tno sslCert asset was found in all_assets")
else:
    print(f"\tthis is an sslCert asset at index position {list_index}\n",'\t',my_easm_workspace.all_assets.assets[list_index].to_dict())

we also have a method to get a single workspace asset: `get_workspace_asset_by_id()`

this requires that you already know some details about the asset you want to retrieve

there are three different ways to pull a single asset:

      asset.id

      asset.uuid

      base64-encoded asset.id

In [14]:
# base64 is imported in the import cell at the top of the notebook
# take the first domain asset and derive the three identifiers it can be retrieved by
asset = my_easm_workspace.all_domains.assets[0]

asset_id = asset.id
asset_uuid = asset.uuid
# encode the id string to bytes, base64-encode it, then decode back to a str
b64_asset_id = base64.b64encode(asset_id.encode()).decode()

print('the asset.id value looks like:', asset_id)
print('the asset.uuid value looks like:', asset_uuid)
print('the base64-encoded asset.id value looks like:', b64_asset_id)

# retrieve that asset by its asset.id
my_easm_workspace.get_workspace_asset_by_id(asset_id=asset_id)

# retrieve that asset by its asset.uuid
my_easm_workspace.get_workspace_asset_by_id(asset_id=asset_uuid)

# retrieve that asset by its base64-encoded asset.id
my_easm_workspace.get_workspace_asset_by_id(asset_id=b64_asset_id)

we just retrieved the exact same asset three different ways

that asset's details can be examined using the same value we used to retrieve it

In [14]:
# getattr() retrieves object attributes whose names would break the normal "object_name.attribute_name" syntax
# e.g.: all of these will throw exceptions: 'my_easm_workspace.domain$$example.com', 'my_easm_workspace.01234567-89ab-cdef-0123-456789abcdef', 'my_easm_workspace.ZXhhbXBsZS5jb20='
# using the asset.id
print('retrieve the asset details using the asset.id:', asset_id)
asset_by_id = getattr(my_easm_workspace, asset_id)
print(asset_by_id.pretty())

In [16]:
# same lookup as above, this time keyed by the asset.uuid
print('retrieve the asset details using the asset.uuid:', asset_uuid)
asset_by_uuid = getattr(my_easm_workspace, asset_uuid)
print(asset_by_uuid.pretty())

In [17]:
# same lookup as above, this time keyed by the base64-encoded asset.id
print('retrieve the asset details using the base64-encoded asset.id:', b64_asset_id)
asset_by_b64_id = getattr(my_easm_workspace, b64_asset_id)
print(asset_by_b64_id.pretty())

#### Diving deeper into some interesting asset datasets

the tlsVersions available in ipAddress assets are not accessible via the UI, so we can query and extract those details through the API

In [45]:
# for every asset that carries an sslServerConfig, print its TLS versions and cipher suites
for asset in my_easm_workspace.all_assets.assets:
    if not hasattr(asset, 'sslServerConfig'):
        continue
    # one (tlsVersions, cipherSuites) pair of comma-joined strings per config entry;
    # .get() skips entries where either key is missing or empty instead of raising KeyError
    tls_and_ciphers = [
        (','.join(sslconf['tlsVersions']), ','.join(sslconf['cipherSuites']))
        for sslconf in asset.sslServerConfig
        if sslconf.get('cipherSuites') and sslconf.get('tlsVersions')
    ]
    print('asset name:', asset.name, '\nTLS version and Cipher Suite', tls_and_ciphers, '\n')

webComponents are queryable and accessible through the UI, but if you don't know the exact value to search for, the API is a good option to identify available/queryable values

In [41]:
# tally how often each (name, version) pair appears across the web component assets
all_web_components_dict = {}
for asset in my_easm_workspace.all_web_components.assets:
    if not hasattr(asset, 'webComponents'):
        continue
    for wc in asset.webComponents:
        component_key = (wc.get('name'), wc.get('version'))
        # start from 0 the first time a pair is seen
        all_web_components_dict[component_key] = all_web_components_dict.get(component_key, 0) + 1

for component_key, occurrences in all_web_components_dict.items():
    print(f"Web Component Name and Version: {component_key}", f"  Count: {occurrences}")

we can easily find any simple string in all URLs within the workspace

try the below example using 'api' instead of 'rss'

In [5]:
# print every asset URL that contains the substring 'rss'
for asset in my_easm_workspace.all_assets.assets:
    # assets without a url attribute are skipped
    if not hasattr(asset, 'url'):
        continue
    if 'rss' in asset.url:
        print(asset.url)

#### Creating and querying facet filters

~~we can turn the above examples into facet filters and then query/search them for workspace details~~

~~create a facet filter for all webComponents in an AssetList~~

~~this places the webComponents['name'] and webComponents['version'] values into the webComponents_filter~~

~~the 'attribute_value' value will always be placed into key[0] and any additional key-word arguments passed to the function (such as version='version') will be placed into subsequent key[N] positions (key[1], key[2], key[3], etc)~~

mdeasm.py now automatically creates facet filters by default for every asset detail retrieved through the get_workspace_assets() and get_workspace_asset_by_id() functions (this can be disabled with `auto_create_facet_filters=False`)

facet filters can be accessed directly through the `<mdeasm.Workspaces object>.filters.<filter_name>` attribute; e.g.:

> my_easm_workspace.filters.cveId

> my_easm_workspace.filters.subjectCommonNames

> my_easm_workspace.filters.headers

as such, the `create_facet_filter()` example below is unnecessary, but is kept in the notebook here as an example of the function

In [6]:
# build a facet filter from the 'webComponents' attribute of the 'all_web_components' asset list
my_easm_workspace.create_facet_filter(
    asset_list='all_web_components',
    attribute_name='webComponents',
)

In [7]:
# a search against webComponents performs a case-insensitive (default) search against every item in the filter
# omit 'facet_filter' to search across all facet filters, or pass 'facet_filter=' to target a single one
print('facet filter search for "azure" in the "webComponents" filter, using "contains" and sort "descending"')
my_easm_workspace.query_facet_filter(
    search='azure',
    facet_filter='webComponents',
    search_type='contains',
    sort_order='descending',
)

In [8]:
# another example using some different search and sort options
# the printed description names the same search term that is passed to the query below
print('facet filter search for "azure" in the "webComponents" filter, using "starts" and sort "ascending"')
my_easm_workspace.query_facet_filter(search='azure', facet_filter='webComponents', search_type='starts', sort_order='ascending')

In [15]:
# the webComponents filter is reachable as an attribute under my_easm_workspace.filters
# for more advanced/customized facet filter queries, access it directly and use its dict methods
print('interact with the facet filter dictionary attribute directly and leverage any dict method to iterate and/or search')
for filter_key, filter_value in my_easm_workspace.filters.webComponents.items():
    print(filter_key)
    print(filter_value)
    # show only the first entry
    break

**create a facet filter for all TLS versions in an AssetList**

this places the `sslServerConfig['tlsVersions']` and `sslServerConfig['cipherSuites']` values into the **sslServerConfig_filter**

note that this is the opposite order as the example above

this will allow us to use simple_search on the tlsVersion

also note that we are using a different key-word argument than 'version' above

In [10]:
# build a facet filter from the 'sslServerConfig' attribute of the 'all_assets' asset list
my_easm_workspace.create_facet_filter(
    asset_list='all_assets',
    attribute_name='sslServerConfig',
)

In [13]:
# simple search for 1.1
# search_type and sort_order are passed explicitly so the call matches the printed description
# NOTE(review): the search is regex-based, so the unescaped '.' presumably matches any character - confirm
print('facet filter search for "1.1" in "sslServerConfig", using "contains" and sort "descending"')
my_easm_workspace.query_facet_filter(search='1.1', facet_filter='sslServerConfig', search_type='contains', sort_order='descending')

same as stated above, this is not necessary if we are auto-creating facet filters, but still keeping this here as an example

In [14]:
# build a facet filter from the 'subjectAlternativeNames' attribute of the 'all_assets' asset list
my_easm_workspace.create_facet_filter(
    asset_list='all_assets',
    attribute_name='subjectAlternativeNames',
)

the `query_facet_filter()` search leverages regex, so most character classes and tokens will work here

In [16]:
# the search term is a regex, so the literal "*." has to be escaped
# a raw string keeps the backslashes without relying on invalid escape sequences
print('facet filter search for "*." retrieves wildcard SANs')
my_easm_workspace.query_facet_filter(search=r'\*\.', facet_filter='subjectAlternativeNames', search_type='starts')

In [17]:
# raw string: the regex backslash is kept as-is instead of forming an invalid escape sequence
san_pattern = r'[a-zA-Z0-9\.]*'
# the description is built from the same pattern that is passed to the query
print(f'\nfacet filter search for "{san_pattern}"')
my_easm_workspace.query_facet_filter(search=san_pattern, facet_filter='subjectAlternativeNames')

`Hands-on Exercises?`

*Create a new easm workspace and run a discovery using a pre-built template*

In [None]:
# exercise: replace each empty string with your own value before running
my_easm_workspace.create_workspace(
    workspace_name='',
    resource_group_name='',
    location='',
)

# search the pre-built templates by organization name
my_easm_workspace.get_discovery_templates(
    org_name='',
)

# show the details of one template from the search results
my_easm_workspace.get_discovery_template_by_id(
    template_id='',
)

In [None]:
# exercise: paste the template string exactly as printed by get_discovery_templates()
my_easm_workspace.create_discovery_group(
    disco_template='',
)

*Query a workspace for all `Approved Domains`*

In [None]:
# exercise: add the arguments needed to return all 'Approved' domains
# hint: earlier cells in this notebook pass query_filter=, asset_list_name=, get_all= and max_page_size=
# hint: the domain query used earlier was 'state = "confirmed" AND kind = "domain"'
my_easm_workspace.get_workspace_assets()

*Create a facet filter for `Name Servers` and run a facet filter query*

In [None]:
# exercise: add the arguments needed to build a facet filter for name servers
# hint: earlier cells in this notebook pass asset_list= and attribute_name=
# hint: the domain assets printed earlier expose a 'nameServers' attribute
my_easm_workspace.create_facet_filter()

In [None]:
# exercise: add the arguments needed to search the facet filter created above
# hint: earlier cells in this notebook pass search=, facet_filter=, search_type= and sort_order=
my_easm_workspace.query_facet_filter()