# Example Data Signature Notebook

### Initialize MSTICPy

In [1]:
# Core MSTICPy initialization for Notebooks
from msticpy.nbtools import nbinit
# NOTE(review): later cells use QueryProvider, WorkspaceConfig, pd, np and
# display without importing them, so init_notebook presumably injects those
# names into the namespace passed here - confirm against the MSTICPy docs.
# The trailing ";" keeps the call's return value out of the cell output.
nbinit.init_notebook(namespace=globals());

# Load query providers (typically you'll be using just one)
qry_prov = QueryProvider("AzureSentinel")

Please wait. Loading Kqlmagic extension...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Connect to Workspace

In [2]:
# Connect the query provider using a default-constructed WorkspaceConfig.
# NOTE(review): presumably the workspace details come from the local MSTICPy
# configuration - confirm before running against a different tenant.
qry_prov.connect(WorkspaceConfig())

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Select a table to analyze

In [78]:
import ipywidgets as widgets

# Table this notebook was written around; pre-selected only when the connected
# workspace actually exposes it.
PREFERRED_TABLE = 'OfficeActivity'

tables = qry_prov.schema_tables

# Dropdown rejects a `value` that is not one of its `options`, so a workspace
# without the preferred table would make this cell fail. Fall back to the first
# available table, or to no selection when the schema is empty.
if PREFERRED_TABLE in tables:
    initialTable = PREFERRED_TABLE
elif tables:
    initialTable = tables[0]
else:
    initialTable = None

dropdown = widgets.Dropdown(
    options=tables,
    description='Table:',
    value=initialTable
)

display(dropdown)

Dropdown(description='Table:', index=234, options=('AACAudit', 'AACHttpRequest', 'AADDomainServicesAccountLogo…

### Take the N Most Recent Samples

In [51]:
# Get the most recent n rows.
#
# KQL `take` returns an arbitrary subset of rows (no ordering guarantee), so it
# cannot deliver "most recent". `top ... by TimeGenerated desc` orders by the
# record timestamp before truncating.
# NOTE(review): assumes the selected table has the standard TimeGenerated
# column, and that sorting the table is affordable - confirm for very large
# tables, or add a time filter to the query.

nRows = 10000
table = dropdown.value
queryString = f"{table} | top {nRows} by TimeGenerated desc"
df = qry_prov.exec_query(queryString)

<IPython.core.display.Javascript object>

### Generate Data Signatures

In [87]:
from signature import DataSignature

# Wrap the sampled rows in the project-local DataSignature helper.
data = DataSignature(df)
# The regular expression matches any string that contains "time" in any letter
# case. NOTE(review): presumably it is applied to column names so that
# time-like columns are removed before signatures are built - confirm in
# signature.DataSignature.generateSignatures.
data.generateSignatures(regexes=[r'^.*[Tt][Ii][Mm][Ee].*$'])
# NOTE(review): a later cell reads data.uniqueFeatures and
# data.featureValueCounts; presumably this call populates them - confirm.
data.findUniques()

### Visualize Signatures

In [88]:
# Offer every known signature (a key of data.featureMap) in a stable, sorted order.
signatures = sorted(data.featureMap.keys())
print("\nPlease select a signature of present features to analyze\n")
signatureDropdown = widgets.Dropdown(
    # Pass explicit (label, value) pairs. The captured output shows the keys
    # are tuples of feature names, and ipywidgets treats a list made up solely
    # of 2-item tuples as (label, value) pairs - so signatures with exactly two
    # features would otherwise be misread. str(sig) is the label ipywidgets
    # would have generated anyway, so the rendered list is unchanged.
    options=[(str(sig), sig) for sig in signatures],
    description='Present Features:',
    # The former wrap='flex' argument was dropped: Dropdown has no `wrap`
    # trait, so it had no effect beyond a traitlets deprecation warning.
)

display(signatureDropdown)


Please select a signature of present features to analyze



Dropdown(description='Present Features:', options=(('AADGroupId', 'CommunicationType', 'ItemName', 'Members', …

In [89]:
# Summarise the selected signature, then let the user pick one of its features
# to visualise in the next cell.
if signatureDropdown.value in data.featureMap:
    binarySignature = data.featureMap[signatureDropdown.value]
    # Everything recorded for this signature lives under a single entry.
    signatureInfo = data.signatureDict[binarySignature]
    count = signatureInfo['count']
    presentFeatures = signatureInfo['presentFeatures']
    missingFeatures = signatureInfo['missingFeatures']

    print(f'Summary:\n\nNumber of samples that have this signature: {count}\n')
    print(f'Present Features: {presentFeatures}\n')
    print(f'Missing Features: {missingFeatures}\n\n')
    print('Please select a feature to visualize.')

    # Alphabetical list of the features that have value counts for this signature.
    featureNames = list(signatureInfo['featureDict'].keys())
    featureNames.sort()
    featureDropdown = widgets.Dropdown(
        options=featureNames,
        description='Present Features:',
    )
    display(featureDropdown)
else:
    print('You selected an invalid combination of present features. Please try again.')

Summary:

Number of samples that have this signature: 111

Present Features: ['RecordType', 'Operation', 'UserType', 'UserKey', 'OfficeWorkload', 'ResultStatus', 'UserId', 'ClientIP', 'ExternalAccess', 'OriginatingServer', 'OrganizationName', 'Logon_Type', 'MailboxGuid', 'MailboxOwnerUPN', 'MailboxOwnerSid', 'ClientInfoString', 'Client_IPAddress', 'Item', 'OfficeId', 'AppId', 'ClientAppId']

Missing Features: ['UserAgent', 'OfficeObjectId', 'Site_', 'ItemType', 'EventSource', 'Site_Url', 'SourceRelativeUrl', 'SourceFileName', 'SourceFileExtension', 'DestinationRelativeUrl', 'DestinationFileName', 'DestinationFileExtension', 'Event_Data', 'Parameters', 'ClientProcessName', 'ClientVersion', 'Folder', 'DestFolder', 'Folders', 'AffectedItems', 'ModifiedProperties', 'TargetUserOrGroupName', 'TargetUserOrGroupType', 'Members', 'TeamName', 'TeamGuid', 'ItemName', 'CommunicationType', 'AADGroupId', 'OperationProperties']


Please select a feature to visualize.


Dropdown(description='Present Features:', options=('AppId', 'ClientAppId', 'ClientIP', 'ClientInfoString', 'Cl…

In [90]:
from bokeh.plotting import figure, output_notebook, show

# Route bokeh output into the notebook.
output_notebook()

# Plot the value distribution of the selected feature within the selected
# signature, followed by the same numbers as a table.
selectedFeature = featureDropdown.value
signatureFeatures = data.signatureDict[binarySignature]['featureDict']

if selectedFeature in signatureFeatures.keys():
    featureDict = signatureFeatures[selectedFeature]
    featureValues = list(featureDict.keys())
    valueCounts = list(featureDict.values())

    # Turn raw counts into relative frequencies.
    total = sum(valueCounts)
    probabilities = [valueCount / total for valueCount in valueCounts]

    graph = figure(
        x_range=featureValues,
        plot_height=250,
        plot_width=800,
        title=f"{selectedFeature} Distribution",
        toolbar_location='below',
        tools="pan,wheel_zoom,box_zoom,reset",
    )
    graph.vbar(x=featureValues, top=probabilities, width=0.9)
    graph.xgrid.grid_line_color = None
    graph.y_range.start = 0
    graph.title.align = 'center'
    show(graph)

    print(f'Distribution Table for {selectedFeature}\n')
    tableRows = list(zip(probabilities, valueCounts))
    dataTable = pd.DataFrame(tableRows, columns=['Probabilities', 'Count'])
    # Label each row with the feature value it describes.
    dataTable.index = featureValues
    display(dataTable)
else:
    print('You selected an invalid feature to analyze. Please try again.')

Distribution Table for AppId



Unnamed: 0,Probabilities,Count
00000003-0000-0000-c000-000000000000,0.675676,75
3c896ded-22c5-450f-91f6-3d1ef0848f6e,0.306306,34
30b65a76-e816-400d-95b0-cd21a53059e7,0.018018,2


### Generate Unique Identifiers

In [91]:
# Split this signature's unique features into two lists of (feature, value)
# pairs: those whose count in data.featureValueCounts is exactly 1, and the rest.
uniqueToTable = []
uniqueToSignature = []
for uniqueFeature in data.uniqueFeatures[binarySignature]:
    # Only the first key of the entry and its value are used.
    # NOTE(review): presumably each entry is a one-item mapping - confirm.
    featureName = tuple(uniqueFeature)[0]
    featureValuePair = (featureName, uniqueFeature[featureName])
    if data.featureValueCounts[featureValuePair] == 1:
        targetList = uniqueToTable
    else:
        targetList = uniqueToSignature
    targetList.append(featureValuePair)

### Display Unique Identifiers

In [92]:
# Show each list of (feature, value) pairs as a two-column table, or a short
# notice when the list is empty.
identifierColumns = ['Feature', 'Value']

if uniqueToTable:
    dfTableUnique = pd.DataFrame(np.array(uniqueToTable), columns=identifierColumns)
    print('\nUnique Signature Identifiers (values that are unique to this signature throughout the entire table)')
    display(dfTableUnique)
else:
    print("No unique to table identifiers\n")

if uniqueToSignature:
    dfSignatureUnique = pd.DataFrame(np.array(uniqueToSignature), columns=identifierColumns)
    print('Signature Identifiers (values that this signature always contains)\n')
    display(dfSignatureUnique)
else:
    print("No potential signature identifiers\n")

No unique to table identifiers

Signature Identifiers (values that this signature always contains)



Unnamed: 0,Feature,Value
0,RecordType,ExchangeItem
1,UserType,Regular
2,OfficeWorkload,Exchange
3,ResultStatus,Succeeded
4,ExternalAccess,False
5,OrganizationName,seccxpninja.onmicrosoft.com
6,Logon_Type,Owner
