# Example Signature Notebook

### MSTIC Config

In [1]:
%%writefile msticpyconfig.yaml
# MSTICPy configuration written to msticpyconfig.yaml in the working directory.
# Each entry under Workspaces maps a workspace name to its TenantId and WorkspaceId.
AzureSentinel:
  Workspaces:
    ASIHuntOMSWorkspaceV4:
      TenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
      WorkspaceId: 52b1ab41-869e-4138-9e40-2a4457f09bf0
    CyberSecuritySoc:
      TenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
      WorkspaceId: 8ecf8077-cf51-4820-aadd-14040956f35d
    # "Default" repeats the CyberSecuritySoc IDs; presumably this is the entry
    # used when no workspace name is requested - TODO confirm.
    Default:
      TenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
      WorkspaceId: 8ecf8077-cf51-4820-aadd-14040956f35d

Overwriting msticpyconfig.yaml


### Initialize MSTICPy

In [2]:
# Core MSTICPy initialization for notebooks.
# init_notebook receives this notebook's globals(); the trailing semicolon
# suppresses the cell's return-value output.
from msticpy.nbtools import nbinit
nbinit.init_notebook(namespace=globals());

# Load query providers (typically you'll be using just one).
# NOTE(review): QueryProvider is not imported in this cell - presumably
# init_notebook adds it to the namespace passed above; confirm.
qry_prov = QueryProvider("AzureSentinel")

Please wait. Loading Kqlmagic extension...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Connect to Workspace

In [3]:
# Connect the query provider using a WorkspaceConfig built with no arguments.
# NOTE(review): presumably this resolves to the "Default" workspace entry in
# msticpyconfig.yaml - confirm.
qry_prov.connect(WorkspaceConfig())

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
import ipywidgets as widgets

# Table to pre-select when the connected workspace exposes it.
PREFERRED_TABLE = 'OfficeActivity'

# Table names available in the connected workspace.
tables = qry_prov.schema_tables

# A Dropdown rejects a `value` that is not one of its `options`, so only
# pre-select the preferred table when the workspace actually has it.
# Otherwise the widget falls back to its own default selection.
dropdownArgs = {'options': tables, 'description': 'Table:'}
if PREFERRED_TABLE in tables:
    dropdownArgs['value'] = PREFERRED_TABLE
dropdown = widgets.Dropdown(**dropdownArgs)

display(dropdown)

Dropdown(description='Table:', index=230, options=('AACAudit', 'AACHttpRequest', 'AADDomainServicesAccountLogo…

### Get the N most recent rows and store them in a DataFrame

In [5]:
# Get the most recent n rows of the selected table.
#
# KQL `take` gives no guarantee about which rows come back, so it cannot
# deliver "most recent". `top ... by TimeGenerated desc` sorts by the record
# timestamp and keeps the newest nRows.
# NOTE(review): assumes the selected table has the standard TimeGenerated
# column - confirm for custom tables.

nRows = 10000
table = dropdown.value
queryString = f"{table} | top {nRows} by TimeGenerated desc"
df = qry_prov.exec_query(queryString)

<IPython.core.display.Javascript object>

### Generate Data Signatures

In [6]:
from signature import DataSignature

# Build the signature object from the rows fetched in the previous cell.
data = DataSignature(df)
# The regular expression matches any name containing "time" in any letter case.
# NOTE(review): presumably `regexes` lists column-name patterns to leave out of
# the signatures - confirm against signature.py.
data.generateSignatures(regexes=[r'^.*[Tt][Ii][Mm][Ee].*$'])
data.findUniques()

### Visualize Signatures

In [7]:
# Offer every key of the feature map, in sorted order, as a selectable signature.
signatures = sorted(data.featureMap.keys())
print("Please select a signature of present features to analyze")

# NOTE(review): `wrap` is passed through unchanged from the original cell;
# confirm that Dropdown actually honours it.
signatureDropdown = widgets.Dropdown(options=signatures, description='Present Features:', wrap='flex')
display(signatureDropdown)

Please select a signature of present features to analyze


Dropdown(description='Present Features:', options=(('AADGroupId', 'AddOnGuid', 'AddOnType', 'ChannelGuid', 'Ch…

In [8]:
# Summarise the chosen signature and offer its features for plotting.
selectedSignature = signatureDropdown.value

if selectedSignature in data.featureMap:
    # Map the selected feature combination to its signature key, then fetch
    # that signature's record once.
    binarySignature = data.featureMap[selectedSignature]
    signatureInfo = data.signatureDict[binarySignature]

    count = signatureInfo['count']
    presentFeatures = signatureInfo['presentFeatures']
    missingFeatures = signatureInfo['missingFeatures']

    print(f'Summary:\n\nNumber of samples that have this signature: {count}\n')
    print(f'Present Features: {presentFeatures}\n')
    print(f'Missing Features: {missingFeatures}\n\n')
    print('Please select a feature to visualize.')

    # Later cells read `featureDropdown.value`, so the widget is only
    # (re)created when the selection is valid.
    featureOptions = sorted(signatureInfo['featureDict'].keys())
    featureDropdown = widgets.Dropdown(options=featureOptions, description='Present Features:')
    display(featureDropdown)
else:
    print('You selected an invalid combination of present features. Please try again.')

Summary:

Number of samples that have this signature: 1

Present Features: ['RecordType', 'Operation', 'UserType', 'UserKey', 'OfficeWorkload', 'UserId', 'ClientIP', 'OfficeId', 'TeamName', 'TeamGuid', 'ChannelName', 'ChannelGuid', 'AddOnType', 'TabType', 'AADGroupId', 'AddOnGuid']

Missing Features: ['UserAgent', 'ResultStatus', 'OfficeObjectId', 'Site_', 'ItemType', 'EventSource', 'MachineId', 'Site_Url', 'SourceRelativeUrl', 'SourceFileName', 'SourceFileExtension', 'DestinationRelativeUrl', 'DestinationFileName', 'DestinationFileExtension', 'Event_Data', 'Parameters', 'ExternalAccess', 'OriginatingServer', 'OrganizationName', 'Logon_Type', 'InternalLogonType', 'MailboxGuid', 'MailboxOwnerUPN', 'MailboxOwnerSid', 'MailboxOwnerMasterAccountSid', 'LogonUserSid', 'ClientInfoString', 'Client_IPAddress', 'ClientProcessName', 'ClientVersion', 'Folder', 'DestFolder', 'Folders', 'AffectedItems', 'Item', 'ModifiedProperties', 'TargetUserOrGroupName', 'TargetUserOrGroupType', 'MessageId', 'Mem

Dropdown(description='Present Features:', options=('AADGroupId', 'AddOnGuid', 'AddOnType', 'ChannelGuid', 'Cha…

In [9]:
from bokeh.plotting import figure, output_notebook, show

output_notebook()

# Per-feature value counts recorded for the signature chosen earlier.
signatureFeatureDicts = data.signatureDict[binarySignature]['featureDict']
selectedFeature = featureDropdown.value

if selectedFeature in signatureFeatureDicts:
    featureDict = signatureFeatureDicts[selectedFeature]
    featureValues = list(featureDict.keys())
    valueCounts = list(featureDict.values())

    # Turn raw counts into the share of samples holding each value.
    total = sum(valueCounts)
    probabilities = [valueCount / total for valueCount in valueCounts]

    # Bar chart: one bar per observed value, bar height = share of samples.
    graph = figure(
        x_range=featureValues,
        plot_height=250,
        title=f"{selectedFeature} Distribution",
        toolbar_location='below',
        tools="pan,wheel_zoom,box_zoom,reset",
    )
    graph.vbar(x=featureValues, top=probabilities, width=0.9)
    graph.xgrid.grid_line_color = None
    graph.y_range.start = 0
    graph.title.align = 'center'
    show(graph)

    # The same numbers as a table, indexed by the feature value.
    print(f'Distribution Table for {selectedFeature}\n')
    dataTable = pd.DataFrame(
        {'Probabilities': probabilities, 'Count': valueCounts},
        index=featureValues,
    )
    display(dataTable)
else:
    print('You selected an invalid feature to analyze. Please try again.')

Distribution Table for AADGroupId



Unnamed: 0,Probabilities,Count
c32f6e3f-9481-4884-a06c-81cc27c16120,1.0,1


In [10]:
# Split this signature's unique features into two lists of (feature, value)
# pairs: pairs whose count in data.featureValueCounts is exactly 1, and all
# the others.
uniqueToTable = []
uniqueToSignature = []
for uniqueFeature in data.uniqueFeatures[binarySignature]:
    # NOTE(review): each entry is read through its first key only - presumably
    # a single-entry mapping of feature name to value; confirm.
    featureName = tuple(uniqueFeature)[0]
    featureValuePair = (featureName, uniqueFeature[featureName])
    if data.featureValueCounts[featureValuePair] == 1:
        targetList = uniqueToTable
    else:
        targetList = uniqueToSignature
    targetList.append(featureValuePair)

In [11]:
# Build the two result tables straight from the lists of (feature, value)
# tuples. Going through np.array() breaks on an empty list: np.array([]) is
# one-dimensional and cannot be laid out under two column labels. It also
# coerces mixed value types to a single dtype. The DataFrame constructor
# accepts a list of tuples directly and yields an empty two-column frame
# when the list is empty.
resultColumns = ['Feature', 'Value']
dfTableUnique = pd.DataFrame(uniqueToTable, columns=resultColumns)
dfSignatureUnique = pd.DataFrame(uniqueToSignature, columns=resultColumns)

print('Signature Identifiers (values that this signature always contains)\n')
display(dfSignatureUnique)
print('\nUnique Signature Identifiers (values that are unique to this signature throughout the entire table)')
display(dfTableUnique)

Signature Identifiers (values that this signature always contains)



Unnamed: 0,Feature,Value
0,RecordType,MicrosoftTeams
1,Operation,TabAdded
2,UserType,Application
3,UserKey,98785600-1bb7-4fb9-b9fa-19afe2c8a360
4,OfficeWorkload,MicrosoftTeams
5,UserId,Azure Security Insights
6,TeamName,Incident 19754: Malicious credential theft tool execution detected
7,TeamGuid,19:iJ9_L3mfIstCYzHHDEqiLrzE9y07ux6K1_2qNQtgc0M1@thread.tacv2
8,ChannelName,General
9,ChannelGuid,19:iJ9_L3mfIstCYzHHDEqiLrzE9y07ux6K1_2qNQtgc0M1@thread.tacv2



Unique Signature Identifiers (values that are unique to this signature throughout the entire table)


Unnamed: 0,Feature,Value
0,ClientIP,52.114.144.65
1,OfficeId,a0812ab8-d67d-50f8-a3f6-94271468284a
2,TabType,webpage
3,AddOnGuid,tab::0df216de-2920-4dd8-9652-1977ec3e98cd
