This repository has been archived by the owner on Nov 16, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 514
/
Episode 2 - Joins.txt
325 lines (272 loc) · 10.9 KB
/
Episode 2 - Joins.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
print Series = 'Tracking the Adversary with MTP Advanced Hunting', EpisodeNumber = 2, Topic = 'Joins', Presenter = 'Michael Melone, Tali Ash', Company = 'Microsoft'
// Language Reference: https://docs.microsoft.com/en-us/azure/kusto/query/
// Advanced Hunting Reference: https://docs.microsoft.com/en-us/microsoft-365/security/mtp/advanced-hunting-schema-tables?view=o365-worldwide
// ---------------
// Joins
// - Links two datasets together based on a common key
// - Can heavily impact performance depending on how datasets are joined
// - If datasets being joined are too large you may get an error
// ---------------
// The Join Statement
// In the below example, we will find users in the Finance department and determine where they have logged on.
// We'll accomplish this using the IdentityInfo table (user information) and the IdentityLogonEvents
// table.
IdentityLogonEvents
| take 100
// IdentityLogonEvents
// - Authentications performed against an on-prem DC or to Microsoft online services.
// - Contains success \ fail information, logon type, application, identity information, and client information
IdentityInfo
| where Department == 'Finance'
| join IdentityLogonEvents on AccountObjectId
// Note that we now have duplicate columns.
// the duplicates have a '1' at the end of the column name to
// avoid errors.
// This example uses two datasets, identified as "left" and "right"
// based on their location relative to the join statement.
// Left table:
IdentityInfo
| where Department == 'Finance'
// Right table:
IdentityLogonEvents
| take 100
// As long as the join column names match this should
// work nicely. If the column names do not match, we may
// need to specify which columns to join...
// We accomplish this by using $left. and $right.
IdentityInfo
| where Department == 'Finance'
| project-rename objid = AccountObjectId
| join IdentityLogonEvents on $left.objid == $right.AccountObjectId
// --------------------------------------------------------
// JOIN TYPES
// Now comes the fun part - understanding the default Kusto join.
let LeftTable = datatable (key:int, value:string)
[
0, "Hello",
0, "Hola",
1, "Salut",
1, "Ciao",
2, "Hallo"
];
let RightTable = datatable (key:int, value:string)
[
0, "World",
0, "Mundo",
1, "Monde",
1, "Mondo",
2, "Welt"
];
LeftTable
| join RightTable on key
// As you can see we are missing data. The default Kusto join
// deduplicates the left table based on the join column before
// joining the datasets together. Because of this, we lose
// "Hola" and "Ciao".
// This is important since it can directly result in missed
// detections! If you want to join data together using the
// standard inner join (the default in SQL) you need to specify
// kind = inner!
// The default join can be handy from a performance perspective. For
// example, let's say we wanted to produce a list of users who logged
// on to Windows 10 devices. The DeviceInfo table has duplicates (one
// row for each checkin), but we don't need them represented.
DeviceInfo
| where OSPlatform == 'Windows10'
| join DeviceLogonEvents on DeviceId
| distinct DeviceId, DeviceName, AccountDomain, AccountName, AccountSid
// Specifying kind=inner enables us to return all rows from both tables
let LeftTable = datatable (key:int, value:string)
[
0, "Hello",
0, "Hola",
1, "Salut",
1, "Ciao",
2, "Hallo"
];
let RightTable = datatable (key:int, value:string)
[
0, "World",
0, "Mundo",
1, "Monde",
1, "Mondo",
2, "Welt"
];
LeftTable
| join kind=inner RightTable on key
// This comes in handy when you want to see every network communication within 5 minutes
// of an alert event on the device
AlertEvidence
| where isnotempty(DeviceId)
| project-rename AlertTimestamp = Timestamp
| join kind=inner DeviceNetworkEvents on DeviceId
| where Timestamp between (datetime_add('minute', -5, AlertTimestamp) .. datetime_add('minute', 5, AlertTimestamp))
// Other types of joins
// - left outer: all rows from the left table regardless if they match on the right
// - right outer: all rows from the right table regardless if they match on the left
let LeftTable = datatable (key:int, value:string)
[
0, "Foo",
1, "Bar",
2, "Baz",
3, "Qux",
4, "Quux"
];
let RightTable = datatable (key:int, value:string)
[
0, "Wibble",
1, "Wobble",
2, "Wubble",
];
LeftTable
| join kind=leftouter RightTable on key
// For example, let’s say we wanted a list of all emails that the malware
// filter detected as phishing paired with details about their attachments.
// EmailEvents
// ref: https://docs.microsoft.com/en-us/microsoft-365/security/mtp/advanced-hunting-emailevents-table?view=o365-worldwide
// Contains information about e-mails processed through Office ATP, including
// - Standard email metadata
// - Whether phish or malware detection identified the e-mail as malicious upon receipt
// - Actions taken by Office ATP on the e-mail upon receipt
// EmailAttachmentInfo
// ref: https://docs.microsoft.com/en-us/microsoft-365/security/mtp/advanced-hunting-emailattachmentinfo-table?view=o365-worldwide
// Contains information about e-mail attachments
EmailEvents
| where ThreatTypes == "Phish"
| join kind=leftouter EmailAttachmentInfo on NetworkMessageId, RecipientObjectId
| take 100
// EmailEvents can tell us what e-mails were picked up as phishing, but we won’t
// have an entry in EmailAttachmentInfo for each since many are unlikely to have
// an attachment. To accomplish this we used left outer join.
// ------------------------------------------
// - full outer: all rows of both tables despite whether or not they match each other
let LeftTable = datatable (key:int, value:string)
[
0, "Foo",
1, "Bar",
2, "Baz",
3, "Qux",
4, "Quux"
];
let RightTable = datatable (key:int, value:string)
[
2, "Wibble",
3, "Wobble",
16, "Wubble",
];
LeftTable
| join kind=fullouter RightTable on key
// I use this in a query I use reporting on antimalware signature, engine, and platform versions.
let StartDate = ago(30d);
DeviceFileEvents
| where Timestamp > StartDate
// Find signature \ engine update activity
| where InitiatingProcessFileName =~ 'MpSigStub.exe' and InitiatingProcessCommandLine contains '/stub' and InitiatingProcessCommandLine contains '/payload'
| summarize Timestamp = arg_max(Timestamp, InitiatingProcessCommandLine) by DeviceId, DeviceName
| extend SplitCommand = split(InitiatingProcessCommandLine, ' ')
// Locate stub and payload versions
| extend EngineVersionLocation = array_index_of(SplitCommand, "/stub") + 1, DefinitionVersionLocation = array_index_of(SplitCommand, "/payload") + 1
| project Timestamp, DeviceName, DeviceId, AMEngineVersion = SplitCommand[EngineVersionLocation], AntivirusSignatureVersion = SplitCommand[DefinitionVersionLocation]
| join kind=fullouter (
DeviceProcessEvents
| where Timestamp > StartDate
// Find process creations for MsMpEng from the platform folder
| where FileName =~ 'MsMpEng.exe' and FolderPath contains @"\Microsoft\Windows Defender\Platform\"
| summarize arg_max(Timestamp, FolderPath) by DeviceId, DeviceName
// Go up two levels
| project DeviceId, DeviceName, AMServiceVersion = split(FolderPath, '\\')[-2]
) on DeviceId
// Re-projecting to make the UI happy
| project DeviceId, DeviceName, AMEngineVersion, AntivirusSignatureVersion, AMServiceVersion
// There are also anti joins and semi joins which are designed to quickly reduce datasets
// anti joins will remove any matching rows and return only the left or right table
// - leftanti: removes any rows that match between the two tables, only returns the left table
let LeftTable = datatable (key:int, value:string)
[
0, "Foo",
1, "Bar",
2, "Baz",
3, "Qux",
4, "Quux"
];
let RightTable = datatable (key:int, value:string)
[
2, "Wibble",
3, "Wobble",
16, "Wubble",
];
LeftTable
| join kind=leftanti RightTable on key
// rightanti - you guessed it. It removes matches and returns values from the right table
let LeftTable = datatable (key:int, value:string)
[
0, "Foo",
1, "Bar",
2, "Baz",
3, "Qux",
4, "Quux"
];
let RightTable = datatable (key:int, value:string)
[
2, "Wibble",
3, "Wobble",
16, "Wubble",
];
LeftTable
| join kind=rightanti RightTable on key
// Let’s say you wanted to see e-mails which were identified as either phishing
// or malware which were likely still in user’s mailboxes. To achieve this, we
// will use EmailEvents to identify the suspicious e-mails and filter the results
// using the EmailPostDeliveryEvents table.
// EmailPostDeliveryEvents
// ref: https://docs.microsoft.com/en-us/microsoft-365/security/mtp/advanced-hunting-emailpostdeliveryevents-table?view=o365-worldwide
// contains information about post-delivery remediation actions such as manual administrator
// remediation, phish zap, or malware zap
EmailEvents
| where ThreatTypes in ('Phish', 'Malware') and EmailAction !in ('Replace attachment', 'Send to quarantine')
| join kind=leftanti EmailPostDeliveryEvents on NetworkMessageId , RecipientEmailAddress
// For all of the joins, check out: https://docs.microsoft.com/en-us/azure/kusto/query/joinoperator
// ---------------------------
// union
// Sometimes you want to "link" two queries together into one result instead of joining them based on a key.
// To accomplish this you would use the union operator. A union merges all rows from each query where the column
// name and data type match.
let LeftTable = datatable (key:int, value:string)
[
0, "Foo",
1, "Bar",
2, "Baz",
3, "Qux",
4, "Quux"
];
let RightTable = datatable (key:int, value:string)
[
2, "Wibble",
3, "Wobble",
16, "Wubble",
];
LeftTable
| union RightTable
// Notice we no longer have the extra columns from a join. This might be useful if you want to track
// logon activity with devices (the DeviceLogonEvents table) and Active Directory \ Azure Active Directory
// (the IdentityLogonEvents table) in one query.
DeviceLogonEvents
| extend Table = 'DeviceLogonEvents'
| take 100
| union (
IdentityLogonEvents
| extend Table = 'IdentityLogonEvents'
| take 100
)
| project-reorder Timestamp, Table, AccountDomain, AccountName, AccountUpn, AccountSid
| order by Timestamp asc
// --------------------------------------
// Functions are a special sort of join which let you pull more static data about a file (more are
// planned in the future, stay tuned!). This is really helpful when you want to get information about
// file prevalence or antimalware detections.
// Let's say we wanted information about rare files involved in a process creation event
DeviceProcessEvents
| invoke FileProfile() // Call the FileProfile function
| where isnotempty(GlobalPrevalence) and GlobalPrevalence < 1000 // Note that in the real world you might want to include empty GlobalPrevalence
| project-reorder DeviceName, FileName, ProcessCommandLine, FileSize, GlobalPrevalence, GlobalFirstSeen, GlobalLastSeen, ThreatName, Publisher, SoftwareName
| top 100 by GlobalPrevalence asc