## Generate MSTICPy Config

In [1]:
# Only generate config if Teams
# %%writefile msticpyconfig.yaml
# AzureSentinel:
#   Workspaces:
#     ASIHuntOMSWorkspaceV4:
#       TenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
#       WorkspaceId: 52b1ab41-869e-4138-9e40-2a4457f09bf0
#     CyberSecuritySoc:
#       TenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
#       WorkspaceId: 8ecf8077-cf51-4820-aadd-14040956f35d
#     Default:
#       TenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
#       WorkspaceId: 8ecf8077-cf51-4820-aadd-14040956f35d

## Core MSTICPy initialization for Notebooks

In [2]:
# Bootstrap the notebook through MSTICPy's nbinit helper; passing globals()
# hands init_notebook this notebook's own namespace.
from msticpy.nbtools import nbinit
nbinit.init_notebook(namespace=globals())
# NOTE(review): QueryProvider is never imported explicitly in this notebook —
# presumably init_notebook places it in the namespace passed above; confirm.
qry_prov = QueryProvider("AzureSentinel")

Please wait. Loading Kqlmagic extension...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
# Connect the query provider using a default-constructed WorkspaceConfig.
# NOTE(review): presumably this resolves the workspace from msticpyconfig.yaml — confirm.
qry_prov.connect(WorkspaceConfig())
# Table names exposed by the provider; the table-selection cell sorts these
# to populate its dropdown.
tables = qry_prov.schema_tables

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Choose A Table To Analyze

In [5]:
import ipywidgets as widgets
from IPython.display import display

# Order the table names alphabetically once; the same list supplies both the
# dropdown choices and the initially selected (first) entry.
table_names = sorted(tables)

print('\nPlease select a table to analyze.\n')
tableDropdown = widgets.Dropdown(
    description='Table:',
    options=table_names,
    value=table_names[0],
)
display(tableDropdown)


Please select a table to analyze.



Dropdown(description='Table:', index=231, options=('AACAudit', 'AACHttpRequest', 'AADDomainServicesAccountLogo…

## Choose Feature(s) and a Timeframe to Analyze

In [6]:
# Fetch a single row purely to discover the selected table's column names.
df = qry_prov.exec_query(f'{tableDropdown.value} | take 1')
timestamp_col = 'TimeGenerated'

if df.empty:
    print("Table is empty, please select another table.")
else:
    print ("\nWhat kind of features/columns would you like to see be analyzed?\n")
    options = sorted(list(df))
    # The timestamp column and 'Type' are not offered as selectable features.
    # list.remove raises ValueError (not TypeError) when the item is absent,
    # so that is the exception to handle for tables lacking either column.
    for excluded_col in (timestamp_col, 'Type'):
        try:
            options.remove(excluded_col)
        except ValueError:
            print(f'{excluded_col} does not exist')
    # Widget for picking the subset of columns to analyze; read later through
    # selected_features.selected_values.
    selected_features = nbwidgets.SelectSubset(source_items=options)
    print('\n\nWhat time frame do you want to analyze?\n')
    # Time-range picker; its .start / .end are read by the anomaly cell.
    timeFrame = nbwidgets.QueryTime(units='day', max_before=20, before=5, max_after=1)
    timeFrame.display()

<IPython.core.display.Javascript object>


What kind of features/columns would you like to see be analyzed?



VBox(children=(Text(value='', description='Filter:', style=DescriptionStyle(description_width='initial')), HBo…



What time frame do you want to analyze?



VBox(children=(HTML(value='<h4>Set query time boundaries</h4>'), HBox(children=(DatePicker(value=datetime.date…

## Generate Timeseries Anomalies

In [40]:
def mapAnomalousColors(cells, anomalous_features=None):
    """Return one background-color CSS declaration per cell of a column.

    Written for ``Styler.apply`` (the notebook calls
    ``result.style.apply(mapAnomalousColors)``), which hands the function one
    column at a time and forwards extra keyword arguments to it.

    Parameters
    ----------
    cells : sized object with a ``name`` attribute
        The column being styled; ``cells.name`` is compared against the
        anomalous feature names.
    anomalous_features : collection of str, optional
        Names of the columns to highlight as anomalous. When omitted, the
        notebook-level ``anomalousFeatures`` variable is used, which is the
        original behavior.

    Returns
    -------
    list of str
        ``len(cells)`` identical CSS strings: light red when the column is
        anomalous, light green otherwise.
    """
    if anomalous_features is None:
        # Backward-compatible fallback to the notebook global.
        anomalous_features = anomalousFeatures
    if cells.name in anomalous_features:
        css = 'background-color: #ffcccb'
    else:
        css = 'background-color: #90EE90'
    return [css] * len(cells)

In [41]:
from time_series_utils import check_kwargs, ts_anomalies_stl
from datetime import timedelta

def _bind_anomalous_features(flagged):
    """Build a Styler column function pinned to one timestamp's features.

    ``Styler.apply`` only records the function; it is executed when the
    Styler is rendered. mapAnomalousColors reads the notebook-level
    ``anomalousFeatures`` at that moment, so without pinning, every stored
    Styler would be colored with whichever timestamp the loop handled last.
    """
    def _style_column(cells):
        global anomalousFeatures
        # Point the global at this timestamp's list right before it is read.
        anomalousFeatures = flagged
        return mapAnomalousColors(cells)
    return _style_column


start = timeFrame.start
end = timeFrame.end
features = selected_features.selected_values

featureDict = {}    # feature name -> result of ts_anomalies_stl for that feature
anomalyDict = {}    # anomalous timestamp -> list of feature names flagged at it
timeframeDict = {}  # anomalous timestamp -> styled raw-data query result

if start == end:
    print("\nPlease make sure the start and end date are distinct\n")
elif len(features) == 0:
    print("\nPlease choose at least one feature\n")
else:
    for feature in features:
        # One time-series query per selected feature, aggregated as a
        # distinct count of the feature's values.
        raw_times_series_data = qry_prov.MultiDataSource.get_timeseries_data(
            start=start,
            end=end,
            table=f"{tableDropdown.value}",
            timestampcolumn=timestamp_col,
            aggregatecolumn=f"{feature}",
            aggregatefunction=f"dcount(tostring({feature}))",
            add_query_items=f'|mv-expand {timestamp_col} to typeof(datetime), {feature} to typeof(long)',
        )
        df_time_series = raw_times_series_data[[timestamp_col, f'{feature}']]
        df_time_series = df_time_series.set_index(timestamp_col)
        anomalies = ts_anomalies_stl(df_time_series)
        featureDict[f'{feature}'] = anomalies
        # Rows whose 'anomalies' column equals 1 are the flagged timestamps.
        anomalous_timestamps = list(anomalies[anomalies['anomalies']==1][timestamp_col])
        for timestamp in anomalous_timestamps:
            anomalyDict.setdefault(timestamp, []).append(f'{feature}')
        print(f'Timeseries for {feature} generated')
    print('\nQuerying Raw Data For ')
    print('\nTimestamp for Anomalous Features\n')
    anomalyDf = pd.DataFrame(anomalyDict.items(), columns=[timestamp_col, 'Anomalous Features'])
    anomalyDf = anomalyDf.sort_values(by=[timestamp_col]).reset_index(drop=True)
    anomalyTimeStamps = list(anomalyDf[timestamp_col])
    for timestamp in anomalyTimeStamps:
        # Raw data is pulled from 1 hour before to 2 hours after the
        # anomalous timestamp.
        startRange = timestamp - timedelta(hours=1)
        endRange = timestamp + timedelta(hours=2)
        anomalousFeatures = list(anomalyDf.loc[anomalyDf[timestamp_col] == timestamp]['Anomalous Features'])[0]
        # Single-line query built from the range computed above (the previous
        # literal was split across lines and interpolated undefined names).
        queryString = (
            f"{tableDropdown.value} "
            f"| where {timestamp_col} between(datetime({startRange})..datetime({endRange}))"
        )
        result = qry_prov.exec_query(queryString)
        # Pin this timestamp's features so the coloring is still correct when
        # the Styler is rendered later.
        result = result.style.apply(_bind_anomalous_features(anomalousFeatures))
        timeframeDict[timestamp] = result
    display(anomalyDf)

<IPython.core.display.Javascript object>

Timeseries for AADGroupId generated


<IPython.core.display.Javascript object>

Timeseries for AADTarget generated


<IPython.core.display.Javascript object>

Timeseries for Actor generated


<IPython.core.display.Javascript object>

Timeseries for ActorContextId generated


<IPython.core.display.Javascript object>

Timeseries for ActorIpAddress generated


<IPython.core.display.Javascript object>

Timeseries for AddOnGuid generated


<IPython.core.display.Javascript object>

Timeseries for AddOnType generated


<IPython.core.display.Javascript object>

Timeseries for AddonName generated


<IPython.core.display.Javascript object>

Timeseries for AffectedItems generated


<IPython.core.display.Javascript object>

Timeseries for AppDistributionMode generated


<IPython.core.display.Javascript object>

Timeseries for AppId generated


<IPython.core.display.Javascript object>

Timeseries for Application generated


<IPython.core.display.Javascript object>

Timeseries for AzureADAppId generated


<IPython.core.display.Javascript object>

Timeseries for AzureActiveDirectory_EventType generated


<IPython.core.display.Javascript object>

Timeseries for ChannelGuid generated


<IPython.core.display.Javascript object>

Timeseries for ChannelName generated


<IPython.core.display.Javascript object>

Timeseries for ChannelType generated


<IPython.core.display.Javascript object>

Timeseries for ChatName generated


<IPython.core.display.Javascript object>

Timeseries for ChatThreadId generated


<IPython.core.display.Javascript object>

Timeseries for Client generated


<IPython.core.display.Javascript object>

Timeseries for ClientAppId generated


<IPython.core.display.Javascript object>

Timeseries for ClientIP generated


<IPython.core.display.Javascript object>

Timeseries for ClientIP_ generated


<IPython.core.display.Javascript object>

Timeseries for ClientInfoString generated


<IPython.core.display.Javascript object>

Timeseries for ClientMachineName generated


<IPython.core.display.Javascript object>

Timeseries for ClientProcessName generated


<IPython.core.display.Javascript object>

Timeseries for ClientVersion generated


<IPython.core.display.Javascript object>

Timeseries for Client_IPAddress generated


<IPython.core.display.Javascript object>

Timeseries for CommunicationType generated


<IPython.core.display.Javascript object>

Timeseries for CrossMailboxOperations generated


<IPython.core.display.Javascript object>

Timeseries for CustomEvent generated


<IPython.core.display.Javascript object>

Timeseries for DataCenterSecurityEventType generated


<IPython.core.display.Javascript object>

Timeseries for DestFolder generated


<IPython.core.display.Javascript object>

Timeseries for DestMailboxId generated


<IPython.core.display.Javascript object>

Timeseries for DestMailboxOwnerMasterAccountSid generated


<IPython.core.display.Javascript object>

Timeseries for DestMailboxOwnerSid generated


<IPython.core.display.Javascript object>

Timeseries for DestMailboxOwnerUPN generated


<IPython.core.display.Javascript object>

Timeseries for DestinationFileExtension generated


<IPython.core.display.Javascript object>

Timeseries for DestinationFileName generated


<IPython.core.display.Javascript object>

Timeseries for DestinationRelativeUrl generated


<IPython.core.display.Javascript object>

Timeseries for EffectiveOrganization generated


<IPython.core.display.Javascript object>

Timeseries for ElevationApprovedTime generated


<IPython.core.display.Javascript object>

Timeseries for ElevationApprover generated


<IPython.core.display.Javascript object>

Timeseries for ElevationDuration generated


<IPython.core.display.Javascript object>

Timeseries for ElevationRequestId generated


<IPython.core.display.Javascript object>

Timeseries for ElevationRole generated


<IPython.core.display.Javascript object>

Timeseries for ElevationTime generated


<IPython.core.display.Javascript object>

Timeseries for EventSource generated


<IPython.core.display.Javascript object>

Timeseries for Event_Data generated


<IPython.core.display.Javascript object>

Timeseries for ExtendedProperties generated


<IPython.core.display.Javascript object>

Timeseries for ExternalAccess generated


<IPython.core.display.Javascript object>

Timeseries for ExtraProperties generated


<IPython.core.display.Javascript object>

Timeseries for Folder generated


<IPython.core.display.Javascript object>

Timeseries for Folders generated


<IPython.core.display.Javascript object>

Timeseries for GenericInfo generated


<IPython.core.display.Javascript object>

Timeseries for InterSystemsId generated


<IPython.core.display.Javascript object>

Timeseries for InternalLogonType generated


<IPython.core.display.Javascript object>

Timeseries for IntraSystemId generated


<IPython.core.display.Javascript object>

Timeseries for Item generated


<IPython.core.display.Javascript object>

Timeseries for ItemName generated


<IPython.core.display.Javascript object>

Timeseries for ItemType generated


<IPython.core.display.Javascript object>

Timeseries for LoginStatus generated


<IPython.core.display.Javascript object>

Timeseries for LogonUserDisplayName generated


<IPython.core.display.Javascript object>

Timeseries for LogonUserSid generated


<IPython.core.display.Javascript object>

Timeseries for Logon_Type generated


<IPython.core.display.Javascript object>

Timeseries for MachineDomainInfo generated


<IPython.core.display.Javascript object>

Timeseries for MachineId generated


<IPython.core.display.Javascript object>

Timeseries for MailboxGuid generated


<IPython.core.display.Javascript object>

Timeseries for MailboxOwnerMasterAccountSid generated


<IPython.core.display.Javascript object>

Timeseries for MailboxOwnerSid generated


<IPython.core.display.Javascript object>

Timeseries for MailboxOwnerUPN generated


<IPython.core.display.Javascript object>

Timeseries for Members generated


<IPython.core.display.Javascript object>

Timeseries for MessageId generated


<IPython.core.display.Javascript object>

Timeseries for ModifiedObjectResolvedName generated


<IPython.core.display.Javascript object>

Timeseries for ModifiedProperties generated


<IPython.core.display.Javascript object>

Timeseries for Name generated


<IPython.core.display.Javascript object>

Timeseries for NewValue generated


<IPython.core.display.Javascript object>

Timeseries for OfficeId generated


<IPython.core.display.Javascript object>

Timeseries for OfficeObjectId generated


<IPython.core.display.Javascript object>

Timeseries for OfficeTenantId generated


<IPython.core.display.Javascript object>

Timeseries for OfficeTenantId_ generated


<IPython.core.display.Javascript object>

Timeseries for OfficeWorkload generated


<IPython.core.display.Javascript object>

Timeseries for OldValue generated


<IPython.core.display.Javascript object>

Timeseries for Operation generated


<IPython.core.display.Javascript object>

Timeseries for OperationProperties generated


<IPython.core.display.Javascript object>

Timeseries for OperationScope generated


<IPython.core.display.Javascript object>

Timeseries for OrganizationId generated


<IPython.core.display.Javascript object>

Timeseries for OrganizationId_ generated


<IPython.core.display.Javascript object>

Timeseries for OrganizationName generated


<IPython.core.display.Javascript object>

Timeseries for OriginatingServer generated


<IPython.core.display.Javascript object>

Timeseries for Parameters generated


<IPython.core.display.Javascript object>

Timeseries for RecordType generated


<IPython.core.display.Javascript object>

Timeseries for ResultReasonType generated


<IPython.core.display.Javascript object>

Timeseries for ResultStatus generated


<IPython.core.display.Javascript object>

Timeseries for Scope generated


<IPython.core.display.Javascript object>

Timeseries for SendAsUserMailboxGuid generated


<IPython.core.display.Javascript object>

Timeseries for SendAsUserSmtp generated


<IPython.core.display.Javascript object>

Timeseries for SendOnBehalfOfUserSmtp generated


<IPython.core.display.Javascript object>

Timeseries for SendonBehalfOfUserMailboxGuid generated


<IPython.core.display.Javascript object>

Timeseries for SharingType generated


<IPython.core.display.Javascript object>

Timeseries for Site_ generated


<IPython.core.display.Javascript object>

Timeseries for Site_Url generated


<IPython.core.display.Javascript object>

Timeseries for Site_Url_ generated


<IPython.core.display.Javascript object>

Timeseries for SourceFileExtension generated


<IPython.core.display.Javascript object>

Timeseries for SourceFileName generated


<IPython.core.display.Javascript object>

Timeseries for SourceFileName_ generated


<IPython.core.display.Javascript object>

Timeseries for SourceRecordId generated


<IPython.core.display.Javascript object>

Timeseries for SourceRelativeUrl generated


<IPython.core.display.Javascript object>

Timeseries for SourceRelativeUrl_ generated


<IPython.core.display.Javascript object>

Timeseries for SourceSystem generated


<IPython.core.display.Javascript object>

Timeseries for Source_Name generated


<IPython.core.display.Javascript object>

Timeseries for Start_Time generated


<IPython.core.display.Javascript object>

Timeseries for SupportTicketId generated


<IPython.core.display.Javascript object>

Timeseries for TabType generated


<IPython.core.display.Javascript object>

Timeseries for TargetContextId generated


<IPython.core.display.Javascript object>

Timeseries for TargetUserId generated


<IPython.core.display.Javascript object>

Timeseries for TargetUserOrGroupName generated


<IPython.core.display.Javascript object>

Timeseries for TargetUserOrGroupType generated


<IPython.core.display.Javascript object>

Timeseries for TeamGuid generated


<IPython.core.display.Javascript object>

Timeseries for TeamName generated


<IPython.core.display.Javascript object>

Timeseries for TenantId generated


<IPython.core.display.Javascript object>

Timeseries for UserAgent generated


<IPython.core.display.Javascript object>

Timeseries for UserDomain generated


<IPython.core.display.Javascript object>

Timeseries for UserId generated


<IPython.core.display.Javascript object>

Timeseries for UserId_ generated


<IPython.core.display.Javascript object>

Timeseries for UserKey generated


<IPython.core.display.Javascript object>

Timeseries for UserSharedWith generated


<IPython.core.display.Javascript object>

Timeseries for UserType generated


<IPython.core.display.Javascript object>

Timeseries for _ResourceId generated

Querying Raw Data For 

Timestamp for Anomalous Features



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,TimeGenerated,Anomalous Features
0,2021-08-15 08:18:06.454916+00:00,"[OfficeId, Operation, Parameters, SourceRecordId]"
1,2021-08-16 08:18:06.454916+00:00,"[OfficeId, Operation, Parameters, SourceRecordId]"
2,2021-08-17 13:18:06.454916+00:00,"[OfficeId, Operation, Parameters, SourceRecordId]"
3,2021-08-17 18:18:06.454916+00:00,"[ItemType, OfficeObjectId, SourceFileExtension, SourceFileName, SourceFileName_, SourceRelativeU..."
4,2021-08-18 07:18:06.454916+00:00,"[ClientIP, ClientIP_, Item]"
5,2021-08-18 09:18:06.454916+00:00,[Item]
6,2021-08-18 16:18:06.454916+00:00,[Item]
7,2021-08-18 17:18:06.454916+00:00,"[Client_IPAddress, Folders, LogonUserSid, MailboxGuid, MailboxOwnerSid, MailboxOwnerUPN]"
8,2021-08-18 19:18:06.454916+00:00,"[ClientInfoString, ItemType, OfficeId, OfficeObjectId, RecordType, Site_Url, Site_Url_, SourceFi..."
9,2021-08-18 20:18:06.454916+00:00,[Item]


## Select A Timestamp To Visualize

In [42]:
try:
    # Offer every anomalous timestamp found above, earliest first.
    anomaly_times = sorted(anomalyDf[timestamp_col])
    timestamp_dropdown = widgets.Dropdown(
        description='TimeStamp:',
        options=anomaly_times,
        disabled=False,
    )
    print('\nSelect a timestamp to visualize anomalous features ')
    display(timestamp_dropdown)
except NameError:
    # A name used above is undefined, e.g. the anomaly cell has not produced
    # anomalyDf in this kernel session.
    print("Anomaly Dataframe not instantiated yet.")


Select a timestamp to visualize anomalous features 


Dropdown(description='TimeStamp:', options=(Timestamp('2021-08-15 08:18:06.454916+0000', tz='UTC'), Timestamp(…

## Visualize Anomalies

In [44]:
from msticpy.nbtools.timeseries import display_timeseries_anomolies
# Notebook-wide pandas display setting: no row/column truncation.
pd.set_option("display.max_rows", None, "display.max_columns", None)

try:
    # Feature names flagged at the timestamp chosen in the dropdown.
    anomalous_features = tuple(anomalyDf.loc[anomalyDf[timestamp_col] == timestamp_dropdown.value]['Anomalous Features'])[0]
    for feature in anomalous_features:
        print(f"\nTime Series for {feature}\n")
        display_timeseries_anomolies(data=featureDict[feature], y=feature)
    # The stored Styler computes its colors only when rendered, and
    # mapAnomalousColors reads the notebook-level `anomalousFeatures` at that
    # point. Bind it to the selected timestamp's features first; otherwise the
    # highlighting reflects whatever value the global happened to hold last.
    anomalousFeatures = anomalous_features
    display(timeframeDict[timestamp_dropdown.value])
except NameError:
    print("Anomaly Dataframe not instantiated yet.")


Time Series for OfficeId




Time Series for Operation




Time Series for Parameters




Time Series for SourceRecordId



AttributeError: 'Styler' object has no attribute 'head'

In [36]:
df = pd.DataFrame({'a':[1,2,3,4],'b':['','',1,''],'c':['a','b','c','']})
df_column_colors=['red','blue','green']
anomalousFeatures = ['a', 'c']
nonAnomalous = ['b']

def mapAnomalousColors(cells):
    if cells.name in anomalousFeatures:
        return ['background-color: #ffcccb' for i in range(len(cells))]
    return ['background-color: #90EE90' for i in range(len(cells))]

df = df.style.apply(mapAnomalousColors)
df

Unnamed: 0,a,b,c
0,1,,a
1,2,,b
2,3,1.0,c
3,4,,
