# LocalData Provider with velociraptor

https://msticpy.readthedocs.io/en/v1.1.0/data_acquisition/DataProviders.html#using-local-data-the-localdata-provider

Analysis of velociraptor offline collection

Possible test data
* https://github.com/blueteamvillage/Project-Obsidian-DC30#offline-access and https://media.blueteamvillage.org/DC30/Obsidian/


## Imports

In [1]:
# Check we are running Python 3.6
import sys

MIN_REQ_PYTHON = (3, 6)
if sys.version_info < MIN_REQ_PYTHON:
    print("Check the Kernel->Change Kernel menu and ensure that Python 3.6")
    print("or later is selected as the active kernel.")
    sys.exit("Python %s.%s or later is required.\n" % MIN_REQ_PYTHON)

# imports
import json
import os
import pandas as pd
import re
from urllib.parse import urlparse
import yaml
import msticpy.nbtools as nbtools

# data library imports
from msticpy.data.data_providers import QueryProvider

# import msticpy.data.data_query_reader as QueryReader
# from msticpy.data.param_extractor import extract_query_params
# import msticpy.nbtools as mas

print("Imports Complete")

Imports Complete


In [2]:
from msticpy.vis import process_tree
from msticpy.transform.proc_tree_builder import LX_EVENT_SCH
from msticpy.transform.proc_tree_builder import WIN_EVENT_SCH
from copy import copy

## Variables

In [3]:
# directory with velociraptor data (*.json or *.csv)
datadir = "/path/to/velociraptor_Collection-wkst02_magnumtempus_financial-2022-02-13T01_10_23Z"
# directory with queries yaml file
query_path = "/path/to"

## Load Data

In [4]:
# Specify path to look for data files
data_path = datadir
qry_prov = QueryProvider("LocalData", data_paths=[data_path], query_paths=[query_path])

In [5]:
# Show the schema of the data files read in
# print(qry_prov.schema)

In [6]:
print(json.dumps(qry_prov.schema, indent=2))

{
  "windows.applications.chrome.history.json": {
    "User": "object",
    "FullPath": "object",
    "Mtime": "object",
    "visited_url": "object",
    "title": "object",
    "visit_count": "int64",
    "typed_count": "int64",
    "last_visit_time": "datetime64[ns, UTC]"
  },
  "custom.windows.sysinternals.psinfo.json": {
    "Stdout": "object",
    "Stderr": "object",
    "ReturnCode": "int64",
    "Complete": "bool"
  },
  "windows.memory.acquisition.json": {
    "Stdout": "object",
    "Stderr": "object",
    "Upload": "object"
  },
  "custom.windows.netbios.json": {
    "Stdout": "object",
    "Stderr": "object",
    "ReturnCode": "int64",
    "Complete": "bool"
  },
  "windows.network.listeningports.json": {
    "Pid": "int64",
    "Name": "object",
    "Port": "int64",
    "Protocol": "object",
    "Family": "object",
    "Address": "object"
  },
  "windows.applications.edge.history.json": {},
  "custom.windows.patches.json": {
    "Stdout": "object",
    "Stderr": "object",
  

In [7]:
qry_prov.list_queries()

['Azure.list_all_signins_geo',
 'Network.list_azure_network_flows_by_host',
 'Network.list_azure_network_flows_by_ip',
 'SecurityAlert.list_alerts',
 'WindowsSecurity.get_process_tree',
 'WindowsSecurity.list_host_events',
 'WindowsSecurity.list_host_logon_failures',
 'WindowsSecurity.list_host_logons',
 'WindowsSecurity.list_host_processes',
 'windows.autoruns',
 'windows.chromeextensions',
 'windows.chromehistory',
 'windows.inventoryapplicationfile',
 'windows.mft',
 'windows.netstat',
 'windows.pslist',
 'windows.services',
 'windows.taskscheduler',
 'windows.users',
 'windows.usn']

In [8]:
df_process = qry_prov.windows.pslist()
df_process.head()

Unnamed: 0,Pid,Ppid,TokenIsElevated,Name,CommandLine,Exe,Hash,Authenticode,Username,WorkingSetSize
0,4,0,False,System,,,,,,143360
1,264,4,True,smss.exe,\SystemRoot\System32\smss.exe,C:\Windows\System32\smss.exe,"{'MD5': '725ec50d4b0f607bf5b45b5e0115770b', 'S...","{'Filename': 'C:\Windows\System32\smss.exe', '...",NT AUTHORITY\SYSTEM,1273856
2,352,344,True,csrss.exe,%SystemRoot%\system32\csrss.exe ObjectDirector...,C:\Windows\System32\csrss.exe,"{'MD5': '955e9227aa30a08b7465c109b863b886', 'S...","{'Filename': 'C:\Windows\System32\csrss.exe', ...",NT AUTHORITY\SYSTEM,4304896
3,420,412,True,csrss.exe,%SystemRoot%\system32\csrss.exe ObjectDirector...,C:\Windows\System32\csrss.exe,"{'MD5': '955e9227aa30a08b7465c109b863b886', 'S...","{'Filename': 'C:\Windows\System32\csrss.exe', ...",NT AUTHORITY\SYSTEM,3756032
4,440,344,True,wininit.exe,wininit.exe,C:\Windows\System32\wininit.exe,"{'MD5': '5a998f811d7805b79b8e769027f62fd2', 'S...",{'Filename': 'C:\Windows\System32\wininit.exe'...,NT AUTHORITY\SYSTEM,5050368


In [9]:
df_netstat = qry_prov.windows.netstat()
df_netstat.head()

Unnamed: 0,Pid,Name,Family,Type,Status,Laddr.IP,Laddr.Port,Raddr.IP,Raddr.Port,Timestamp
0,708,svchost.exe,IPv4,TCP,LISTEN,0.0.0.0,135,0.0.0.0,0,2022-02-12 18:47:17+00:00
1,4,System,IPv4,TCP,LISTEN,172.16.50.131,139,0.0.0.0,0,2022-02-12 18:47:19+00:00
2,828,svchost.exe,IPv4,TCP,LISTEN,0.0.0.0,3389,0.0.0.0,0,2022-02-12 18:47:20+00:00
3,828,svchost.exe,IPv4,TCP,ESTAB,172.16.50.131,3389,172.16.21.100,50523,2022-02-12 22:38:35+00:00
4,828,svchost.exe,IPv4,TCP,ESTAB,172.16.50.131,3389,172.16.21.100,37474,2022-02-13 01:08:39+00:00


In [10]:
df_autoruns = qry_prov.windows.autoruns()
df_autoruns.head()

Unnamed: 0,Time,Entry Location,Entry,Enabled,Category,Profile,Description,Company,Image Path,Version,Launch String,MD5,SHA-1,PESHA-1,PESHA-256,SHA-256,IMP
0,20220212-202840,HKLM\System\CurrentControlSet\Control\Session ...,,,Boot Execute,System-wide,,,,,,,,,,,
1,20210408-073952,HKLM\System\CurrentControlSet\Control\Session ...,autocheck autochk /q /v *,enabled,Boot Execute,System-wide,Auto Check Utility,Microsoft Corporation,c:\windows\system32\autochk.exe,10.0.14393.4350,autocheck autochk /q /v *,A512733E2C767F87A8029400B4A48CD0,E20DD6960F5EFB37D147D26910FF239D57EFFC06,FDE685A5880D3EF3A5DE738FBADB91480A8A8315,E746C91AB4AF82B5EF60792A6388793EB2ED6E32C919E5...,1ED75EB59C2897304E0160E0605071178418802C31910D...,1BF5E4792E849FE3BCFE23E7C1B21A3F
2,20220212-002036,HKLM\Software\Microsoft\Office\Outlook\Addins,,,Office Addins,System-wide,,,,,,,,,,,
3,20210605-043306,HKLM\Software\Microsoft\Office\Outlook\Addins,Windows_Search_OutlookToolbar,disabled,Office Addins,System-wide,Outlook MSSearch Connector,Microsoft Corporation,c:\windows\system32\mssphtb.dll,7.0.14393.4467,HKCR\CLSID\{F37AFD4F-E736-4980-8650-A486B1F2DF25},05BD2C094A2B52481554F6841149345D,A3EAB25EE07AE1D9700C3CEDEEA588D66A8E49EF,E3013E221F9F6E24AB1BA8591C973540BD8D210B,71EE77030DF5F89D89B13DED3B65A1BB10B87FE227A186...,6144C47A4F28EF44150E19E986F0B5FA1D28E5AA553EDC...,DBA3AD1CA0A0E7F336F8C0911CFC3BA8
4,20211231-082038,HKLM\Software\Microsoft\Office\Outlook\Addins,LyncAddin Class,enabled,Office Addins,System-wide,Skype for Business,Microsoft Corporation,c:\program files\microsoft office\root\office1...,16.0.14827.20024,HKCR\CLSID\{a6a2383f-ad50-4d52-8110-3508275e77f7},792A6D548120A7C829E57BCF132446F9,861CDF3C15ECC3562CA2C61BBD4317F6F56E20B3,47FA36A117350E465EBC80C9251616E76E68FBAD,38998B4C0F4470C005FBE7A347F14042854344E00BE5C5...,B9554A476B88D77351369BBB2B1FC3A6D91F06FC8677AB...,52EE43B6C8076104B51CE3CC035CF7DA


In [11]:
df_taskscheduler_analysis = qry_prov.windows.taskscheduler()
df_taskscheduler_analysis.head()

Unnamed: 0,FullPath,Command,Arguments,ComHandler,UserId,_XML
0,C:\Windows\System32\Tasks\Amazon Ec2 Launch - ...,C:\Windows\System32\cmd.exe,/C C:\Windows\System32\WindowsPowerShell\v1.0\...,,S-1-5-18,"{'Task': {'Actions': {'AttrContext': 'Author',..."
1,C:\Windows\System32\Tasks\Daily MagnumTempus I...,c:\windows\system32\cmd.exe /c start /B C:\win...,,,MAGNUMTEMPUS\karen.metuens,"{'Task': {'Actions': {'AttrContext': 'Author',..."
2,C:\Windows\System32\Tasks\GoogleUpdateTaskMach...,C:\Program Files (x86)\Google\Update\GoogleUpd...,/c,,S-1-5-18,"{'Task': {'Actions': {'AttrContext': 'Author',..."
3,C:\Windows\System32\Tasks\GoogleUpdateTaskMach...,C:\Program Files (x86)\Google\Update\GoogleUpd...,/ua /installsource scheduler,,S-1-5-18,"{'Task': {'Actions': {'AttrContext': 'Author',..."
4,C:\Windows\System32\Tasks\MicrosoftEdgeUpdateT...,C:\Program Files (x86)\Microsoft\EdgeUpdate\Mi...,/c,,S-1-5-18,"{'Task': {'Actions': {'AttrContext': 'Author',..."


In [12]:
df_mft = qry_prov.windows.mft()
df_mft.head()

Unnamed: 0,EntryNumber,InUse,ParentEntryNumber,MFTPath,FullPath,FileName,FileSize,ReferenceCount,IsDir,Created0x10,Created0x30,LastModified0x10,LastModified0x30,LastRecordChange0x10,LastRecordChange0x30,LastAccess0x10,LastAccess0x30
0,0,True,5,C:/$MFT,$MFT,$MFT,297009152,1,False,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z
1,1,True,5,C:/$MFT,$MFTMirr,$MFTMirr,4096,1,False,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z
2,2,True,5,C:/$MFT,$LogFile,$LogFile,43941888,1,False,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z
3,3,True,5,C:/$MFT,$Volume,$Volume,0,1,False,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z
4,4,True,5,C:/$MFT,$AttrDef,$AttrDef,2560,1,False,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z,2021-11-10T06:20:19.11737Z


In [13]:
df_usn = qry_prov.windows.usn()
df_usn.head()

Unnamed: 0,Usn,Timestamp,Filename,FullPath,FileAttributes,Reason,SourceInfo,_FileMFTID,_FileMFTSequence,_ParentMFTID,_ParentMFTSequence
0,33554432,2022-02-12 01:10:07.824210700+00:00,{032950eb-3652-4041-8d4c-415213a2ea43}.tmp,Users/Default/AppData/Local/Packages/windows.i...,[ARCHIVE],[RENAME_OLD_NAME],[ARCHIVE],83401,3,81487,3
1,33554576,2022-02-12 01:10:07.824210700+00:00,NameSpace_Classic_{A304259D-52B8-4526-8B1A-A1D...,Users/Default/AppData/Local/Packages/windows.i...,[ARCHIVE],[RENAME_NEW_NAME],[ARCHIVE],83401,3,81487,3
2,33554784,2022-02-12 01:10:07.824800100+00:00,{309f78cd-53e7-435a-a9c4-bbaec675bf6d}.tmp,Users/Default/AppData/Local/Packages/windows.i...,[ARCHIVE],[FILE_CREATE],[ARCHIVE],1113,13,81487,3
3,33554928,2022-02-12 01:10:07.824800100+00:00,{a3e33889-e8f2-456e-9877-bbd52af0d53c}.tmp,Users/Default/AppData/Local/Packages/windows.i...,[ARCHIVE],[DATA_TRUNCATION],[ARCHIVE],263903,1,81487,3
4,33555072,2022-02-12 01:10:07.824800100+00:00,{a3e33889-e8f2-456e-9877-bbd52af0d53c}.tmp,Users/Default/AppData/Local/Packages/windows.i...,[ARCHIVE],"[DATA_EXTEND, DATA_TRUNCATION]",[ARCHIVE],263903,1,81487,3


In [14]:
df_chromehistory = qry_prov.windows.chromehistory()
df_chromehistory.head()

Unnamed: 0,User,FullPath,Mtime,visited_url,title,visit_count,typed_count,last_visit_time
0,Administrator,C:\Users\Administrator\AppData\Local\Google\Ch...,2022-02-12T00:12:07.0413643Z,https://wireshark.org/,Wireshark · Go Deep.,1,1,2022-02-12 00:08:49+00:00
1,Administrator,C:\Users\Administrator\AppData\Local\Google\Ch...,2022-02-12T00:12:07.0413643Z,https://www.wireshark.org/,Wireshark · Go Deep.,1,0,2022-02-12 00:08:49+00:00
2,Administrator,C:\Users\Administrator\AppData\Local\Google\Ch...,2022-02-12T00:12:07.0413643Z,https://www.wireshark.org/#download,Wireshark · Go Deep.,1,0,2022-02-12 00:08:51+00:00
3,celiste.pecunia,C:\Users\celiste.pecunia\AppData\Local\Google\...,2022-02-13T01:09:58.5698888Z,https://wireshark.org/,Wireshark · Go Deep.,1,1,2022-02-12 00:08:49+00:00
4,celiste.pecunia,C:\Users\celiste.pecunia\AppData\Local\Google\...,2022-02-13T01:09:58.5698888Z,https://www.wireshark.org/,Wireshark · Go Deep.,1,0,2022-02-12 00:08:49+00:00


## Analysis examples

### Process tree

In [15]:
cust_win_schema = copy(WIN_EVENT_SCH)
cust_win_schema.time_stamp = None
cust_win_schema.host_name_column = None
# Note these are used to filter events if you have a data
# set that contains mixed event types.
cust_win_schema.event_id_column = None
cust_win_schema.event_id_identifier = None

cust_win_schema.process_name = "Exe"
cust_win_schema.process_id = "Pid"
cust_win_schema.parent_name = None
cust_win_schema.parent_id = "Ppid"
# missing this column seems fatal. using placeholder value
cust_win_schema.logon_id = None
# cust_win_schema.logon_id = "logon_id"
cust_win_schema.target_logon_id = None
cust_win_schema.cmd_line = "CommandLine"
cust_win_schema.user_name = "Username"
cust_win_schema.user_id = None

In [16]:
# Failing as missing creation timestamp
# df_process.mp_plot.process_tree(schema=cust_win_schema, debug=False)

### Process Network

In [17]:
# Adding Parent Process Name to the process list data frame - memoptix
dfppidtxt = df_process[["Pid", "Name"]].drop_duplicates()
dfppidtxt.columns = ["Ppid", "ParentName"]
df_process2 = df_process.merge(dfppidtxt, on="Ppid", how="left")

In [18]:
df_process2.mp_plot.network(
    source_col="Name",
    target_col="ParentName",
    source_attrs=["Pid", "Ppid", "ParentName", "Username"],
    edge_attrs=["Username"],
)

### Netstat

In [19]:
df_netstat.mp_plot.matrix(
    # Linux
    # x="ProcessInfo.Command",
    # y="LocalAddr.IP",
    # Win
    x="Name",
    y="Laddr.IP",
    # ValueError: Unrecognized range input - seems string needed
    # y="LocalAddr.Port",
    title="Process name vs listening port Interaction",
)

### Windows autoruns

In [20]:
# df_autoruns.fillna("", inplace=True)

In [21]:
df_autoruns[
    ~df_autoruns["Image Path"].isnull()
    & (
        ~df_autoruns["Image Path"].str.contains(
            "\\windows\\\system32\\\\", flags=re.IGNORECASE
        )
        & ~df_autoruns["Image Path"].str.contains(
            "%systemroot%\\\\System32\\\\", flags=re.IGNORECASE
        )
        & ~df_autoruns["Image Path"].str.contains(
            "\\windows\\\syswow64\\\\", flags=re.IGNORECASE
        )
        & ~df_autoruns["Image Path"].str.contains(
            "c:\\\\program files\\\\", flags=re.IGNORECASE
        )
        & ~df_autoruns["Image Path"].str.contains(
            "c:\\\\program files \(x86\)\\\\", flags=re.IGNORECASE
        )
    )
]["Image Path"].unique()

array(['', 'c:\\windows\\sysmon64.exe',
       'c:\\windows\\servicing\\trustedinstaller.exe',
       'c:\\programdata\\microsoft\\windows defender\\platform\\4.18.2201.10-0\\nissrv.exe',
       'c:\\programdata\\microsoft\\windows defender\\platform\\4.18.2201.10-0\\msmpeng.exe',
       'c:\\windows\\sysmondrv.sys', 'c:\\windows\\explorer.exe',
       'File not found: cd /d ',
       'c:\\programdata\\microsoft\\windows defender\\platform\\4.18.2201.10-0\\mpcmdrun.exe',
       'c:\\users\\karen.metuens\\appdata\\local\\mozilla firefox\\default-browser-agent.exe',
       'c:\\users\\administrator\\appdata\\roaming\\microsoft\\windows\\start menu\\programs\\startup\\runwallpapersetupinit.cmd',
       'c:\\users\\celiste.pecunia\\appdata\\local\\microsoft\\teams\\update.exe',
       'c:\\users\\celiste.pecunia\\appdata\\roaming\\microsoft\\windows\\start menu\\programs\\startup\\runwallpapersetup.cmd',
       'c:\\users\\karen.metuens\\appdata\\local\\microsoft\\teams\\update.exe',
     

In [22]:
df_taskscheduler_analysis.fillna("", inplace=True)
df_taskscheduler_analysis[
    ~df_taskscheduler_analysis["Command"].isnull()
    & (
        ~df_taskscheduler_analysis["Command"].str.contains(
            "%windir%\\\\system32\\\\", flags=re.IGNORECASE
        )
        & ~df_taskscheduler_analysis["Command"].str.contains(
            "%systemroot%\\\\System32\\\\", flags=re.IGNORECASE
        )
    )
]["Command"].unique()

array(['C:\\Windows\\System32\\cmd.exe',
       'c:\\windows\\system32\\cmd.exe /c start /B C:\\windows\\temp\\cleanup.exe',
       'C:\\Program Files (x86)\\Google\\Update\\GoogleUpdate.exe',
       'C:\\Program Files (x86)\\Microsoft\\EdgeUpdate\\MicrosoftEdgeUpdate.exe',
       'C:\\Program Files (x86)\\Microsoft OneDrive\\OneDriveStandaloneUpdater.exe',
       'C:\\Program Files\\Npcap\\CheckStatus.bat',
       'C:\\Program Files\\Common Files\\Microsoft Shared\\ClickToRun\\OfficeC2RClient.exe',
       'C:\\Program Files\\Microsoft Office\\root\\Office16\\sdxhelper.exe',
       '', 'C:\\Windows\\system32\\sc.exe', 'BthUdTask.exe', 'sc.exe',
       'C:\\ProgramData\\Microsoft\\Windows Defender\\Platform\\4.18.2201.10-0\\MpCmdRun.exe',
       'C:\\Users\\karen.metuens\\AppData\\Local\\Mozilla Firefox\\default-browser-agent.exe'],
      dtype=object)

### Windows MFT$

In [23]:
df_mft[
    df_mft["FullPath"].str.contains("Windows/Temp/")
    & df_mft["FullPath"].str.contains(".exe")
]

Unnamed: 0,EntryNumber,InUse,ParentEntryNumber,MFTPath,FullPath,FileName,FileSize,ReferenceCount,IsDir,Created0x10,Created0x30,LastModified0x10,LastModified0x30,LastRecordChange0x10,LastRecordChange0x30,LastAccess0x10,LastAccess0x30
1005,1022,True,8433,C:/$MFT,Windows/Temp/officeclicktorun.exe_streamserver...,officeclicktorun.exe_streamserver(202202120109...,0,2,False,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z,2022-02-12T01:09:04.6085259Z
1104,1121,True,8433,C:/$MFT,Windows/Temp/officeclicktorun.exe_streamserver...,officeclicktorun.exe_streamserver(202202120141...,0,2,False,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z,2022-02-12T01:41:08.043053Z
91584,91650,True,8433,C:/$MFT,Windows/Temp/officeclicktorun.exe_streamserver...,officeclicktorun.exe_streamserver(202202121847...,0,2,False,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z,2022-02-12T18:47:30.0357813Z
273506,277723,True,8433,C:/$MFT,Windows/Temp/met64.exe,met64.exe,7168,1,False,2022-02-12T21:34:15.3835668Z,2022-02-12T21:34:15.3835668Z,2022-02-12T21:34:15.3845662Z,2022-02-12T21:34:15.3835668Z,2022-02-12T21:34:15.3845662Z,2022-02-12T21:34:15.3835668Z,2022-02-12T21:34:15.3835668Z,2022-02-12T21:34:15.3835668Z
278362,282579,True,8433,C:/$MFT,Windows/Temp/cleanup.exe,cleanup.exe,11776,1,False,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z,2022-02-12T21:15:59.0172115Z
279338,283555,True,8433,C:/$MFT,Windows/Temp/p.exe,p.exe,1796608,1,False,2022-02-12T21:33:26.0948251Z,2022-02-12T21:33:26.0948251Z,2022-02-12T21:33:26.0958389Z,2022-02-12T21:33:26.0948251Z,2022-02-12T21:33:26.0958389Z,2022-02-12T21:33:26.0948251Z,2022-02-12T21:33:26.0948251Z,2022-02-12T21:33:26.0948251Z


In [24]:
# others unusual paths for exe
df_mft[
    (
        # df_mft['FullPath'].str.contains("/Temp/", flags=re.IGNORECASE) |
        df_mft["FullPath"].str.contains("/Tmp/", flags=re.IGNORECASE)
        | df_mft["FullPath"].str.contains("/ProgramData/", flags=re.IGNORECASE)
        | df_mft["FullPath"].str.contains("/Users/", flags=re.IGNORECASE)
    )
    & df_mft["FullPath"].str.contains(".exe", flags=re.IGNORECASE)
]

Unnamed: 0,EntryNumber,InUse,ParentEntryNumber,MFTPath,FullPath,FileName,FileSize,ReferenceCount,IsDir,Created0x10,Created0x30,LastModified0x10,LastModified0x30,LastRecordChange0x10,LastRecordChange0x30,LastAccess0x10,LastAccess0x30


### Chrome history

In [25]:
df_chromehistory["domain"] = df_chromehistory["visited_url"].apply(
    lambda x: urlparse(x).netloc
)

In [26]:
# Compare to Umbrella top1m
# https://umbrella.cisco.com/blog/cisco-umbrella-1-million
top1m_file = os.path.join(query_path, "top-1m.csv")
if os.path.exists(top1m_file):
    umbrella1m = pd.read_csv(top1m_file, names=["count", "domain"], header=None)
else:
    print(f"Missing umbrella file {top1m_file}")

In [27]:
# Filter out Umbrella 1 million. this can still contain relevant activities but for initial analysis, helpful to reduce noise
df_chromehistory_filtered = df_chromehistory[
    (~df_chromehistory["domain"].isin(umbrella1m["domain"].unique().tolist()))
]
df_chromehistory_filtered.shape

(4, 9)

In [28]:
df_chromehistory_filtered.head()

Unnamed: 0,User,FullPath,Mtime,visited_url,title,visit_count,typed_count,last_visit_time,domain
27,karen.metuens,C:\Users\karen.metuens\AppData\Local\Google\Ch...,2022-02-12T23:43:32.8738101Z,https://www.bficapital.com/groupblog/categorie...,Precious Metals,1,0,2022-02-12 22:22:56+00:00,www.bficapital.com
41,karen.metuens,C:\Users\karen.metuens\AppData\Local\Google\Ch...,2022-02-12T23:43:32.8738101Z,file://files.magnumtempusfinancial.com/public/...,How to Write an Executive Report,1,0,2022-02-12 21:27:47+00:00,files.magnumtempusfinancial.com
50,karen.metuens,C:\Users\karen.metuens\AppData\Local\Google\Ch...,2022-02-12T23:43:32.8738101Z,https://tempuswp.com/,Tempus Wealth Planning | Orange County Financi...,1,0,2022-02-12 22:47:54+00:00,tempuswp.com
51,karen.metuens,C:\Users\karen.metuens\AppData\Local\Google\Ch...,2022-02-12T23:43:32.8738101Z,https://tempuswp.com/meet-tempus,Meet Tempus - Tempus Wealth Planning — Tempus ...,1,0,2022-02-12 22:47:57+00:00,tempuswp.com
