# fileParser.ipynb
This notebook parses the log files in the `logfiles` directory for metadata, then reads each file as binary and prints its SHA-256 hash. It is a building block of the web3.py module, which will interact with the blockchain.

In [1]:
# ./fileParser.ipynb
import os
import hashlib

# Constants: relative paths of the input files read by the cells below.
LOG_TXT_PATH = 'logfiles/workstationLog.txt'  # text log; scanned line by line for metadata
DEVICE_XML_PATH = 'logfiles/Device.xml'  # parsed as XML for its network adapter entries
MAKO_TCW_PATH = 'logfiles/makoTest2.tcw'  # parsed as XML for its System/TssSettings attributes

## Demonstrate the file read function

In [2]:
# Echo the workstation log. Iterating the file object streams it one line at a
# time instead of loading every line up front with readlines(), and end=''
# stops print() from adding a second newline after the one each line already
# ends with.
with open(LOG_TXT_PATH, "r") as log_file:
    for line in log_file:
        print(line, end='')

NameError: name 'LOG_FILE_PATH' is not defined

## Parse Files for Metadata
<hr>

In [3]:
# Go through log files:
from dateutil.parser import parse

# Gather pertinent info from log file
computer_name = None     # last token of the most recent matching 'desktop' line
config_pushed = False    # becomes True once a 'download complete' line is seen
config_complete = None   # timestamp parsed from the most recent 'download complete' line
with open(LOG_TXT_PATH, 'r') as file:
    # Iterate the file object directly so the log is streamed rather than
    # loaded into memory in one go.
    for line in file:
        lowered = line.lower()
        # Extract computer name: a line that mentions 'desktop' and has no '.'
        if 'desktop' in lowered and '.' not in line:
            # split() with no argument splits on any whitespace and discards the
            # trailing newline, so the name is stored without a line break.
            computer_name = line.split()[-1]
        if 'download complete' in lowered:
            config_pushed = True
            # Everything before the first comma is handed to dateutil as the
            # timestamp; index() raises ValueError if the line has no comma.
            config_complete = parse(line[:line.index(',')])
# Report only when both pieces of information were found.
if computer_name and config_pushed:
    print('Computer name: ' + computer_name)
    print('Configuration changed on: ' + str(config_complete))

Computer name: DESKTOP-V8GI6RV

Configuration changed on: 2022-07-04 17:49:11


In [4]:
# Go through Device.xml
import xml.etree.ElementTree as ET

tree = ET.parse(DEVICE_XML_PATH)
root = tree.getroot()
print('All Enabled IP addresses associated with this machine:')
for adapter in root.findall('./NetworkAdapters/MarkVIeNetworkAdapter'):
    # .get() returns None for a missing attribute, so an adapter element that
    # lacks 'Enabled' is treated as not enabled instead of raising KeyError.
    if adapter.get('Enabled') != 'true':
        continue
    ip_address = adapter.get('IPAddress')
    # Skip enabled adapters that carry no IPAddress attribute at all.
    if ip_address is not None:
        print(ip_address)

All Enabled IP addresses associated with this machine:
192.168.101.111
192.168.1.8
192.168.2.8
192.168.3.8


In [5]:
# Go through .tcw file: it is parsed as XML and every attribute of each
# ./System/TssSettings element is printed as "name: value".
tree = ET.parse(MAKO_TCW_PATH)
root = tree.getroot()
for settings in root.findall('./System/TssSettings'):
    # items() yields each attribute name together with its value, avoiding a
    # second lookup by key.
    for name, value in settings.attrib.items():
        print(name + ': ' + value)

TssConfigOption: LowAccuracy
DisableTssBroadcasts: true
PrimaryTimeSource: workstation
SecondaryTimeSource: 
SiteTimeSource1: 
SiteTimeSource2: 
TssClientConfigurationOption: Disabled


## Hash all file types
<hr/>

Define a helper function for the repetitive hashing task:

In [6]:
# Read file in chunks (future-proofing) and generate hash:
def hashGenerator(file, buffer_size = 65536):
    """Compute the SHA-256 hash of a file's raw bytes.

    The file is opened in binary mode and fed to the hash in chunks, so the
    whole file never has to be held in memory at once.

    Args:
        file: Path of the file to hash (anything ``open()`` accepts).
        buffer_size: Number of bytes requested per read. Must not be 0.

    Returns:
        The ``hashlib`` SHA-256 hash object after all data has been fed in;
        call ``.hexdigest()`` or ``.digest()`` on it to obtain the value.

    Raises:
        ValueError: If ``buffer_size`` is 0.
    """
    # A zero-byte read returns b'' immediately, which would end the loop before
    # any data is hashed and silently yield the digest of empty input.
    if buffer_size == 0:
        raise ValueError('buffer_size must be non-zero')
    file_hash = hashlib.sha256()
    # Read file as binary
    with open(file, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # i.e. until end of file.
        for chunk in iter(lambda: f.read(buffer_size), b''):
            file_hash.update(chunk)
    return file_hash

In [7]:
# Get Device.xml hash
device_hash = hashGenerator(DEVICE_XML_PATH)
print("Hash of Device.xml: " + device_hash.hexdigest())

# Demonstrate that file extension doesn't matter with binary read. The copy gets
# its own variable so device_hash keeps referring to Device.xml; the two digests
# are equal only when the copy is byte-for-byte identical to Device.xml.
device_copy_hash = hashGenerator('logfiles/Device-Copy1')
print("Hash of Device-Copy1: " + device_copy_hash.hexdigest())


# Get makoTest2.tcw hash
mako_hash = hashGenerator(MAKO_TCW_PATH)
print('Hash of makoTest2.tcw: ' + mako_hash.hexdigest())

# get workstationLog.txt hash (formerly long filename)
workstation_log_hash = hashGenerator(LOG_TXT_PATH)
print('Hash of workstationLog.txt: ' + workstation_log_hash.hexdigest())

Hash of Device.xml: fcf9b277a29ca61e5661ff86a2ce053748b0ca0abf7c887a98c78ec84094a149
Hash of Device-Copy1: 8520e9758d7545f4b54be0088d65a7852f9502042d24dd6ef360806d0521dc30
Hash of makoTest2.tcw: dd869bfb12b0d49028648d84973aad4a40d721e7441f9a693a62f604766f54da
Hash of workstationLog.txt: 5f739d7ea0ae1de1b0fb06bc4cbd669f371e5d7778548550b1a2ac53fec5efc3


In [None]:
# TODO: timestamp + hash -> encryption w/ PK -> store with metadata