73 form.py
@@ -14,6 +14,8 @@
from HTMLParser import HTMLParser
import itertools
import utils
import tempfile
import os

class form(Tkinter.Tk):
"""
@@ -40,13 +42,17 @@ class mainForm(form):
"""
currHRN = ''
currName = ''
currHTML = ''
results = []
HRNMap = {}
NameMap = {}
clients = {}
pages = []
page = 0
def initialise(self):
# set up temp dir
self.tempDir = tempfile.gettempdir()
self.tempFileURL = os.path.join(self.tempDir, 'currClient.html')

# Set grid
self.grid()
@@ -74,10 +80,12 @@ def initialise(self):

# set top buttons
self.btnSearch = Tkinter.Button(self, text=u"Search", command=self.refresh)
self.btnSearch.grid(column=3, row=3, columnspan=2)
self.btnSearch.grid(column=3, row=2, columnspan=2)
self.btnClear = Tkinter.Button(self, text=u"Clear", command=self.clearTable)
self.btnClear.grid(column=3, row=1, columnspan=2)

self.btnSelect = Tkinter.Button(self, text=u"Select", command=self.select)
self.btnSelect.grid(column=3, row=4, columnspan=2)

# set Table
self.table = SimpleTable(self)
self.table.grid(column=0, row=5, rowspan=5, columnspan=4, sticky='EW')
@@ -99,12 +107,13 @@ def initialise(self):
self.grid_columnconfigure(3,weight=1,minsize=100)

def loadData(self):
    '''Ask for an encrypted client summary and its password, then parse it.

    The chosen file is opened in binary mode and handed to
    ``utils.decrypt`` together with the password; the decrypted markup is
    then passed to ``parseHTML`` and ``parseDIV``.

    Nothing is loaded when the user dismisses either dialog.
    '''
    encodedFilename = tkFileDialog.askopenfilename(defaultextension='.html', initialdir='C:/temp/', title='Choose Mass Client Summary', parent=self)
    if not encodedFilename:
        # The file dialog yields an empty value when it is cancelled;
        # open('') would raise instead of quietly doing nothing.
        return
    passwd = tkSimpleDialog.askstring('Enter Password', 'Please Enter the password.\n\nNote: make sure it is correct!')
    if passwd is None:
        # askstring yields None on cancel; an empty string is still a
        # password the user deliberately entered, so only None aborts.
        return
    with open(encodedFilename, 'rb') as f:
        HTMLRaw = utils.decrypt(f, passwd)
        self.parseHTML(HTMLRaw)
        self.parseDIV(HTMLRaw)

def _first(self):
self.page = 0
@@ -126,6 +135,19 @@ def _last(self):
self.page = len(self.pages) - 1
self.updateTable()

def select(self):
    """Show the client page for the table's current selection.

    Does nothing when no table cell has been selected (missing, ``None``
    or empty selection). Otherwise resolves the selection to HTML via
    ``getCurrHTML`` and displays it via ``_showHTML``.
    """
    # NOTE(review): in the visible code `selected` is only assigned by a
    # table click, so tolerate the attribute being absent before the
    # first click -- confirm whether SimpleTable initialises it.
    selected = getattr(self.table, 'selected', None)
    if selected is None or selected == '':
        return
    self.getCurrHTML()
    self._showHTML()

def _showHTML(self):
    """Write ``self.currHTML`` to the temp page and open it in the browser.

    Any page left at ``self.tempFileURL`` by an earlier call is removed
    before the new one is written.
    """
    target = self.tempFileURL
    stale_page_present = os.path.exists(target)
    if stale_page_present:
        os.remove(target)
    with open(target, 'wb') as page:
        page.write(self.currHTML)
    webbrowser.open(target)

def refresh(self):
tempHRN = self.tbHRN.get()
if tempHRN == '':
@@ -142,6 +164,8 @@ def refresh(self):
def getDataForMap(self, html):
HRNMap = {}
NameMap = {}
if html == '\r\n' or html == '\n\n' or html == '\n':
return
soup = bs4.BeautifulSoup(html, "html.parser")
demo = soup.contents[3].contents[1].contents[1].contents[1].contents[1]

@@ -168,23 +192,33 @@ def parseHTML(self, text=None):
text = text[startIndex: stopIndex]
#print(text)
delim = text.split('<br />')
counter = 1
counter = 0
temp = ''
self.key = 0
for br in delim:
#print("+++" + str(counter) + br)
if br == '\n':
continue
counter += 1
temp += br
if counter == 1:
self.getDataForMap(br)
counter += 1
if counter == 12:
if counter == 11:
self.clients[self.key] = temp
temp = ''
counter = 1
counter = 0
self.key += 1
temp += br


def parseDIV(self, text=None):
    """Print every DIV element found between <body> and </body> in *text*.

    :param text: decrypted report markup; ``None`` means nothing to do.
    :returns: ``None``.
    """
    if text is None:
        return None
    text = str(text).strip('\n')
    # Keep only what sits between the body tags (6 == len('<body>')).
    startIndex = text.find('<body>') + 6
    stopIndex = text.find('</body>')
    text = text[startIndex: stopIndex]
    # A plain string has no findall(); walk the markup with the same
    # BeautifulSoup/html.parser combination used elsewhere in this file.
    # That parser lower-cases tag names, so 'div' also matches <DIV>.
    soup = bs4.BeautifulSoup(text, "html.parser")
    for client in soup.find_all('div'):
        print(client)

def parseHTMLFile(self):
f = file
with open(self.HTMLFile, 'r') as f:
@@ -223,7 +257,18 @@ def getClient(self):
self.results.append([key, self.HRNMap[key], name])
self._splitPages()
self.updateTable()


def getCurrHTML(self):
    """Load the HTML of the selected client into ``self.currHTML``.

    The table selection may be either a client name or an HRN. A name is
    first translated to the HRN stored under the same key, and the table
    selection is updated to that HRN. ``self.currHTML`` is left as ``''``
    when nothing is selected or no client matches.
    """
    self.currHTML = ''
    selected = self.table.selected
    if selected is None:
        return
    # Translate a clicked name into its HRN. Stop at the first hit: once
    # the selection has become an HRN, comparing it with the remaining
    # names could re-map it to a different client.
    for key, name in self.NameMap.items():
        if name == selected:
            selected = self.HRNMap[key]
            self.table.selected = selected
            break
    for key, hrn in self.HRNMap.items():
        if hrn == selected:
            self.currHTML = self.clients[key]
            return

def _splitPages(self, size=4):
it = iter(self.results)
item = list(itertools.islice(it, size))
@@ -247,6 +292,7 @@ def clearTable(self):
self.table.clear()

class SimpleTable(Tkinter.Frame):

def __init__(self, parent, rows=5, columns=2):
# use black background so it "peeks through" to
# form grid lines
@@ -259,14 +305,19 @@ def __init__(self, parent, rows=5, columns=2):
for column in range(columns):
label = Tkinter.Label(self, text='', borderwidth=0)
label.grid(row=row, column=column, sticky="nsew", padx=1, pady=1)
label.bind('<Button-1>', self._tableClick)
current_row.append(label)
self._widgets.append(current_row)

for column in range(columns):
self.grid_columnconfigure(column, weight=1)
self.set(0, 0, 'HRN')
self.set(0, 1, 'Name')


def _tableClick(self, event):
text = event.widget.cget('text')
self.selected = text

def clear(self):
for row in range(1, self.rows):
for col in range(self.columns):
BIN +2.03 KB (120%) form.pyc
Binary file not shown.
BIN +87 Bytes (100%) indivPerson.pyc
Binary file not shown.
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

# PCSView - parser
# ----------------
# This is the individual client class

import bs4
from HTMLParser import HTMLParser

class PCSParser(object):
    """Split a client summary HTML file into per-client records.

    After ``parseHTML`` has run:

    * ``clients[key]`` holds the concatenated markup of one client,
    * ``HRNMap[key]`` and ``NameMap[key]`` hold the values extracted by
      ``getDataForMap`` from that client's first chunk,

    where ``key`` counts clients from 0 in file order.
    """

    # Class-level defaults are kept so existing attribute access keeps
    # working; __init__ gives every instance its own containers so that
    # two parsers never write into the same dictionaries.
    inFile = ''
    HRNMap = {}
    NameMap = {}
    clients = {}

    # Number of '<br />'-separated chunks that make up one client record.
    CHUNKS_PER_CLIENT = 11

    def __init__(self, inFile=''):
        """Remember the path of the file to parse and start with empty maps."""
        self.inFile = inFile
        self.HRNMap = {}
        self.NameMap = {}
        self.clients = {}
        self.key = 0

    def getDataForMap(self, html):
        """Store the HRN and name found in *html* under the current key.

        :param html: markup of the first chunk of a client record.
        """
        soup = bs4.BeautifulSoup(html, "html.parser")
        # Fixed positional path into the parsed chunk.
        # NOTE(review): presumably the demographics table -- confirm
        # against the report markup before changing any index.
        demo = soup.contents[3].contents[1].contents[1].contents[1].contents[1]
        self.HRNMap[self.key] = str(demo.contents[7].contents)
        self.NameMap[self.key] = str(demo.contents[3].contents)

    def parseHTML(self):
        """Read ``self.inFile`` and fill ``clients``, ``HRNMap`` and ``NameMap``.

        The text between ``<body>`` and ``</body>`` is split on
        ``'<br />'``. Chunks equal to a lone newline are ignored; every
        ``CHUNKS_PER_CLIENT`` remaining chunks form one client record.
        A trailing group with fewer chunks is not stored.
        """
        with open(self.inFile, 'r') as f:
            text = str(f.read()).strip('\n')
        # 6 == len('<body>'): start right after the opening tag.
        startIndex = text.find('<body>') + 6
        stopIndex = text.find('</body>')
        text = text[startIndex: stopIndex]

        self.key = 0
        counter = 0
        parts = []
        for br in text.split('<br />'):
            if br == '\n':
                continue
            # Count and collect the chunk *before* checking for the end of
            # a record, so each record owns exactly CHUNKS_PER_CLIENT
            # chunks and the extracted HRN/name belong to the same record.
            counter += 1
            parts.append(br)
            if counter == 1:
                self.getDataForMap(br)
            if counter == self.CHUNKS_PER_CLIENT:
                self.clients[self.key] = ''.join(parts)
                parts = []
                counter = 0
                self.key += 1

    def getClient(self, hrn=None, name=None):
        """Placeholder for looking a client up by HRN or name; not implemented."""
        pass
18 test.py
@@ -0,0 +1,18 @@
from lxml import etree

#with open('test2.html', 'rb') as fin, open('test2.out.html', 'wb') as fout:
#shtml = fin.read()
#find = "</td>\r\n<tr>\r\n</table>"
#replace = "</td>\r\n</tr>\r\n</table>"
#shtml.replace(find, replace)
#fout.write(shtml.replace(find, replace))

# Parse a saved single-client page with lxml and dump the resulting tree.
with open('indiv.html', 'rb') as page:
    shtml = page.read()

# Earlier experiment, kept for reference: parse only the <body> contents.
#parser = etree.HTMLParser()
#startIndex = shtml.find('<body>') + 6
#stopIndex = shtml.find('</body>')
#shtml = shtml[startIndex: stopIndex].strip('\r\n')
tree = etree.HTML(shtml)
serialised = etree.tostring(tree)
print(serialised)
@@ -12,7 +12,6 @@
from Crypto import Random
from HTMLParser import HTMLParser


class MLStripper(HTMLParser):
def __init__(self):
self.reset()
@@ -146,7 +145,10 @@ def derive_key_and_iv(password, salt, key_length, iv_length):

if __name__ == '__main__':
    # Manual smoke test: encrypt a sample page to disk, then decrypt that
    # output again so the two plain files can be compared by hand.
    password = 'derp'
    in_filename = 'test2.html'
    enc_filename = 'test2.enc.html'
    dec_filename = 'test2.dec.html'
    with open(in_filename, 'rb') as in_file, open(enc_filename, 'wb') as out_file:
        encryptToFile(in_file, out_file, password)
    # Re-open the encrypted output only after the first block has closed
    # (and therefore flushed) it.
    with open(enc_filename, 'rb') as in_file, open(dec_filename, 'wb') as out_file:
        decryptToFile(in_file, out_file, password)
BIN +151 Bytes (100%) utils.pyc
Binary file not shown.