From c8510cddc5b35a2a8e60ab3ba9f89b130cf0f832 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 23 Mar 2013 02:36:15 +0100 Subject: [PATCH] fix encoding issues with hashsum calculation --- libhproxy/flowcollection.py | 60 ++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/libhproxy/flowcollection.py b/libhproxy/flowcollection.py index 9a2c0d8..4486f04 100644 --- a/libhproxy/flowcollection.py +++ b/libhproxy/flowcollection.py @@ -95,6 +95,7 @@ def addFlow(self, flow): #Save decoded content decoded_content = {} + algorithms = ["md5","sha1","sha256"] for i in ["request","response"]: #strip content out of the flowRepr flowRepr[i]["contentLength"] = len(flowRepr[i]["content"]) @@ -112,6 +113,29 @@ def addFlow(self, flow): decoded = decoded_ except: print "Warning: Data cannot be decoded with given Content Encoding." + + #calculate hashsums + flowRepr[i]["contentChecksums"] = {} + parts = {"Checksum":decoded} + + #Handle multipart checksums + if i == "request": + try: + headers = dict(map(str.lower, map(str,a)) for a in flow.request.headers) # odict -> (lowered) dict + fs = cgi.FieldStorage(StringIO.StringIO(decoded),headers,environ={ 'REQUEST_METHOD':'POST' }) + parts = getParts(fs) + except Exception as e: + import traceback + traceback.print_exc() + print "Warning: Cannot decode multipart" + + for item, data in parts.viewitems(): + checksums = {} + for a in algorithms: + checksums[a] = getattr(hashlib,a)(data).hexdigest() + flowRepr[i]["contentChecksums"][item] = checksums + + #decode with http content-type encoding ct = r.headers["content-type"] @@ -138,39 +162,7 @@ def addFlow(self, flow): except: print "Warning: Cannot encode request to utf8" decoded_content[i] = decoded - - #calculate hashsums - algorithms = ["md5","sha1","sha256"] - for i in ["request","response"]: - - flowRepr[i]["contentChecksums"] = {} - - parts = {"Checksum":decoded_content[i]} - - #Handle multipart checksums - if i == "request": - try: - headers = dict(map(str.lower, map(str,a)) for a in flow.request.headers) # odict -> (lowered) dict - fs = cgi.FieldStorage(StringIO.StringIO(decoded_content[i]),headers,environ={ 'REQUEST_METHOD':'POST' }) - parts = getParts(fs) - except Exception as e: - import traceback - traceback.print_exc() - print "Warning: Cannot decode multipart" - - #TODO: Analyze request and split it up into parameters to match file upload - for item, data in parts.viewitems(): - checksums = {} - encoded = data - try: - encoded = data.encode("latin-1") # FIXME: I don't know why we need that currently, we need to investigate that. - except: - pass - for a in algorithms: - checksums[a] = getattr(hashlib,a)(encoded).hexdigest() - flowRepr[i]["contentChecksums"][item] = checksums - - + self._flows.append(flow) self._flows_serialized.append(flowRepr) self._decoded_contents.append(decoded_content) @@ -198,4 +190,4 @@ def __enter__(self): def __exit__(self, exc_type, value, tb): for flow in self.flows: for i in ["request","response"]: - del flow[i]["content"] \ No newline at end of file + del flow[i]["content"]