Skip to content

Commit

Permalink
fix encoding issues with hashsum calculation
Browse files: browse the repository at this point in the history
  • Loading branch information
mhils committed Mar 23, 2013
1 parent a8d0fea commit c8510cd
Showing 1 changed file with 26 additions and 34 deletions.
60 changes: 26 additions & 34 deletions libhproxy/flowcollection.py
Expand Up @@ -95,6 +95,7 @@ def addFlow(self, flow):

#Save decoded content
decoded_content = {}
algorithms = ["md5","sha1","sha256"]
for i in ["request","response"]:
#strip content out of the flowRepr
flowRepr[i]["contentLength"] = len(flowRepr[i]["content"])
Expand All @@ -112,6 +113,29 @@ def addFlow(self, flow):
decoded = decoded_
except:
print "Warning: Data cannot be decoded with given Content Encoding."

#calculate hashsums
flowRepr[i]["contentChecksums"] = {}
parts = {"Checksum":decoded}

#Handle multipart checksums
if i == "request":
try:
headers = dict(map(str.lower, map(str,a)) for a in flow.request.headers) # odict -> (lowered) dict
fs = cgi.FieldStorage(StringIO.StringIO(decoded),headers,environ={ 'REQUEST_METHOD':'POST' })
parts = getParts(fs)
except Exception as e:
import traceback
traceback.print_exc()
print "Warning: Cannot decode multipart"

for item, data in parts.viewitems():
checksums = {}
for a in algorithms:
checksums[a] = getattr(hashlib,a)(data).hexdigest()
flowRepr[i]["contentChecksums"][item] = checksums



#decode with http content-type encoding
ct = r.headers["content-type"]
Expand All @@ -138,39 +162,7 @@ def addFlow(self, flow):
except:
print "Warning: Cannot encode request to utf8"
decoded_content[i] = decoded

#calculate hashsums
algorithms = ["md5","sha1","sha256"]
for i in ["request","response"]:

flowRepr[i]["contentChecksums"] = {}

parts = {"Checksum":decoded_content[i]}

#Handle multipart checksums
if i == "request":
try:
headers = dict(map(str.lower, map(str,a)) for a in flow.request.headers) # odict -> (lowered) dict
fs = cgi.FieldStorage(StringIO.StringIO(decoded_content[i]),headers,environ={ 'REQUEST_METHOD':'POST' })
parts = getParts(fs)
except Exception as e:
import traceback
traceback.print_exc()
print "Warning: Cannot decode multipart"

#TODO: Analyze request and split it up into parameters to match file upload
for item, data in parts.viewitems():
checksums = {}
encoded = data
try:
encoded = data.encode("latin-1") # FIXME: I don't know why we need that currently, we need to investigate that.
except:
pass
for a in algorithms:
checksums[a] = getattr(hashlib,a)(encoded).hexdigest()
flowRepr[i]["contentChecksums"][item] = checksums



self._flows.append(flow)
self._flows_serialized.append(flowRepr)
self._decoded_contents.append(decoded_content)
Expand Down Expand Up @@ -198,4 +190,4 @@ def __enter__(self):
def __exit__(self, exc_type, value, tb):
for flow in self.flows:
for i in ["request","response"]:
del flow[i]["content"]
del flow[i]["content"]

0 comments on commit c8510cd

Please sign in to comment.