Permalink
Browse files

add checksum calculation for uploaded files. fixes #35

  • Loading branch information...
1 parent 891cae7 commit c447e50a7af47752455e1818cf374e7fa25d157a @mhils committed Dec 5, 2012
Showing with 86 additions and 7 deletions.
  1. +61 −6 libhproxy/flowcollection.py
  2. +1 −1 netlib
  3. +24 −0 test/parsemultipart.py
@@ -1,8 +1,42 @@
from libmproxy import encoding
from libhproxy.honey import HoneyProxy
-import re, socket
+import re, socket, cgi, StringIO
import hashlib #@UnusedImport
+"""
+flatten a given fieldStorage and return a dict with the following structure:
+{"filenameA":"filecontentA",...}
+This dict will be processed for creating hash checksums
+"""
def getParts(fieldStorage, parts=None):
    """
    Flatten a cgi.FieldStorage-like object into a dict of named payloads.

    The result maps a display name to the stringified field value, e.g.
    {"fieldA: fileA.txt": "filecontentA", ...}; callers hash each value.

    fieldStorage -- object exposing ``value``, ``name`` and ``filename``.
                    When ``value`` is a list, each element is flattened
                    recursively into the same dict.
    parts        -- optional dict to add entries to. A new dict is created
                    when omitted, so separate calls never share state.

    Naming rules for a non-list value:
      * neither name nor filename set  -> "Checksum"
      * name and filename set          -> "<name>: <filename>"
      * only one of them set           -> that one
    Named values with a length below 1025 are skipped. A name that is
    already present gets " (2)", " (3)", ... appended.

    Always returns the dict that was filled (``parts`` itself when given).
    """
    if parts is None:
        # A literal {} default would be created once and shared by every
        # call, leaking entries from one request/response into the next.
        parts = {}

    value = fieldStorage.value

    if isinstance(value, list):
        # Nested container: collect every child into the same dict.
        for child in value:
            getParts(child, parts)
        return parts

    fieldName = fieldStorage.name
    fileName = fieldStorage.filename

    if fieldName is None and fileName is None:
        name = "Checksum"
    else:
        if len(value) < 1025:
            # Don't calculate checksums for really small chunks. Still hand
            # back the dict so a top-level caller never receives None.
            return parts
        if fieldName is None:
            name = str(fileName)
        elif fileName is None:
            name = str(fieldName)
        else:
            name = str(fieldName) + ": " + str(fileName)

    # Find the next available name. The suffix is always rebuilt from the
    # base name, so it stays well-formed when the counter gains a digit.
    uniqueName = name
    counter = 1
    while uniqueName in parts:
        counter += 1
        uniqueName = "%s (%d)" % (name, counter)

    parts[uniqueName] = str(value)
    return parts
+
class FlowCollection:
"""
Collects all flows, gives them an id, decodes content.
@@ -62,9 +96,12 @@ def addFlow(self, flow):
decoded = r.content
#decode with http content-encoding
- ce = r.headers["content-encoding"]
- if ce and ce[0] in encoding.ENCODINGS:
- decoded = encoding.decode(ce[0],r.content)
+ try:
+ ce = r.headers["content-encoding"]
+ if ce and ce[0] in encoding.ENCODINGS:
+ decoded = encoding.decode(ce[0],r.content)
+ except:
+ print "Warning: Data cannot be decoded with given Content Encoding."
#decode with http content-type encoding
ct = r.headers["content-type"]
@@ -85,15 +122,33 @@ def addFlow(self, flow):
print "Warning: Could not decode request."
import traceback
print traceback.format_exc()
-
+
+ try:
+ decoded = decoded.encode('utf-8')
+ except:
+ print "Warning: Cannod encode request to utf8"
decoded_content[i] = decoded
#calculate hashsums
algorithms = ["md5","sha256"]
for i in ["request","response"]:
+ r = getattr(flow,i)
+
flowRepr[i]["contentChecksums"] = {}
+
+ parts = {"Checksum":decoded_content[i]}
+
+ try:
+ headers = dict(map(str.lower, map(str,a)) for a in r.headers) # odict -> (lowered) dict
+ fs = cgi.FieldStorage(StringIO.StringIO(decoded_content[i]),headers,environ={ 'REQUEST_METHOD':'POST' })
+ parts = getParts(fs)
+ except Exception as e:
+ import traceback
+ traceback.print_exc()
+ print "Warning: Cannot decode multipart"
+
#TODO: Analyze request and split it up into parameters to match file upload
- for item, data in (("Checksum",decoded_content[i].encode('utf-8')),):
+ for item, data in parts.viewitems():
checksums = {}
for a in algorithms:
checksums[a] = getattr(hashlib,a)(data).hexdigest()
2 netlib
Submodule netlib updated 1 files
+3 −0 netlib/odict.py
View
@@ -0,0 +1,24 @@
# Manual smoke test: feed a hand-written multipart/form-data body to
# cgi.FieldStorage and print what it parsed. The body holds three file
# fields; the first and third reuse the filename "fileA.txt".
import cgi
import StringIO

multipart_body = """-----------------------------265001916915724
Content-Disposition: form-data; name="F1"; filename="fileA.txt"
Content-Type: text/plain

I'm file A.
-----------------------------265001916915724
Content-Disposition: form-data; name="F2"; filename="fileB.txt"
Content-Type: text/plain

I'm file B.
-----------------------------265001916915724
Content-Disposition: form-data; name="F3"; filename="fileA.txt"
Content-Type: text/plain

I'm file A2.
-----------------------------265001916915724--"""

# Show the body size first.
print(len(multipart_body))

# The boundary declared here is the delimiter used in the body above,
# minus the leading "--" that the multipart format adds to each delimiter.
request_headers = {
    "content-type": "multipart/form-data; boundary=---------------------------265001916915724"
}

# FieldStorage reads the body from a file-like object; REQUEST_METHOD is
# set to POST in the environ passed alongside the headers.
body_stream = StringIO.StringIO(multipart_body)
parsed_form = cgi.FieldStorage(
    body_stream,
    request_headers,
    environ={'REQUEST_METHOD': 'POST'},
)
print(parsed_form)

0 comments on commit c447e50

Please sign in to comment.