forked from netfarm/archiver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
executable file
·125 lines (109 loc) · 3.83 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
# -*- Mode: Python; tab-width: 4 -*-
#
# Netfarm Mail Archiver - release 2
#
# Copyright (C) 2005-2007 Gianluigi Tiesi <sherpya@netfarm.it>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# ======================================================================
## @file utils.py
## Common utils
import re
from mimify import mime_decode
from base64 import decodestring
from rfc822 import parseaddr
from md5 import new as MD5
# Matches one MIME encoded word of the form "=?charset?encoding?text?=":
#   group(1) is the charset label, group(2) the one-character encoding tag
#   and group(3) the payload (no '?', space, tab or newline allowed).
# Written as a raw string so that regex escapes such as \w are handed to the
# re module untouched instead of being parsed as Python string escapes.
mime_head = re.compile(r'=\?(.*?)\?(\w)\?([^? \t\n]+)\?=', re.IGNORECASE)
# Decoder to call for each supported encoding tag (looked up in lower case):
# 'q' -> mimify.mime_decode, 'b' -> base64.decodestring
encodings = { 'q': mime_decode, 'b': decodestring }
# Lower-case header names that dupe_check() reports when seen more than once
CHECKHEADERS = [ 'from', 'subject', 'date', 'message-id', 'x-archiver-id' ]
# Header names, in this order, whose values hash_headers() feeds to the digest
HASHHEADERS = [ 'message-id', 'from', 'to', 'cc', 'subject' ]
def mime_decode_header(line):
    """Decode the MIME encoded words found in a header line.

    Workaround to python mime_decode_header: the original code doesn't
    support base64.

    Every "=?charset?encoding?text?=" token matched by mime_head is replaced
    by its decoded payload when the (lower-cased) encoding tag is a key of
    encodings; a token with any other tag contributes its raw payload as is.
    Text outside the tokens is copied through unchanged.

    The assembled string is finally decoded using the charset of the last
    token found ('latin-1' when the line has no token), replacing
    undecodable bytes.  A charset label unknown to the codec registry
    (including an empty one) falls back to 'latin-1' instead of raising
    LookupError.

    Returns the decoded header text, as produced by str.decode().
    """
    ## TODO: check combined charsets headers
    default_charset = 'latin-1'
    charset = default_charset
    chunks = []
    pos = 0
    while True:
        res = mime_head.search(line, pos)
        if res is None:
            break
        charset = res.group(1)
        enctype = res.group(2).lower()
        payload = res.group(3)
        # Plain text between the previous token (or line start) and this one
        chunks.append(line[pos:res.start(0)])
        if enctype in encodings:
            # Underscores are turned into spaces before the payload is decoded
            payload = encodings[enctype](payload.replace('_', ' '))
        chunks.append(payload)
        pos = res.end(0)

    # Whatever follows the last token
    chunks.append(line[pos:])
    decoded = ''.join(chunks)
    try:
        return decoded.decode(charset, 'replace')
    except LookupError:
        # Unknown charset label: keep going with the default charset
        return decoded.decode(default_charset, 'replace')
def unquote(text):
    """Return text with every double-quote character removed"""
    return text.replace('"', '')
def split_hdr(header, value, dict):
    """Split a (possibly multiline) header into its parameters

    header and value are glued together as 'header=value', CR and LF are
    removed and the result is cut at every ';'.  Each 'key=value' piece is
    stored into dict (modified in place, nothing is returned): the key is
    stripped, the value has its double quotes removed and is then stripped.
    Pieces without '=' or with nothing after the first '=' are skipped.
    """
    joined = header + '=' + value
    flat = joined.replace('\r', '').replace('\n', '')
    for piece in flat.split(';'):
        piece = piece.strip()
        if '=' not in piece:
            # invalid
            continue
        name, content = piece.split('=', 1)
        if not content:
            # empty (tested before unquoting, so '""' is still stored as '')
            continue
        dict[name.strip()] = unquote(content).strip()
def parse_message(submsg):
    """Parse a sub message looking for a named attachment.

    submsg must expose its headers through the mapping submsg.dict; the
    'content-type' and 'content-disposition' entries are looked up under
    these lower-case keys.
    NOTE(review): presumably submsg is an rfc822/mimetools style message
    whose .dict is a plain dictionary - confirm against the callers.

    Returns None when there is no content-type header or when neither a
    'name' nor a 'filename' parameter is found.  Otherwise returns
    { 'name': <attachment name>, 'content-type': <type> }; the type is ''
    when the content-type header carries no usable value (previously this
    case raised KeyError).
    """
    headers = submsg.dict
    if 'content-type' not in headers:
        return None

    hd = {}
    split_hdr('Content-Type', headers['content-type'], hd)
    if 'content-disposition' in headers:
        split_hdr('Content-Disposition', headers['content-disposition'], hd)

    ### Hmm nice job clients, filename or name?
    if 'name' not in hd and 'filename' in hd:
        hd['name'] = hd['filename']

    if 'name' not in hd:
        return None

    ### Found an attachment
    # split_hdr() skips empty values, so the 'Content-Type' key is missing
    # when the header value is empty or starts with ';'
    return { 'name': hd['name'], 'content-type': hd.get('Content-Type', '') }
def dupe_check(headers):
    """Look for a repeated header among those that should be unique

    headers is an iterable of header lines.  Each line is stripped and the
    text before its first ':' is lower-cased and taken as the header name;
    lines without ':' are ignored.

    Returns the name of the first header listed in CHECKHEADERS that shows
    up a second time, or None when there is no such duplicate.

    NOTE(review): lines are stripped before being parsed, so a folded
    continuation line containing ':' is handled like a header line of its
    own - confirm what the callers pass in.
    """
    seen = []
    for line in headers:
        line = line.strip()
        if ':' not in line:
            continue
        name = line.split(':', 1)[0].lower()
        # Only the headers that must be unique need to be remembered
        if name in CHECKHEADERS:
            if name in seen:
                return name
            seen.append(name)
    return None
def safe_parseaddr(address):
    """Extract a usable e-mail address from an address header value

    Returns the address part given by parseaddr(), or None when that part
    is missing, has no '@', or has a blank local part or a blank domain
    (text before / after the first '@').
    """
    addr = parseaddr(address)[1]
    if addr is None or '@' not in addr:
        return None
    local, domain = addr.split('@', 1)
    if not local.strip() or not domain.strip():
        return None
    return addr
def hash_headers(getter):
    """Compute an MD5 fingerprint of the headers listed in HASHHEADERS

    getter is called as getter(name, '') once for each name in HASHHEADERS,
    in order, and every returned value is fed to the digest.

    Returns the digest as a hexadecimal string.
    """
    digest = MD5()
    for name in HASHHEADERS:
        value = getter(name, '')
        digest.update(value)
    return digest.hexdigest()