Skip to content
Newer
Older
100755 262 lines (203 sloc) 8.66 KB
089d258 working script
R. H. Gracini Guiraldelli authored
1 #!/usr/bin/env python
2
3 # BSD LICENSE:
4 # Copyright (c) 2011, Ricardo H Gracini Guiraldelli <rguira@acm.org>
53e7893 @lucasdemarchi Add additional copyrights
authored
5 # Copyright (c) 2011, Pedro Pedruzzi <pedro.pedruzzi@gmail.com>
6 # Copyright (c) 2011, Lucas De Marchi <lucas.de.marchi@gmail.com>
089d258 working script
R. H. Gracini Guiraldelli authored
7 # All rights reserved.
1b1067f @lucasdemarchi Remove trailing whitespaces
authored
8 #
089d258 working script
R. H. Gracini Guiraldelli authored
9 # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1b1067f @lucasdemarchi Remove trailing whitespaces
authored
10 #
089d258 working script
R. H. Gracini Guiraldelli authored
11 # Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 # Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13 # Neither the name of the Ricardo H Gracini Guiraldelli nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
14 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15
16 import imaplib
17 import re
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
18 import rfc822
19 import StringIO
20 import email.header
21 import getpass
22 import os
76356ed @lucasdemarchi import sys
authored
23 import sys
6c82bc9 @lucasdemarchi Add option parser support
authored
24 from optparse import OptionParser
25
# Usage line and version string handed to OptionParser in parse_options().
USAGE = "%prog [OPTIONS]"
VERSION = '0.1'

# Encoding used by safe_print(); setup_encoding() may replace this value.
default_encoding = sys.stdout.encoding

# Parsed command-line options; assigned by main().
options = None

# Answers offered by input_or_default() when the user just presses enter.
options_defaults = dict(
    host='imap.gmail.com',
    port='993',
    size='10',
)

# copied from http://docs.python.org/library/imaplib.html
list_response_pattern = re.compile(
    r'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)'
)
ab8c3d4 @lucasdemarchi Re-organize function order
authored
38
39 #imaplib.Debug = 4
40
340f305 @lucasdemarchi Move encoding setup to its own function
authored
def setup_encoding():
    """Settle the module-level default_encoding used when printing.

    The value taken from sys.stdout.encoding is kept only when it is set
    and stdout is a terminal.  Otherwise the locale's preferred encoding
    is used, and 'ascii' if that is empty as well.
    """
    global default_encoding

    if default_encoding and sys.stdout.isatty():
        # stdout is a terminal and already reported an encoding
        return

    import locale
    default_encoding = locale.getpreferredencoding() or 'ascii'
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
50
def parse_list_response(line):
    """Split one IMAP LIST response line into its components.

    Returns a (flags, delimiter, mailbox_name) tuple.  Double quotes at
    either end of the mailbox name are stripped; flags and delimiter are
    returned exactly as matched by list_response_pattern.
    """
    parsed = list_response_pattern.match(line)
    flags = parsed.group('flags')
    delimiter = parsed.group('delimiter')
    mailbox_name = parsed.group('name').strip('"')
    return (flags, delimiter, mailbox_name)
55
def decode_modified_utf7(s):
    """Decode an IMAP mailbox name from modified UTF-7 (RFC 3501, 5.1.3).

    In this encoding '&' starts a shifted run and '-' ends it.  The run is
    base64 (written with ',' in place of '/' and without padding) of the
    UTF-16BE form of the text.  The two-character sequence '&-' stands for
    a literal '&'.  Everything outside a shifted run, including '+', is
    literal.

    s -- the encoded mailbox name (a byte string is decoded as ASCII first)

    Returns the decoded text as a unicode string.
    Raises ValueError (UnicodeDecodeError is a subclass) when a shifted
    run is not valid base64 or not valid UTF-16.
    """
    import binascii

    if isinstance(s, bytes):
        s = s.decode('ascii')

    decoded = []
    pos = 0
    while pos < len(s):
        shift = s.find('&', pos)
        if shift < 0:
            # no further shifted run: the rest is literal text
            decoded.append(s[pos:])
            break
        decoded.append(s[pos:shift])

        end = s.find('-', shift + 1)
        if end < 0:
            # be lenient with a run that is missing its terminating '-'
            end = len(s)

        encoded = s[shift + 1:end]
        if not encoded:
            # '&-' is the escape for a literal ampersand
            decoded.append(u'&')
        else:
            # back to standard base64: restore '/' and the stripped padding
            b64 = encoded.replace(',', '/')
            b64 += '=' * (-len(b64) % 4)
            try:
                raw = binascii.a2b_base64(b64.encode('ascii'))
            except binascii.Error:
                raise ValueError('invalid base64 in mailbox name %r' % (s,))
            decoded.append(raw.decode('utf-16-be'))
        pos = end + 1

    return u''.join(decoded)
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
77
ab8c3d4 @lucasdemarchi Re-organize function order
authored
def fetch_dump_subject(conn, message_set):
    """Fetch the Subject header field of each message and print it.

    conn -- connection object whose fetch() method is called
    message_set -- message set string passed to fetch(); nothing is done
                   when it is empty

    Only tuple items of the fetch response are used; the second element
    of each is handed to dump_subject().
    """
    if message_set:
        _status, fetched = conn.fetch(message_set, '(BODY[HEADER.FIELDS (SUBJECT)])')
        for item in fetched:
            if not isinstance(item, tuple):
                continue
            dump_subject(item[1])
86
def dump_subject(header):
    """Print the decoded Subject found in a raw header block.

    header -- header text as fetched from the server

    Every fragment returned by email.header.decode_header() is decoded
    with its own charset and the fragments are joined with a space, so a
    subject mixing plain and encoded words is printed in full.  A header
    block without a Subject field prints an empty subject.
    """
    # workaround for http://bugs.python.org/issue504152
    header = header.replace('\r\n ', ' ')
    msg = rfc822.Message(StringIO.StringIO(header))

    # getheader() returns the default for a missing field, whereas
    # msg["subject"] would raise KeyError
    raw_subject = msg.getheader("subject", "")

    fragments = []
    for text, charset in email.header.decode_header(raw_subject):
        if charset is not None:
            text = text.decode(charset)
        fragments.append(text)

    safe_print('\tSubject: [%s].' % ' '.join(fragments))
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
98
e8796fc @pedrox replaces print with safe_print to avoid unmapped unicode characteres …
pedrox authored
def safe_print(u):
    """Print u encoded with default_encoding, replacing unmappable characters.

    u -- the text to print.  Objects without an encode() method, such as
         the response lists returned by imaplib that process() passes in,
         are converted with str() first instead of raising AttributeError.
    """
    if not hasattr(u, 'encode'):
        u = str(u)
    print(u.encode(default_encoding, 'replace'))
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
102
a826d91 @lucasdemarchi Add command line options
authored
def input_or_default(prompt, option):
    """Return the value for option, asking the user when it was not given.

    prompt -- text shown to the user
    option -- attribute name looked up on the parsed command-line options,
              or None to always ask

    A value set on the command line is returned without prompting.
    Otherwise the user is asked; when options_defaults has an entry for
    option, that entry is shown in the prompt and returned on empty input.
    """
    if option is not None:
        preset = options.__dict__[option]
        if preset is not None:
            return preset

    if option not in options_defaults:
        return raw_input("%s: " % prompt)

    fallback = options_defaults[option]
    answer = raw_input("%s [ %s ]: " % (prompt, fallback))
    if not answer:
        return fallback
    return answer
117
def process(host, port, username, password, size, use_ssl=False):
    """Copy the big messages of a user-chosen mailbox to mailbox 'BIGMAIL'.

    Connects and logs in, prints the numbered mailbox list and prompts for
    one, prints the subject of every message whose RFC822.SIZE is at least
    size, and copies those messages to the destination mailbox.

    host -- IMAP server hostname
    port -- IMAP server port (an int; it is formatted with %d)
    username, password -- credentials for plain-text login
    size -- minimum message size, compared against RFC822.SIZE
    use_ssl -- connect with imaplib.IMAP4_SSL instead of imaplib.IMAP4

    The connection is logged out even when an error interrupts the work.
    """
    # FIXME: make this a parameter
    dest = 'BIGMAIL'

    safe_print("\t Connecting to %s:%d..." % (host, port))

    # connect to IMAP server
    if use_ssl:
        imap_connection = imaplib.IMAP4_SSL(host, port)
    else:
        imap_connection = imaplib.IMAP4(host, port)

    try:
        # authenticate by plain-text login
        imap_connection.login(username, password)

        # list and print mailboxes
        status, listing = imap_connection.list()

        # 1-based number of the mailbox offered as the default answer
        default_box = 1

        # FIXME: this function should not be interactive
        boxes = []
        for number, listed in enumerate(listing, 1):
            # TODO: filter \Noselect flagged mailboxes
            name = parse_list_response(listed)[2]
            boxes.append(name)

            decoded = decode_modified_utf7(name)
            if decoded == '[Gmail]/All Mail':
                default_box = number
            safe_print("%d. %s" % (number, decoded))

        # prompt for a mailbox; an empty answer picks the default
        answer = input_or_default("Mailbox [ %d ]" % default_box, None).strip()
        box = boxes[int(answer or default_box) - 1]

        # select mailbox
        status, data = imap_connection.select(box)
        if status != 'OK':
            # nothing can be fetched or copied without a selected mailbox
            safe_print(str(data))
            return

        # print mailbox status
        safe_print("\tYou have %s messages in mailbox '%s'." % (data[0], decode_modified_utf7(box)))

        remsgsize = re.compile(r"(\d+) \(RFC822\.SIZE (\d+).*\)")

        safe_print("\tLooking up big e-mails...")

        status, data = imap_connection.fetch('1:*', '(RFC822.SIZE)')
        big_ids = []
        for msg in data:
            # imaplib returns [None] when there is no data; skip that and
            # anything else that is not a size response
            match = remsgsize.match(msg) if msg else None
            if match is None:
                continue
            if int(match.group(2)) >= size:
                big_ids.append(match.group(1))

        count = len(big_ids)
        msg_set = ','.join(big_ids)

        safe_print("\tDone. %d e-mails found." % count)
        fetch_dump_subject(imap_connection, msg_set)

        if count == 0:
            safe_print("\tNothing to do. Closing connection")
        else:
            safe_print("\tCopying emails to mailbox '%s'..." % dest)

            # create destination mailbox, if new.
            # a failure here is ignored assuming it is about a preexisting
            # mailbox; if it is not the case, then the copy will fail next
            imap_connection.create(dest)

            # copy to destination mailbox
            status, data = imap_connection.copy(msg_set, dest)
            if status == 'NO':
                safe_print(str(data))

            # TODO: remove e-mails from original mailbox when it makes sense
            # users generally want to _move_ big e-mails to separate mailboxes.
            # however, some mail servers (like google's for instance) have a label/tag
            # semantics for mailboxes thus making no point in removing a big e-mail
            # from such a mailbox.
            safe_print("\tDone! Closing connection")

        # close and sync selected mailbox
        imap_connection.close()
    finally:
        # logout also shuts the connection down, so no separate shutdown()
        imap_connection.logout()
218
219
ab8c3d4 @lucasdemarchi Re-organize function order
authored
def parse_options(args):
    """Parse the command line and return optparse's (options, args) pair.

    args -- argument sequence laid out like sys.argv (program name first).
            When it is empty, optparse falls back to sys.argv[1:].

    Recognised options: -H/--host, -p/--port, -u/-U/--user and -s/--size,
    all stored as strings.
    """
    parser = OptionParser(usage=USAGE, version=VERSION)
    parser.add_option('-H', '--host', type='string',
                      help='IMAP server hostname')
    parser.add_option('-p', '--port', type='string',
                      help='IMAP server port')
    parser.add_option('-u', '-U', '--user', type='string',
                      help='IMAP username')
    parser.add_option('-s', '--size', type='string',
                      help='Min email size to search for')

    # honour the arguments that were passed in, skipping the program name
    argv = list(args[1:]) if args else None
    return parser.parse_args(argv)
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
232
ab8c3d4 @lucasdemarchi Re-organize function order
authored
def main(*args):
    """Collect the connection settings and run process() over SSL.

    Stores the parsed command-line options in the module-level options,
    sets up the output encoding, obtains host, port, minimum size and
    login through input_or_default(), asks for the password with getpass,
    and calls process() with the size converted from MB to bytes.
    """
    global options

    options, args = parse_options(args)
    setup_encoding()

    host = input_or_default('IMAP server hostname', 'host')
    port = int(input_or_default('IMAP server port', 'port'))
    size_mb = int(input_or_default('Minimum size in MB', 'size'))
    username = input_or_default('Login', 'user')
    password = getpass.getpass('Password: ')

    # convert to bytes
    size_bytes = size_mb * 1024 * 1024

    process(host, port, username, password, size_bytes, use_ssl=True)
4fe76c7 @pedrox a lot of improvements. to summarize:
pedrox authored
250
251
6c82bc9 @lucasdemarchi Add option parser support
authored
# Script entry point: hand the full argument vector to main() and exit
# with whatever it returns.
if __name__ == '__main__':
    exit_status = main(*sys.argv)
    sys.exit(exit_status)
ab8c3d4 @lucasdemarchi Re-organize function order
authored
254
255
256 ########################## TESTS
257
def test():
    """Run process() against imap.gmail.com:993 over SSL.

    Uses what look like placeholder credentials and a size threshold of
    10 * 1024 * 1024.
    """
    threshold = 10 * 1024 * 1024
    process('imap.gmail.com', 993, 'any@gmail.com', 'thing', threshold, True)
260
261 #test()
Something went wrong with that request. Please try again.