forked from ipython/ipython
/
session.py
768 lines (640 loc) · 26.4 KB
/
session.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
"""Session object for building, serializing, sending, and receiving messages in
IPython. The Session object supports serialization, HMAC signatures, and
metadata on messages.
Also defined here are utilities for working with Sessions:
* A SessionFactory to be used as a base class for configurables that work with
Sessions.
* A Message object for convenience that allows attribute-access to the msg dict.
Authors:
* Min RK
* Brian Granger
* Fernando Perez
"""
#-----------------------------------------------------------------------------
# Copyright (C) 2010-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
import hmac
import logging
import os
import pprint
import uuid
from datetime import datetime
try:
import cPickle
pickle = cPickle
except:
cPickle = None
import pickle
import zmq
from zmq.utils import jsonapi
from zmq.eventloop.ioloop import IOLoop
from zmq.eventloop.zmqstream import ZMQStream
from IPython.config.application import Application, boolean_flag
from IPython.config.configurable import Configurable, LoggingConfigurable
from IPython.utils.importstring import import_item
from IPython.utils.jsonutil import extract_dates, squash_dates, date_default
from IPython.utils.py3compat import str_to_bytes
from IPython.utils.traitlets import (CBytes, Unicode, Bool, Any, Instance, Set,
DottedObjectName, CUnicode)
#-----------------------------------------------------------------------------
# utility functions
#-----------------------------------------------------------------------------
def squash_unicode(obj):
    """Coerce unicode text back to UTF-8 bytestrings, recursively.

    Dicts and lists are modified in place (and also returned); text
    strings are replaced by their UTF-8 encoding.  Any other object is
    returned untouched.

    Parameters
    ----------
    obj : object
        A dict, list, text string, or anything else.

    Returns
    -------
    The same container (mutated in place), the encoded bytes for a text
    string, or `obj` itself for every other type.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3 -- avoids referencing a
    # builtin name that only exists on Python 2.
    text_type = type(u'')
    if isinstance(obj, dict):
        # Walk a snapshot of the keys: text keys are popped and re-inserted
        # as bytes below, which is not allowed while iterating a live view.
        for key in list(obj.keys()):
            obj[key] = squash_unicode(obj[key])
            if isinstance(key, text_type):
                obj[squash_unicode(key)] = obj.pop(key)
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            obj[i] = squash_unicode(v)
    elif isinstance(obj, text_type):
        obj = obj.encode('utf8')
    return obj
#-----------------------------------------------------------------------------
# globals and defaults
#-----------------------------------------------------------------------------
# The `jsonlib` backend differs from the other JSON modules in two places
# that matter here, so pick the keyword arguments accordingly.
if jsonapi.jsonmod.__name__ != 'jsonlib':
    # standard hook: ISO8601-ify datetime objects on the way out
    dumps_kwargs = {'default': date_default}
    # loading needs no special options
    loads_kwargs = {}
else:
    # jsonlib names its "unknown type" (datetime) hook differently
    dumps_kwargs = {'on_unknown': date_default}
    # jsonlib unpacks floats as Decimal by default, which can foul things
    # up; ask for real floats instead
    loads_kwargs = {'use_float': True}

# JSON (de)serializers; dates are re-extracted after loading
json_packer = lambda obj: jsonapi.dumps(obj, **dumps_kwargs)
json_unpacker = lambda s: extract_dates(jsonapi.loads(s, **loads_kwargs))

# pickle (de)serializers, using the highest protocol (-1) when dumping.
# NOTE: pickle_unpacker is plain pickle.loads -- only safe on data from
# trusted peers.
pickle_packer = lambda o: pickle.dumps(o, -1)
pickle_unpacker = pickle.loads

# serializers used when nothing else is configured
default_packer = json_packer
default_unpacker = json_unpacker

# frame separating the routing identities from the message proper
DELIM = b"<IDS|MSG>"
#-----------------------------------------------------------------------------
# Mixin tools for apps that use Sessions
#-----------------------------------------------------------------------------
# command-line aliases -> Session trait names
session_aliases = {
    'ident': 'Session.session',
    'user': 'Session.username',
    'keyfile': 'Session.keyfile',
}

# command-line flags: each maps to (config dict, help text).
# The 'secure' key is a fresh UUID generated once, when this module loads.
session_flags = {
    'secure': (
        {'Session': {'key': str_to_bytes(str(uuid.uuid4())),
                     'keyfile': ''}},
        """Use HMAC digests for authentication of messages.
Setting this flag will generate a new UUID to use as the HMAC key.
""",
    ),
    'no-secure': (
        {'Session': {'key': b'', 'keyfile': ''}},
        """Don't authenticate messages.""",
    ),
}
def default_secure(cfg):
    """Make a config environment secure by default.

    When neither ``Session.key`` nor ``Session.keyfile`` is present in
    `cfg`, ``cfg.Session.key`` is set to a freshly generated random UUID
    (as bytes).  A config that already names either one is left untouched.

    Parameters
    ----------
    cfg : config object
        Must support ``'Session' in cfg`` and attribute access to
        ``cfg.Session``.
    """
    has_key_config = 'Session' in cfg and (
        'key' in cfg.Session or 'keyfile' in cfg.Session
    )
    if not has_key_config:
        # key/keyfile not specified, generate new UUID:
        cfg.Session.key = str_to_bytes(str(uuid.uuid4()))
#-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------
class SessionFactory(LoggingConfigurable):
    """Base class for configurables that own a Session, a zmq Context,
    a logger, and an IOLoop.

    If no `session` is supplied at construction time, one is built from
    the same keyword arguments.
    """

    # name of the logger to use
    logname = Unicode('')

    def _logname_changed(self, name, old, new):
        # presumably invoked by traitlets when `logname` changes -- swap in
        # the logger registered under the new name
        self.log = logging.getLogger(new)

    # not configurable:
    context = Instance('zmq.Context')

    def _context_default(self):
        # fall back to the shared zmq Context
        return zmq.Context.instance()

    session = Instance('IPython.zmq.session.Session')

    loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)

    def _loop_default(self):
        # fall back to the shared IOLoop
        return IOLoop.instance()

    def __init__(self, **kwargs):
        """Initialise the configurable, creating a Session if none was given."""
        super(SessionFactory, self).__init__(**kwargs)
        if self.session is not None:
            return
        # no session supplied: construct one from the same kwargs
        self.session = Session(**kwargs)
class Message(object):
    """A simple message object that maps dict keys to attributes.

    A Message can be created from a dict, and a dict from a Message
    instance simply by calling ``dict(msg_obj)``.  Nested dicts are
    wrapped in Message objects as well, so ``msg.a.b`` and
    ``msg['a']['b']`` both work.
    """

    def __init__(self, msg_dict):
        """Populate attributes from `msg_dict` (anything ``dict()`` accepts)."""
        attrs = self.__dict__
        # ``items()`` rather than ``iteritems()``: the latter only exists
        # on Python 2, while ``items()`` is available everywhere.
        for k, v in dict(msg_dict).items():
            if isinstance(v, dict):
                v = Message(v)
            attrs[k] = v

    # Having this iterator lets dict(msg_obj) work out of the box.
    def __iter__(self):
        """Iterate over ``(key, value)`` pairs."""
        return iter(self.__dict__.items())

    def __repr__(self):
        return repr(self.__dict__)

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def __contains__(self, k):
        return k in self.__dict__

    def __getitem__(self, k):
        """Item access; raises KeyError for unknown keys."""
        return self.__dict__[k]
def msg_header(msg_id, msg_type, username, session):
    """Build a message header dict.

    Returns a dict holding the four arguments under their own names plus
    a ``date`` entry stamped with the current local time.
    """
    return {
        'msg_id': msg_id,
        'msg_type': msg_type,
        'username': username,
        'session': session,
        'date': datetime.now(),
    }
def extract_header(msg_or_header):
    """Return the header of a message, or the header itself if given one.

    Parameters
    ----------
    msg_or_header : mapping-like or None
        Either a full message (has a ``'header'`` entry) or a bare header
        (has a ``'msg_id'`` entry).  Any falsy value yields ``{}``.

    Returns
    -------
    dict
        The header; non-dict headers are converted with ``dict()``.

    Raises
    ------
    KeyError
        If the argument has neither a ``'header'`` nor a ``'msg_id'`` key.
    """
    if not msg_or_header:
        return {}
    try:
        # Is this the entire message?
        header = msg_or_header['header']
    except KeyError:
        # Otherwise it must be just the header: probe for 'msg_id' and let
        # the KeyError propagate if that is missing as well.
        msg_or_header['msg_id']
        header = msg_or_header
    return header if isinstance(header, dict) else dict(header)
class Session(Configurable):
    """Object for handling serialization and sending of messages.

    The Session object handles building messages and sending them
    with ZMQ sockets or ZMQStream objects.  Objects can communicate with each
    other over the network via Session objects, and only need to work with the
    dict-based IPython message spec. The Session will handle
    serialization/deserialization, security, and metadata.

    Sessions support configurable serialization via packer/unpacker traits,
    and signing with HMAC digests via the key/keyfile traits.

    Parameters
    ----------
    debug : bool
        whether to trigger extra debugging statements
    packer/unpacker : str : 'json', 'pickle' or import_string
        importstrings for methods to serialize message parts.  If just
        'json' or 'pickle', predefined JSON and pickle packers will be used.
        Otherwise, the entire importstring must be used.

        The functions must accept at least valid JSON input, and output *bytes*.

        For example, to use msgpack:
        packer = 'msgpack.packb', unpacker='msgpack.unpackb'
    pack/unpack : callables
        You can also set the pack/unpack callables for serialization directly.
    session : bytes
        the ID of this Session object.  The default is to generate a new UUID.
    username : unicode
        username added to message headers.  The default is to ask the OS.
    key : bytes
        The key used to initialize an HMAC signature.  If unset, messages
        will not be signed or checked.
    keyfile : filepath
        The file containing a key.  If this is set, `key` will be initialized
        to the contents of the file.
    """

    debug = Bool(False, config=True, help="""Debug output in the Session""")

    packer = DottedObjectName('json', config=True,
        help="""The name of the packer for serializing messages.
Should be one of 'json', 'pickle', or an import name
for a custom callable serializer.""")

    def _packer_changed(self, name, old, new):
        """Pick pack/unpack callables to match a new `packer` name."""
        kind = new.lower()
        if kind == 'json':
            self.pack = json_packer
            self.unpack = json_unpacker
        elif kind == 'pickle':
            self.pack = pickle_packer
            self.unpack = pickle_unpacker
        else:
            # custom serializer: only the packing side is replaced here
            self.pack = import_item(str(new))

    unpacker = DottedObjectName('json', config=True,
        help="""The name of the unpacker for unserializing messages.
Only used with custom functions for `packer`.""")

    def _unpacker_changed(self, name, old, new):
        """Pick pack/unpack callables to match a new `unpacker` name."""
        kind = new.lower()
        if kind == 'json':
            self.pack = json_packer
            self.unpack = json_unpacker
        elif kind == 'pickle':
            self.pack = pickle_packer
            self.unpack = pickle_unpacker
        else:
            # custom deserializer: only the unpacking side is replaced here
            self.unpack = import_item(str(new))

    session = CUnicode(u'', config=True,
        help="""The UUID identifying this session.""")

    def _session_default(self):
        """Generate a new UUID for `session` and mirror it into `bsession`."""
        # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3
        u = type(u'')(uuid.uuid4())
        self.bsession = u.encode('ascii')
        return u

    def _session_changed(self, name, old, new):
        """Keep the bytes copy of the session ID in sync."""
        self.bsession = self.session.encode('ascii')

    # bsession is the session as bytes
    bsession = CBytes(b'')

    username = Unicode(os.environ.get('USER',u'username'), config=True,
        help="""Username for the Session. Default is your system username.""")

    # message signature related traits:
    key = CBytes(b'', config=True,
        help="""execution key, for extra authentication.""")

    def _key_changed(self, name, old, new):
        """(Re)build the HMAC object from the new key; an empty key disables signing."""
        if new:
            # NOTE(review): no digestmod is given, so the interpreter default
            # is used; recent Python 3 releases require an explicit digestmod
            # -- confirm before running this there.
            self.auth = hmac.HMAC(new)
        else:
            self.auth = None

    auth = Instance(hmac.HMAC)

    # signatures already seen by unserialize(), used to reject duplicates
    digest_history = Set()

    keyfile = Unicode('', config=True,
        help="""path to file containing execution key.""")

    def _keyfile_changed(self, name, old, new):
        """Load `key` from the file at the new path (surrounding whitespace stripped)."""
        with open(new, 'rb') as f:
            self.key = f.read().strip()

    # serialization traits:
    pack = Any(default_packer) # the actual packer function

    def _pack_changed(self, name, old, new):
        if not callable(new):
            raise TypeError("packer must be callable, not %s"%type(new))

    unpack = Any(default_unpacker) # the actual unpacker function

    def _unpack_changed(self, name, old, new):
        # the unpacker is only checked for being callable
        if not callable(new):
            raise TypeError("unpacker must be callable, not %s"%type(new))

    def __init__(self, **kwargs):
        """create a Session object

        Parameters
        ----------
        debug : bool
            whether to trigger extra debugging statements
        packer/unpacker : str : 'json', 'pickle' or import_string
            importstrings for methods to serialize message parts.  If just
            'json' or 'pickle', predefined JSON and pickle packers will be used.
            Otherwise, the entire importstring must be used.

            The functions must accept at least valid JSON input, and output
            *bytes*.

            For example, to use msgpack:
            packer = 'msgpack.packb', unpacker='msgpack.unpackb'
        pack/unpack : callables
            You can also set the pack/unpack callables for serialization
            directly.
        session : unicode (must be ascii)
            the ID of this Session object.  The default is to generate a new
            UUID.
        bsession : bytes
            The session as bytes
        username : unicode
            username added to message headers.  The default is to ask the OS.
        key : bytes
            The key used to initialize an HMAC signature.  If unset, messages
            will not be signed or checked.
        keyfile : filepath
            The file containing a key.  If this is set, `key` will be
            initialized to the contents of the file.
        """
        super(Session, self).__init__(**kwargs)
        self._check_packers()
        # packed form of an empty dict, used by serialize() for None content
        self.none = self.pack({})
        # ensure self._session_default() if necessary, so bsession is defined:
        self.session

    @property
    def msg_id(self):
        """always return new uuid"""
        return str(uuid.uuid4())

    def _check_packers(self):
        """check packers for binary data and datetime support.

        Raises ValueError if the packer cannot serialize a simple message,
        does not produce bytes, or if the unpacker cannot read the packer's
        output.  If datetimes do not survive a round trip, pack/unpack are
        wrapped with squash_dates/extract_dates.
        """
        pack = self.pack
        unpack = self.unpack

        # check simple serialization
        msg = dict(a=[1,'hi'])
        try:
            packed = pack(msg)
        except Exception:
            raise ValueError("packer could not serialize a simple message")

        # ensure packed message is bytes
        if not isinstance(packed, bytes):
            raise ValueError("message packed to %r, but bytes are required"%type(packed))

        # check that unpack is pack's inverse
        try:
            unpack(packed)
        except Exception:
            raise ValueError("unpacker could not handle the packer's output")

        # check datetime support
        msg = dict(t=datetime.now())
        try:
            unpack(pack(msg))
        except Exception:
            # the serializer cannot handle datetimes itself: convert them
            # around the original pack/unpack callables
            self.pack = lambda o: pack(squash_dates(o))
            self.unpack = lambda s: extract_dates(unpack(s))

    def msg_header(self, msg_type):
        """Return a new header of `msg_type` for this session and username."""
        return msg_header(self.msg_id, msg_type, self.username, self.session)

    def msg(self, msg_type, content=None, parent=None, subheader=None, header=None):
        """Return the nested message dict.

        This format is different from what is sent over the wire. The
        serialize/unserialize methods converts this nested message dict to the wire
        format, which is a list of message parts.

        Parameters
        ----------
        msg_type : str
            Used to build a fresh header when `header` is None.
        content : dict or None
            Message content; None becomes ``{}``.
        parent : message, header or None
            Its header (via extract_header) becomes ``parent_header``.
        subheader : dict or None
            Extra keys merged into the header (the header is updated in place).
        header : dict or None
            A ready-made header to use instead of building one.
        """
        msg = {}
        header = self.msg_header(msg_type) if header is None else header
        msg['header'] = header
        msg['msg_id'] = header['msg_id']
        msg['msg_type'] = header['msg_type']
        msg['parent_header'] = {} if parent is None else extract_header(parent)
        msg['content'] = {} if content is None else content
        sub = {} if subheader is None else subheader
        msg['header'].update(sub)
        return msg

    def sign(self, msg_list):
        """Sign a message with HMAC digest. If no auth, return b''.

        Parameters
        ----------
        msg_list : list
            The [p_header,p_parent,p_content] part of the message list.
        """
        if self.auth is None:
            return b''
        # work on a copy so the keyed base object stays pristine
        h = self.auth.copy()
        for m in msg_list:
            h.update(m)
        return str_to_bytes(h.hexdigest())

    def serialize(self, msg, ident=None):
        """Serialize the message components to bytes.

        This is roughly the inverse of unserialize. The serialize/unserialize
        methods work with full message lists, whereas pack/unpack work with
        the individual message parts in the message list.

        Parameters
        ----------
        msg : dict or Message
            The nested message dict as returned by the self.msg method.
        ident : bytes, list of bytes, or None
            Routing identities to put in front of the delimiter.

        Returns
        -------
        msg_list : list
            The list of bytes objects to be sent with the format:
            [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
             buffer1,buffer2,...]. In this list, the p_* entities are
            the packed or serialized versions, so if JSON is used, these
            are utf8 encoded JSON strings.

        Raises
        ------
        TypeError
            If the content is not None, a dict, bytes or text.
        """
        content = msg.get('content', {})
        if content is None:
            content = self.none
        elif isinstance(content, dict):
            content = self.pack(content)
        elif isinstance(content, bytes):
            # content is already packed, as in a relayed message
            pass
        elif isinstance(content, type(u'')):
            # should be bytes, but JSON often spits out unicode
            # (type(u'') is ``unicode`` on Python 2, ``str`` on Python 3)
            content = content.encode('utf8')
        else:
            raise TypeError("Content incorrect type: %s"%type(content))

        real_message = [self.pack(msg['header']),
                        self.pack(msg['parent_header']),
                        content
        ]

        to_send = []

        if isinstance(ident, list):
            # accept list of idents
            to_send.extend(ident)
        elif ident is not None:
            to_send.append(ident)
        to_send.append(DELIM)

        signature = self.sign(real_message)
        to_send.append(signature)

        to_send.extend(real_message)

        return to_send

    def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
             buffers=None, subheader=None, track=False, header=None):
        """Build and send a message via stream or socket.

        The message format used by this function internally is as follows:

        [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
         buffer1,buffer2,...]

        The serialize/unserialize methods convert the nested message dict into this
        format.

        Parameters
        ----------
        stream : zmq.Socket or ZMQStream
            The socket-like object used to send the data.
        msg_or_type : str or Message/dict
            Normally, msg_or_type will be a msg_type unless a message is being
            sent more than once. If a header is supplied, this can be set to
            None and the msg_type will be pulled from the header.
        content : dict or None
            The content of the message (ignored if msg_or_type is a message).
        header : dict or None
            The header dict for the message (ignored if msg_or_type is a message).
        parent : Message or dict or None
            The parent or parent header describing the parent of this message
            (ignored if msg_or_type is a message).
        ident : bytes or list of bytes
            The zmq.IDENTITY routing path.
        subheader : dict or None
            Extra header keys for this message's header (ignored if msg_or_type
            is a message).
        buffers : list or None
            The already-serialized buffers to be appended to the message.
        track : bool
            Whether to track.  Only for use with Sockets, because ZMQStream
            objects cannot track messages.

        Returns
        -------
        msg : dict
            The constructed message, with an extra ``'tracker'`` entry holding
            whatever the last send call on `stream` returned.

        Raises
        ------
        TypeError
            If `stream` is neither a Socket nor a ZMQStream, or if tracking
            is requested on a ZMQStream.
        """

        if not isinstance(stream, (zmq.Socket, ZMQStream)):
            raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
        elif track and isinstance(stream, ZMQStream):
            raise TypeError("ZMQStream cannot track messages")

        if isinstance(msg_or_type, (Message, dict)):
            # We got a Message or message dict, not a msg_type so don't
            # build a new Message.
            msg = msg_or_type
        else:
            msg = self.msg(msg_or_type, content=content, parent=parent,
                           subheader=subheader, header=header)

        buffers = [] if buffers is None else buffers
        to_send = self.serialize(msg, ident)
        flag = 0
        if buffers:
            # more frames follow; tracking is deferred to the last buffer
            flag = zmq.SNDMORE
            _track = False
        else:
            _track = track
        if track:
            tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
        else:
            tracker = stream.send_multipart(to_send, flag, copy=False)
        for b in buffers[:-1]:
            stream.send(b, flag, copy=False)
        if buffers:
            # final buffer: sent without SNDMORE, closing the message
            if track:
                tracker = stream.send(buffers[-1], copy=False, track=track)
            else:
                tracker = stream.send(buffers[-1], copy=False)

        if self.debug:
            pprint.pprint(msg)
            pprint.pprint(to_send)
            pprint.pprint(buffers)

        msg['tracker'] = tracker

        return msg

    def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None):
        """Send a raw message via ident path.

        This method is used to send a already serialized message.

        Parameters
        ----------
        stream : ZMQStream or Socket
            The ZMQ stream or socket to use for sending the message.
        msg_list : list
            The serialized list of messages to send. This only includes the
            [p_header,p_parent,p_content,buffer1,buffer2,...] portion of
            the message.
        flags : int
            Flags handed to ``stream.send_multipart``.
        copy : bool
            Copy flag handed to ``stream.send_multipart``.
        ident : ident or list
            A single ident or a list of idents to use in sending.
        """
        to_send = []
        if isinstance(ident, bytes):
            ident = [ident]
        if ident is not None:
            to_send.extend(ident)

        to_send.append(DELIM)
        to_send.append(self.sign(msg_list))
        to_send.extend(msg_list)
        # Send the assembled frames (idents, delimiter, signature, parts),
        # not the bare msg_list.
        stream.send_multipart(to_send, flags, copy=copy)

    def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
        """Receive and unpack a message.

        Parameters
        ----------
        socket : ZMQStream or Socket
            The socket or stream to use in receiving.
        mode : int
            Flags handed to ``recv_multipart`` (non-blocking by default).
        content : bool
            Whether to unpack the content (see unserialize).
        copy : bool
            Whether frames are received as bytes (True) or as non-copying
            zmq Message objects (False).

        Returns
        -------
        [idents], msg
            [idents] is a list of idents and msg is a nested message dict of
            same format as self.msg returns.  ``(None, None)`` is returned
            when nothing is available (EAGAIN).
        """
        if isinstance(socket, ZMQStream):
            socket = socket.socket
        try:
            # honour `copy` here too: feed_identities/unserialize below
            # expect the frame type that matches it
            msg_list = socket.recv_multipart(mode, copy=copy)
        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                # We can convert EAGAIN to None as we know in this case
                # recv_multipart won't return None.
                return None,None
            else:
                raise
        # split multipart message into identity list and message dict
        # invalid large messages can cause very expensive string comparisons
        idents, msg_list = self.feed_identities(msg_list, copy)
        try:
            return idents, self.unserialize(msg_list, content=content, copy=copy)
        except Exception:
            # TODO: handle it
            # bare raise keeps the original traceback intact
            raise

    def feed_identities(self, msg_list, copy=True):
        """Split the identities from the rest of the message.

        Feed until DELIM is reached, then return the prefix as idents and
        remainder as msg_list. This is easily broken by setting an IDENT to DELIM,
        but that would be silly.

        Parameters
        ----------
        msg_list : a list of Message or bytes objects
            The message to be split.
        copy : bool
            flag determining whether the arguments are bytes or Messages

        Returns
        -------
        (idents, msg_list) : two lists
            idents will always be a list of bytes, each of which is a ZMQ
            identity. msg_list will be a list of bytes or zmq.Messages of the
            form [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...] and
            should be unpackable/unserializable via self.unserialize at this
            point.

        Raises
        ------
        ValueError
            If DELIM is not found in msg_list.
        """
        if copy:
            idx = msg_list.index(DELIM)
            return msg_list[:idx], msg_list[idx+1:]
        else:
            failed = True
            for idx,m in enumerate(msg_list):
                if m.bytes == DELIM:
                    failed = False
                    break
            if failed:
                raise ValueError("DELIM not in msg_list")
            idents, msg_list = msg_list[:idx], msg_list[idx+1:]
            return [m.bytes for m in idents], msg_list

    def unserialize(self, msg_list, content=True, copy=True):
        """Unserialize a msg_list to a nested message dict.

        This is roughly the inverse of serialize. The serialize/unserialize
        methods work with full message lists, whereas pack/unpack work with
        the individual message parts in the message list.

        Parameters
        ----------
        msg_list : list of bytes or Message objects
            The list of message parts of the form [HMAC,p_header,p_parent,
            p_content,buffer1,buffer2,...].
        content : bool (True)
            Whether to unpack the content dict (True), or leave it packed
            (False).
        copy : bool (True)
            Whether to return the bytes (True), or the non-copying Message
            object in each place (False).

        Returns
        -------
        msg : dict
            The nested message dict with top-level keys [header, parent_header,
            content, buffers].

        Raises
        ------
        TypeError
            If msg_list has fewer than the four mandatory parts.
        ValueError
            If signing is enabled and the message is unsigned, carries a
            signature that was already seen, or fails verification.
        """
        minlen = 4
        # Validate the length first: every access below assumes the four
        # mandatory frames exist.
        if not len(msg_list) >= minlen:
            raise TypeError("malformed message, must have at least %i elements"%minlen)
        message = {}
        if not copy:
            # the mandatory frames are always needed as bytes
            for i in range(minlen):
                msg_list[i] = msg_list[i].bytes
        if self.auth is not None:
            signature = msg_list[0]
            if not signature:
                raise ValueError("Unsigned Message")
            if signature in self.digest_history:
                raise ValueError("Duplicate Signature: %r"%signature)
            self.digest_history.add(signature)
            check = self.sign(msg_list[1:4])
            if not signature == check:
                raise ValueError("Invalid Signature: %r"%signature)
        header = self.unpack(msg_list[1])
        message['header'] = header
        message['msg_id'] = header['msg_id']
        message['msg_type'] = header['msg_type']
        message['parent_header'] = self.unpack(msg_list[2])
        if content:
            message['content'] = self.unpack(msg_list[3])
        else:
            message['content'] = msg_list[3]

        # anything after the four mandatory frames is a raw buffer
        message['buffers'] = msg_list[4:]
        return message
def test_msg2obj():
    """Check Message attribute access, item access and dict() round-tripping."""
    source = dict(x=1)
    wrapped = Message(source)
    assert wrapped.x == source['x']

    # nested dicts become nested Message objects
    source['y'] = dict(z=1)
    wrapped = Message(source)
    assert wrapped.y.z == source['y']['z']

    # item access mirrors attribute access
    outer, inner = 'y', 'z'
    assert wrapped[outer][inner] == source[outer][inner]

    # dict(msg) gives back an equivalent mapping
    roundtrip = dict(wrapped)
    assert source['x'] == roundtrip['x']
    assert source['y']['z'] == roundtrip['y']['z']