# Sequence Hacking, Hashing, and Slicing

In [1]:
from array import array
import reprlib
import math

class Vector:
    """An n-dimensional vector whose components are stored in an ``array``.

    The constructor accepts any iterable of numbers.  Instances support
    iteration, ``repr``/``str``, conversion to ``bytes`` (and back through
    ``frombytes``), equality, ``abs`` and truth testing.
    """

    # array typecode used for storage; also written as the first byte
    # of the bytes() representation.
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __repr__(self):
        # reprlib abbreviates long arrays with '...', keeping the repr short.
        components = reprlib.repr(self._components)
        start = components.find('[')
        if start == -1:
            # An empty array is rendered as "array('d')" with no bracketed
            # list at all, so there is nothing to slice out.
            components = '[]'
        else:
            # Keep only the "[...]" part: drop the "array('d', " prefix
            # and the closing parenthesis.
            components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        # One byte holding the typecode, followed by the raw array buffer.
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __eq__(self, other):
        """Compare component-wise against any iterable.

        Returns NotImplemented when ``other`` cannot be iterated, so that
        ``vector == 5`` evaluates to False instead of raising TypeError.
        """
        try:
            other_items = tuple(other)
        except TypeError:
            return NotImplemented
        return tuple(self) == other_items

    def __abs__(self):
        # Euclidean norm: square root of the sum of squared components.
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        # A vector is falsy exactly when its magnitude is zero.
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild an instance from the byte layout produced by ``bytes()``."""
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)


## Protocols and Duck Typing
As early as Chapter 1, we saw that you don’t need to inherit from any special class to create a fully functional sequence type in Python; you just need to implement the methods that fulfill the sequence protocol. For example, the sequence protocol in Python entails just the __len__ and __getitem__ methods.

In [5]:
import collections

Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """A standard 52-card deck that behaves as a read-only sequence.

    Only ``__len__`` and ``__getitem__`` are implemented; there is no
    inheritance from any sequence base class.
    """

    # '2' through '10', followed by jack, queen, king and ace.
    ranks = list(map(str, range(2, 11))) + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # Suits form the outer loop, so the thirteen ranks of each suit
        # sit next to each other in the deck.
        cards = []
        for suit in self.suits:
            cards.extend(Card(rank, suit) for rank in self.ranks)
        self._cards = cards

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # Delegating to the list gives integer indexing and slicing.
        return self._cards[position]


Any experienced Python coder will look at it and understand that it is a sequence, even if it subclasses object. We say it is a sequence because it behaves like one, and that is what matters. This became known as duck typing.

In [7]:
# FrenchDeck defines no __iter__; the for loop works through __getitem__.
deck = FrenchDeck()
for index, card in enumerate(deck):
    print(card)
    if index >= 6:  # stop after the seventh card (indices 0 through 6)
        break


Card(rank='2', suit='spades')
Card(rank='3', suit='spades')
Card(rank='4', suit='spades')
Card(rank='5', suit='spades')
Card(rank='6', suit='spades')
Card(rank='7', suit='spades')
Card(rank='8', suit='spades')


## How Slicing Works

In [8]:
class MySeq:
    """Probe class: indexing returns whatever object Python passed in."""

    def __getitem__(self, index):
        # No lookup happens; the raw index (int, slice or tuple) is echoed
        # back so the caller can inspect it.
        received = index
        return received


In [9]:
s = MySeq()

In [10]:
s[1]

1

In [11]:
s[1:4]

slice(1, 4, None)

In [12]:
s[1:4:2]

slice(1, 4, 2)

In [13]:
 s[1:4:2, 9]

(slice(1, 4, 2), 9)

In [14]:
 s[1:4:2, 7:9]

(slice(1, 4, 2), slice(7, 9, None))

In [15]:
slice

slice

In [16]:
dir(slice)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'indices',
 'start',
 'step',
 'stop']

slice is a built-in type. Inspecting a slice we find the data attributes start, stop, and step, and an
indices method.

In [17]:
help(slice.indices)

Help on method_descriptor:

indices(...)
    S.indices(len) -> (start, stop, stride)
    
    Assuming a sequence of length len, calculate the start and stop
    indices, and the stride length of the extended slice described by
    S. Out of bounds indices are clipped in a manner consistent with the
    handling of normal slices.



## A Slice-Aware __getitem__

In [9]:
from array import array
import reprlib
import math
import numbers


class Vector:
    """An n-dimensional vector whose components are stored in an ``array``.

    The constructor accepts any iterable of numbers.  Instances support
    iteration, ``len``, integer indexing, slicing (a slice yields a new
    instance of the same class), ``repr``/``str``, conversion to ``bytes``
    (and back through ``frombytes``), equality, ``abs`` and truth testing.
    """

    # array typecode used for storage; also written as the first byte
    # of the bytes() representation.
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __repr__(self):
        # reprlib abbreviates long arrays with '...', keeping the repr short.
        components = reprlib.repr(self._components)
        start = components.find('[')
        if start == -1:
            # An empty array is rendered as "array('d')" with no bracketed
            # list at all, so there is nothing to slice out.
            components = '[]'
        else:
            # Keep only the "[...]" part: drop the "array('d', " prefix
            # and the closing parenthesis.
            components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        # One byte holding the typecode, followed by the raw array buffer.
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __eq__(self, other):
        """Compare component-wise against any iterable.

        Returns NotImplemented when ``other`` cannot be iterated, so that
        ``vector == 5`` evaluates to False instead of raising TypeError.
        """
        try:
            other_items = tuple(other)
        except TypeError:
            return NotImplemented
        return tuple(self) == other_items

    def __abs__(self):
        # Euclidean norm: square root of the sum of squared components.
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        # A vector is falsy exactly when its magnitude is zero.
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild an instance from the byte layout produced by ``bytes()``."""
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    # Sequence protocol: __len__ and __getitem__.
    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        """Return one component, or a new vector for a slice.

        Raises:
            TypeError: if ``index`` is neither a slice nor an integer
                (for example a tuple such as ``v[1, 2]``).
        """
        cls = type(self)
        if isinstance(index, slice):
            # Wrap the sliced array so slicing yields the same class,
            # not a bare array.
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))


In [10]:
v7 = Vector(range(7))

In [11]:
v7[-1]

6.0

In [12]:
v7[1:4]

Vector([1.0, 2.0, 3.0])

In [13]:
v7[-1:] 

Vector([6.0])

Vector does not support multidimensional indexing, so a tuple of indices or slices raises an error.

In [32]:
import traceback

try:
    v7[1,2]
except TypeError:  # only the expected failure; anything else should propagate
    traceback.print_exc()

Traceback (most recent call last):
  File "<ipython-input-32-2d8ff80f2d92>", line 4, in <module>
    v7[1,2]
  File "<ipython-input-27-9cd5af5b02ea>", line 54, in __getitem__
    raise TypeError(msg.format(cls=cls))
TypeError: Vector indices must be integers


## Vector Take #3: Dynamic Attribute Access

In [3]:
v = Vector(range(10))
v

Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])

In [5]:
import traceback

try:
    v.x
except AttributeError:  # only the expected failure; anything else should propagate
    traceback.print_exc()


Traceback (most recent call last):
  File "<ipython-input-5-0f0ab9e9411d>", line 4, in <module>
    v.x
AttributeError: 'Vector' object has no attribute 'x'


The __getattr__ method is invoked by the interpreter when attribute lookup fails. In simple terms, given the expression my_obj.x, Python checks if the my_obj instance has an attribute named x; if not, the search goes to the class (my_obj.__class__), and then up the inheritance graph.
If the x attribute is not found, then the __getattr__ method defined in the class of my_obj is called with self and the name of the attribute as a string (e.g., 'x').

In [16]:
class VectorNew(Vector):
    """Vector whose leading components can also be read as x, y, z and t."""

    # The position of a letter in this string is the component index
    # that the shortcut attribute reads.
    shortcut_names = 'xyzt'

    def __init__(self, components):
        super().__init__(components)

    def __getattr__(self, name):
        """Resolve single-letter shortcut attributes.

        Called only after normal attribute lookup has failed.  A
        one-character name found in ``shortcut_names`` returns the
        component at the matching position, provided the vector is long
        enough.

        Raises:
            AttributeError: for every other name, including names longer
                than one character.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shortcut_names.find(name)
            # The range check must stay inside this branch: ``pos`` only
            # exists for one-character names, and find() returns -1 for
            # letters that are not shortcuts.
            if 0 <= pos < len(self._components):
                return self._components[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))


In [18]:
v = VectorNew(range(10))
v

Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])

In [19]:
v.x

0.0

In [20]:
v.y

1.0

## Vector Take #4: Hashing and a Faster ==
Once more we get to implement a __hash__ method. Together with the existing __eq__, this will make Vector instances hashable.

In [21]:
2 * 3 * 4 * 5 # the result we want: 5! == 120

120

In [22]:
import functools
functools.reduce(lambda a,b: a*b, range(1, 6))

120

A hash is a fixed-size integer that identifies a particular value:
-  https://stackoverflow.com/questions/17585730/what-does-hash-do-in-python
-  https://en.wikipedia.org/wiki/Hash_function

In [24]:
[hash(x) for x in range(10)]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [26]:
hash("some string")

1354895843535490086

In [25]:
[hash(x) for x in "some string"]

[-2780883236376314179,
 6198797113548795930,
 -2993550616971676124,
 -919541438431412375,
 -271363139114298016,
 -2780883236376314179,
 824652864821228760,
 -3602323100728165134,
 -3898281694142389048,
 -1209787511890185820,
 -3191305034800649506]

In [27]:
import operator

help(operator.xor)

Help on built-in function xor in module _operator:

xor(...)
    xor(a, b) -- Same as a ^ b.



Exclusive or (XOR, EOR or EXOR) outputs true whenever the inputs differ.

In [43]:
operator.xor(1,2), bool(operator.xor(1,2))

(3, True)

In [44]:
operator.xor(2,3), bool(operator.xor(2,3))

(1, True)

In [45]:
operator.xor(10,11), bool(operator.xor(10,11))

(1, True)

In [46]:
operator.xor(1,7), bool(operator.xor(1,7))

(6, True)

In [47]:
operator.xor(7,7), bool(operator.xor(7,7))

(0, False)

In [49]:
operator.xor(124,124), bool(operator.xor(124,124))

(0, False)

In [50]:
class VectorNew2(VectorNew):
    """VectorNew made hashable by pairing ``__eq__`` with ``__hash__``."""

    def __init__(self, components):
        super().__init__(components)

    def __eq__(self, other):
        """Compare component-wise against any iterable.

        Returns NotImplemented when ``other`` cannot be iterated, so that
        ``vector == 5`` evaluates to False instead of raising TypeError.
        """
        try:
            other_items = tuple(other)
        except TypeError:
            return NotImplemented
        return tuple(self) == other_items

    # Defining __eq__ in a class body disables the inherited __hash__,
    # so __hash__ has to be defined here alongside it.
    def __hash__(self):
        """Return the XOR of the hashes of all components (0 when empty)."""
        hashes = (hash(x) for x in self._components)
        return functools.reduce(operator.xor, hashes, 0)


In [53]:
vector = VectorNew2(range(10))
vector

Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])

In [54]:
hash(vector)

1

It’s good practice to keep __eq__ and __hash__ close in source code, because they need to work together.