# 第10章 序列的修改、散列和切片

## 10.1 Vector类: 用户自定义的序列类型
我们将使用组合模式实现Vector类, 而不是使用继承，向量的分量储存在浮点数数组中，而且还将实现不可变扁平序列所需的方法。

## 10.2 Vector类第一版：与Vector2d类兼容

In [1]:
# 这是第一版的Vector
from array import array
import reprlib
import math

class Vector:
    """A multi-dimensional vector that wraps an ``array`` of floats.

    The components are held by composition in ``self._components`` instead
    of inheriting from a sequence type.
    """

    typecode = 'd'  # array type code used for storage and for the bytes header

    def __init__(self, components):
        """Build the vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Yield the components in order."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib shortens long arrays with ``...``."""
        text = reprlib.repr(self._components)
        # Cut "array('d', " off the front and the closing ")" off the back.
        opening = text.find('[')
        return f'Vector({text[opening:-1]})'

    def __str__(self):
        """Show the components formatted like a tuple."""
        as_tuple = tuple(self)
        return str(as_tuple)

    def __bytes__(self):
        """Serialize as one typecode byte followed by the raw array bytes."""
        header = bytes([ord(self.typecode)])
        payload = bytes(self._components)
        return header + payload

    def __eq__(self, other):
        """Compare component-wise by converting both operands to tuples."""
        mine, theirs = tuple(self), tuple(other)
        return mine == theirs

    def __abs__(self):
        """Return the Euclidean norm (square root of the sum of squares)."""
        squares = (c * c for c in self)
        return math.sqrt(sum(squares))

    def __bool__(self):
        """A vector is truthy unless its magnitude is zero."""
        magnitude = abs(self)
        return bool(magnitude)

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the byte layout produced by ``__bytes__``."""
        code = chr(octets[0])  # first byte carries the typecode
        payload = memoryview(octets[1:]).cast(code)
        return cls(payload)

In [2]:
Vector([3.1, 4.2])

Vector([3.1, 4.2])

In [4]:
Vector((3, 4, 5))

Vector([3.0, 4.0, 5.0])

In [5]:
Vector(range(10))

Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])

In [7]:
abs(Vector([3.1, 4.2]))

5.220153254455275

## 10.3 协议和鸭子类型
在第1章我们就说过，在Python中创建功能完善的序列类型无需使用继承，只需要实现符合序列协议的方法。

在面向对象编程中，协议是非正式的接口，只在文档中定义，在代码中不定义。

## 10.4 Vector类第2版：可切片的序列

In [18]:
# 这是第二版的Vector：新增__len__和__getitem__
from array import array
import reprlib
import math

class Vector:
    """A multi-dimensional vector backed by an ``array`` of floats.

    This version adds ``__len__`` and ``__getitem__`` on top of the basic
    vector behavior.
    """

    typecode = 'd'  # array type code used for storage and for the bytes header

    def __init__(self, components):
        """Build the vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Yield the components in order."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib shortens long arrays with ``...``."""
        text = reprlib.repr(self._components)
        # Cut "array('d', " off the front and the closing ")" off the back.
        opening = text.find('[')
        return f'Vector({text[opening:-1]})'

    def __str__(self):
        """Show the components formatted like a tuple."""
        as_tuple = tuple(self)
        return str(as_tuple)

    def __bytes__(self):
        """Serialize as one typecode byte followed by the raw array bytes."""
        header = bytes([ord(self.typecode)])
        payload = bytes(self._components)
        return header + payload

    def __eq__(self, other):
        """Compare component-wise by converting both operands to tuples."""
        mine, theirs = tuple(self), tuple(other)
        return mine == theirs

    def __abs__(self):
        """Return the Euclidean norm (square root of the sum of squares)."""
        squares = (c * c for c in self)
        return math.sqrt(sum(squares))

    def __bool__(self):
        """A vector is truthy unless its magnitude is zero."""
        magnitude = abs(self)
        return bool(magnitude)

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the byte layout produced by ``__bytes__``."""
        code = chr(octets[0])  # first byte carries the typecode
        payload = memoryview(octets[1:]).cast(code)
        return cls(payload)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Delegate subscripting to the underlying array.

        Because the array handles the subscript, a slice comes back as an
        ``array`` rather than as a ``Vector``.
        """
        storage = self._components
        return storage[index]

In [19]:
v = Vector([3, 4, 5])
v[0:2]

array('d', [3.0, 4.0])

### 10.4.1 切片原理

In [20]:
# 用下面的代码来看切片的原理
class MySeq:
    """Probe object that reveals what the interpreter passes to ``__getitem__``."""

    def __getitem__(self, index):
        # Echo the subscript unchanged: an int, a slice, or a tuple of them.
        return index

In [21]:
s = MySeq()

In [22]:
s[1]

1

In [23]:
s[1:4]

slice(1, 4, None)

In [24]:
s[1:4:2, 7:9]

(slice(1, 4, 2), slice(7, 9, None))

In [25]:
dir(slice)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'indices',
 'start',
 'step',
 'stop']

In [26]:
# 看一看slice.indices方法是如何"整顿"切片的
help(slice.indices)

Help on method_descriptor:

indices(...)
    S.indices(len) -> (start, stop, stride)
    
    Assuming a sequence of length len, calculate the start and stop
    indices, and the stride length of the extended slice described by
    S. Out of bounds indices are clipped in a manner consistent with the
    handling of normal slices.



In [27]:
slice(-3, None, None).indices(5)  # 也就是-3:等同于2:5:1

(2, 5, 1)

### 10.4.2 能处理切片的\_\_getitem\_\_方法

In [38]:
# 这是第二版的Vector：__getitem__能正确处理切片
from array import array
import reprlib
import math
import numbers

class Vector:
    """A multi-dimensional vector backed by an ``array`` of floats.

    Supports iteration, ``len()``, integer indexing and slicing; slicing
    returns a new ``Vector`` rather than a bare ``array``.
    """

    typecode = 'd'  # array type code used for storage and for the bytes header

    def __init__(self, components):
        """Build the vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Yield the components in order."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib shortens long arrays with ``...``."""
        components = reprlib.repr(self._components)
        # Keep only the "[...]" part of "array('d', [...])".
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Show the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Serialize as one typecode byte followed by the raw array bytes."""
        return bytes([ord(self.typecode)]) + bytes(self._components)

    def __eq__(self, other):
        """Compare component-wise by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm (square root of the sum of squares)."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """A vector is truthy unless its magnitude is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])  # first byte carries the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component (int index) or a new Vector (slice).

        Raises:
            TypeError: if *index* is neither an integer nor a slice.
        """
        # Bind cls up front: it is needed both to build the sliced result
        # and to format the error message in the fall-through case.
        cls = type(self)
        if isinstance(index, slice):
            # Slice: wrap the sliced array in a new instance of this class.
            return cls(self._components[index])
        if isinstance(index, numbers.Integral):
            # Integer index: return the single component.
            return self._components[index]
        msg = "{cls.__name__} indices must be integers"
        raise TypeError(msg.format(cls=cls))

In [39]:
v = Vector(range(7))
v[-1]

6.0

In [40]:
v[1:4]

Vector([1.0, 2.0, 3.0])

In [42]:
a = slice(1, 10, 1)
a

slice(1, 10, 1)

In [43]:
for i in a:
    print(i)  # slice is not iterable

TypeError: 'slice' object is not iterable

## 10.5 Vector类第3版：动态存取属性

In [53]:
# 先看一版比较差的getattr实现

from array import array
import reprlib
import math
import numbers

class Vector:
    """A multi-dimensional vector backed by an ``array`` of floats.

    Adds read access to the first components through the one-letter
    attributes ``x``, ``y``, ``z`` and ``t``. This version defines no
    ``__setattr__``, so assigning ``v.x = 10`` creates an ordinary instance
    attribute that shadows the shortcut without changing the components.
    """

    typecode = 'd'  # array type code used for storage and for the bytes header

    def __init__(self, components):
        """Build the vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Yield the components in order."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib shortens long arrays with ``...``."""
        components = reprlib.repr(self._components)
        # Keep only the "[...]" part of "array('d', [...])".
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Show the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Serialize as one typecode byte followed by the raw array bytes."""
        return bytes([ord(self.typecode)]) + bytes(self._components)

    def __eq__(self, other):
        """Compare component-wise by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm (square root of the sum of squares)."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """A vector is truthy unless its magnitude is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])  # first byte carries the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component (int index) or a new Vector (slice).

        Raises:
            TypeError: if *index* is neither an integer nor a slice.
        """
        # Bind cls up front: it is needed both to build the sliced result
        # and to format the error message in the fall-through case.
        cls = type(self)
        if isinstance(index, slice):
            # Slice: wrap the sliced array in a new instance of this class.
            return cls(self._components[index])
        if isinstance(index, numbers.Integral):
            # Integer index: return the single component.
            return self._components[index]
        msg = "{cls.__name__} indices must be integers"
        raise TypeError(msg.format(cls=cls))

    def __getattr__(self, name):
        """Map the one-letter names ``x``, ``y``, ``z``, ``t`` to components 0-3.

        Raises:
            AttributeError: for any other name, or when the vector has too
                few components for the requested letter.
        """
        shortcut = "xyzt"
        cls = type(self)
        if len(name) == 1:
            pos = shortcut.find(name)  # -1 when the letter is not a shortcut
            if 0 <= pos < len(self._components):
                return self[pos]
        msg = "{.__name__!r} object has no attribute {!r}"
        raise AttributeError(msg.format(cls, name))

In [54]:
v = Vector(range(5))
v

Vector([0.0, 1.0, 2.0, 3.0, 4.0])

In [55]:
v.x

0.0

In [56]:
v1 = Vector(range(1))

In [57]:
v1.y

AttributeError: 'Vector' object has no attribute 'y'

In [58]:
# 注意看这里的赋值错误
v.x = 10

In [59]:
v.x

10

In [60]:
v

Vector([0.0, 1.0, 2.0, 3.0, 4.0])

In [64]:
# 添加setattr实现

from array import array
import reprlib
import math
import numbers

class Vector:
    """A multi-dimensional vector backed by an ``array`` of floats.

    The first components can be read through the one-letter attributes
    listed in ``shortcut``; assigning to those names is rejected so the
    shortcuts cannot be shadowed by instance attributes.
    """

    typecode = 'd'  # array type code used for storage and for the bytes header
    shortcut = "xyzt"  # letter at position i reads component i

    def __init__(self, components):
        """Build the vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Yield the components in order."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib shortens long arrays with ``...``."""
        components = reprlib.repr(self._components)
        # Keep only the "[...]" part of "array('d', [...])".
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Show the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Serialize as one typecode byte followed by the raw array bytes."""
        return bytes([ord(self.typecode)]) + bytes(self._components)

    def __eq__(self, other):
        """Compare component-wise by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm (square root of the sum of squares)."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """A vector is truthy unless its magnitude is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])  # first byte carries the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component (int index) or a new Vector (slice).

        Raises:
            TypeError: if *index* is neither an integer nor a slice.
        """
        # Bind cls up front: it is needed both to build the sliced result
        # and to format the error message in the fall-through case.
        cls = type(self)
        if isinstance(index, slice):
            # Slice: wrap the sliced array in a new instance of this class.
            return cls(self._components[index])
        if isinstance(index, numbers.Integral):
            # Integer index: return the single component.
            return self._components[index]
        msg = "{cls.__name__} indices must be integers"
        raise TypeError(msg.format(cls=cls))

    def __getattr__(self, name):
        """Map the one-letter names in ``shortcut`` to the leading components.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has too
                few components for the requested letter.
        """
        cls = type(self)
        if len(name) == 1:
            pos = self.shortcut.find(name)  # -1 when the letter is not a shortcut
            if 0 <= pos < len(self._components):
                return self[pos]
        msg = "{.__name__!r} object has no attribute {!r}"
        raise AttributeError(msg.format(cls, name))

    def __setattr__(self, name, value):
        """Reject assignment to the shortcut letters; set anything else normally.

        Raises:
            AttributeError: if *name* is one of the letters in ``shortcut``.
        """
        cls = type(self)
        if len(name) == 1 and name in cls.shortcut:
            error = "readonly attributes {attr_name!r}"
            raise AttributeError(error.format(attr_name=name))
        # Defer to the superclass for the standard attribute-setting behavior.
        super().__setattr__(name, value)

In [65]:
v = Vector(range(10))

In [66]:
v.x

0.0

In [68]:
v.n = 10

## 10.6 Vector类第4版：散列和快速等值测试
我们要再次实现\_\_hash\_\_方法。加上现在的eq方法，这就可以把Vector变成一个可散列的类。

In [72]:
# 先试一下能不能散列
set().add(v)

TypeError: unhashable type: 'Vector'

In [123]:
# 添加__hash__实现，并改进__eq__

from array import array
import reprlib
import math
import numbers
import functools
import operator

class Vector:
    """A hashable multi-dimensional vector backed by an ``array`` of floats.

    The first components can be read through the one-letter attributes
    listed in ``shortcut``; assigning to those names is rejected. Equality
    is component-wise and the hash is the XOR of the component hashes.
    """

    typecode = 'd'  # array type code used for storage and for the bytes header
    shortcut = "xyzt"  # letter at position i reads component i

    def __init__(self, components):
        """Build the vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Yield the components in order."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib shortens long arrays with ``...``."""
        components = reprlib.repr(self._components)
        # Keep only the "[...]" part of "array('d', [...])".
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Show the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Serialize as one typecode byte followed by the raw array bytes."""
        return bytes([ord(self.typecode)]) + bytes(self._components)

    def __abs__(self):
        """Return the Euclidean norm (square root of the sum of squares)."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """A vector is truthy unless its magnitude is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the byte layout produced by ``__bytes__``."""
        typecode = chr(octets[0])  # first byte carries the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component (int index) or a new Vector (slice).

        Raises:
            TypeError: if *index* is neither an integer nor a slice.
        """
        # Bind cls up front: it is needed both to build the sliced result
        # and to format the error message in the fall-through case.
        cls = type(self)
        if isinstance(index, slice):
            # Slice: wrap the sliced array in a new instance of this class.
            return cls(self._components[index])
        if isinstance(index, numbers.Integral):
            # Integer index: return the single component.
            return self._components[index]
        msg = "{cls.__name__} indices must be integers"
        raise TypeError(msg.format(cls=cls))

    def __getattr__(self, name):
        """Map the one-letter names in ``shortcut`` to the leading components.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has too
                few components for the requested letter.
        """
        cls = type(self)
        if len(name) == 1:
            pos = self.shortcut.find(name)  # -1 when the letter is not a shortcut
            if 0 <= pos < len(self._components):
                return self[pos]
        msg = "{.__name__!r} object has no attribute {!r}"
        raise AttributeError(msg.format(cls, name))

    def __setattr__(self, name, value):
        """Reject assignment to the shortcut letters; set anything else normally.

        Raises:
            AttributeError: if *name* is one of the letters in ``shortcut``.
        """
        cls = type(self)
        if len(name) == 1 and name in cls.shortcut:
            error = "readonly attributes {attr_name!r}"
            raise AttributeError(error.format(attr_name=name))
        # Defer to the superclass for the standard attribute-setting behavior.
        super().__setattr__(name, value)

    def __hash__(self):
        """Return the XOR of the hashes of all components (0 when empty).

        ``__hash__`` and ``__eq__`` are defined together: objects that
        compare equal must hash equal, although equal hashes do not imply
        equal objects.
        """
        hashes = map(hash, self)  # lazy; same as (hash(x) for x in self)
        return functools.reduce(operator.xor, hashes, 0)

    def __eq__(self, other):
        """Compare lengths first, then components pairwise.

        Unlike building two tuples, this stops at the first mismatch and
        makes no copies. Returns ``NotImplemented`` when *other* has no
        length, so comparing with e.g. an int yields ``False`` instead of
        raising ``TypeError``.
        """
        try:
            other_len = len(other)
        except TypeError:
            return NotImplemented
        # zip() stops at the shorter operand, so the length check must come
        # first; the explicit-loop equivalent returns False on the first
        # unequal pair.
        return len(self) == other_len and all(a == b for a, b in zip(self, other))

In [124]:
v = Vector([1, 2, 3])

In [125]:
v2 = Vector([1, 2, 3])

In [126]:
s = set()

In [127]:
s.add(v)
s

{Vector([1.0, 2.0, 3.0])}

In [128]:
v == v2

True