# 3.1 数据结构和序列

# 二分搜索和维护已排序的列表

In [1]:
import bisect
# Sorted list used as input by the bisect examples in the following cells.
c = [1, 2, 2, 2, 3, 4, 7]

In [4]:
# bisect.bisect is an alias of bisect_right: the returned insertion point
# lies after any existing entries equal to 2.
bisect.bisect_right(c, 2)

4

In [6]:
# insort mutates its list argument in place, so insert into a copy: re-running
# this cell then always shows exactly one 6, and `c` keeps its original
# contents for the other cells.
c_with_six = list(c)
bisect.insort(c_with_six, 6)
c_with_six

[1, 2, 2, 2, 3, 4, 6, 7]

# enumerate函数

In [8]:
# Map each value to its position; a value that occurs more than once keeps
# the index of its last occurrence.
some_list = ['foo', 'bar', 'baz', 'baz']
mapping = {value: index for index, value in enumerate(some_list)}
mapping

{'bar': 1, 'baz': 3, 'foo': 0}

# zip函数

In [9]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
# zip pairs the elements positionally. The result is a zip object rather than
# a list, so displaying it shows only its repr, not the pairs.
zipped = zip(seq1, seq2)
zipped

<zip at 0x5a0f58>

In [12]:
# Materialize the pairs. This consumes `zipped`, so running this cell again
# without re-creating `zipped` displays [].
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [None]:
# zip可以处理任意多的序列，元素的个数取决于最短的序列：

In [14]:
# seq3 has only two elements, so zipping it with the three-element sequences
# yields two tuples.
seq3 = [False, True]
list(zip(seq1, seq2, seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [15]:
# Number each (seq1, seq2) pair and print it as "index:first:second".
for i, pair in enumerate(zip(seq1, seq2)):
    print('{0}:{1}:{2}'.format(i, *pair))

0:foo:one
1:bar:two
2:baz:three


In [18]:
# Sample name pairs used by the zip(*...) example.
# NOTE(review): the last tuple looks like it is in (last, first) order, unlike
# the other two — confirm whether ('Curt', 'Schilling') was intended.
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),
            ('Schilling', 'Curt')]

In [20]:
# zip(*pitchers) "unzips" the pairs: the first tuple collects every first
# element and the second tuple collects every second element.
first_name, last_name = zip(*pitchers)

In [21]:
# Tuple holding the first element of every pair in pitchers.
first_name

('Nolan', 'Roger', 'Schilling')

In [22]:
# Tuple holding the second element of every pair in pitchers.
last_name

('Ryan', 'Clemens', 'Curt')

# reversed函数

# 3.2 函数

# 函数也是对象

In [23]:
 states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
           'south   carolina##', 'West virginia?']

In [24]:
import re

def clean_strings(strings):
    """Return a list of cleaned copies of ``strings``.

    Each string is stripped of surrounding whitespace, has every '!', '#'
    and '?' character removed, and is then title-cased.
    """
    return [re.sub('[!#?]', '', raw.strip()).title() for raw in strings]

In [25]:
# Clean the raw state names with the one-argument clean_strings.
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [26]:
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character removed."""
    return re.sub('[!#?]', '', value)

# Default cleaning pipeline; the operations are applied left to right.
clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops=None):
    """Return a new list with every operation in ``ops`` applied to each string.

    Parameters:
        strings: iterable of strings to clean.
        ops: iterable of one-argument callables, applied in order to each
            string. When omitted, ``clean_ops`` is used, so the one-argument
            call ``clean_strings(strings)`` keeps working even though this
            definition replaces the earlier one-argument function.

    Returns:
        list of cleaned strings, in the same order as ``strings``.
    """
    # Materialize once so a one-shot iterable of operations is still applied
    # to every string, not just the first.
    ops = tuple(clean_ops if ops is None else ops)
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result

In [27]:
# Same cleaning as before, with the operations passed in as a list of functions.
clean_strings(states,clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [28]:
# map applies remove_punctuation to each state name in turn; only the
# punctuation is removed, so whitespace and casing are left as they were.
for without_punctuation in map(remove_punctuation, states):
    print(without_punctuation)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


# itertools模块

In [30]:
import itertools

def first_letter(x):
    """Grouping key: the first character of ``x``."""
    return x[0]

names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
# groupby only merges *consecutive* items that share a key, which is why 'A'
# appears twice for this unsorted list.
# The loop variable is deliberately not called `names`: reusing that name
# would leave `names` bound to the last (already consumed) group iterator
# instead of the list once the loop finishes.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group))    # group is a one-shot iterator

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


# 3.3 文件和操作系统

In [None]:
# 为了打开一个文件以便读写，可以使用内置的open函数以及一个相对或绝对的文件路径：
# 默认情况下，文件是以只读模式（'r'）打开的。然后，我们就可以像处理列表那样来处理这个文件句柄f了，比如对行进行迭代：
# 从文件中取出的行都带有完整的行结束符（EOL），因此你常常会看到下面这样的代码（得到一组没有EOL的行）：

In [32]:
path = 'examples/segismundo.txt'
# Open the file once, with an explicit UTF-8 encoding, and let the with-block
# close it. Previously one handle was opened and never used or closed, and a
# second one was opened inside the comprehension and never closed either.
with open(path, encoding='utf_8') as f:
    # rstrip() drops the end-of-line marker (and any other trailing whitespace).
    lines = [x.rstrip() for x in f]
lines

['Sueña el rico en su riqueza,',
 'que más cuidados le ofrece;',
 '',
 'sueña el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueña el que a medrar empieza,',
 'sueña el que afana y pretende,',
 'sueña el que agravia y ofende,',
 '',
 'y en el mundo, en conclusión,',
 'todos sueñan lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [33]:
# Copy the lines of `path` that contain more than a single character (i.e.
# drop the blank lines) into tmp.txt.
# Both files get an explicit UTF-8 encoding so the accented characters do not
# depend on the platform's default encoding, and the source file is opened in
# a with-block so its handle is closed as well.
with open('tmp.txt', 'w', encoding='utf_8') as handle:
    with open(path, encoding='utf_8') as source:
        handle.writelines(x for x in source if len(x) > 1)

In [36]:
# Read tmp.txt back with an explicit UTF-8 encoding. Relying on the platform
# default can decode the accented characters incorrectly (for example 'ñ'
# showing up as '帽' when the default encoding is GBK).
with open('tmp.txt', encoding='utf_8') as f:
    lines = f.readlines()
lines

['Sueña el rico en su riqueza,\n',
 'que más cuidados le ofrece;\n',
 'sueña el pobre que padece\n',
 'su miseria y su pobreza;\n',
 'sueña el que a medrar empieza,\n',
 'sueña el que afana y pretende,\n',
 'sueña el que agravia y ofende,\n',
 'y en el mundo, en conclusión,\n',
 'todos sueñan lo que son,\n',
 'aunque ninguno lo entiende.\n']