# Python语言精要

## Python解释器

```
$ python
Python 2.7.2 (default, Oct  4 2011, 20:06:09)
[GCC 4.6.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> a = 5
>>> print a
5
```

In [None]:
import this

## IPython交互式命令行

```
$ ipython
Python 2.7.10 (default, Oct 23 2015, 18:05:06)
Type "copyright", "credits" or "license" for more information.

IPython 4.0.1 -- An enhanced Interactive Python.
?         -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help      -> Python's own help system.
object?   -> Details about 'object', use 'object??' for extra details.

In [1]: import datetime

In [2]: datetime?
Type:        module
String form: <module 'datetime' from '/Users/suqi/.virtualenvs/scrapy/lib/python2.7/lib-dynload/datetime.so'>
File:        ~/.virtualenvs/scrapy/lib/python2.7/lib-dynload/datetime.so
Docstring:   Fast implementation of the datetime type.
```

## 1. 基本概念

### 语法

#### 缩进，而不是括号
- 下面的代码只是演示缩进，没有实际操作意义

In [None]:
for x in array:
    if x < pivot:
        less.append(x)
    else:
        greater.append(x)

In [None]:
for x in array {
    if x < pivot {
        less.append(x)
    } else {
        greater.append(x)
    }
}

In [None]:
for x in array
{
  if x < pivot
  {
    less.append(x)
  }
  else
  {
    greater.append(x)
  }
}

In [None]:
a = 5; b = 6; c = 7

#### 一切皆对象

#### 注释

In [None]:
results = []
for line in file_handle:
    # keep the empty lines for now
    # if len(line) == 0:
    #   continue
    results.append(line.replace('foo', 'bar'))

#### 调用函数和对象方法

In [None]:
result = f(x, y, z)
g()

In [None]:
obj.some_method(x, y, z)

In [None]:
result = f(a, b, c, d=5, e='foo')

#### 变量，以及传递引用

In [None]:
a = [1, 2, 3]

In [None]:
b = a

In [None]:
a.append(4)
b

In [None]:
def append_element(some_list, element):
    """Append ``element`` to ``some_list`` in place.

    The object passed by the caller is itself modified (no copy is made),
    and nothing is returned.
    """
    some_list.append(element)

In [None]:
data = [1, 2, 3]

append_element(data, 4)

In [4]: data
Out[4]: [1, 2, 3, 4]

#### 动态类型，强类型

In [None]:
a = 5
type(a)
a = 'foo'
type(a)

In [None]:
'5' + 5

In [None]:
a = 4.5
b = 2
# 字符串格式化
print 'a is %s, b is %s' % (type(a), type(b))
a / b

In [None]:
a = 5
isinstance(a, int)

In [None]:
a = 5; b = 4.5
isinstance(a, (int, float))
isinstance(b, (int, float))

#### 属性和方法

In [None]:
In [1]: a = 'foo'

In [2]: a.<Tab>
a.capitalize  a.format      a.isupper     a.rindex      a.strip
a.center      a.index       a.join        a.rjust       a.swapcase
a.count       a.isalnum     a.ljust       a.rpartition  a.title
a.decode      a.isalpha     a.lower       a.rsplit      a.translate
a.encode      a.isdigit     a.lstrip      a.rstrip      a.upper
a.endswith    a.islower     a.partition   a.split       a.zfill
a.expandtabs  a.isspace     a.replace     a.splitlines
a.find        a.istitle     a.rfind       a.startswith

#### “鸭子”类型

In [None]:
def isiterable(obj):
    """Report whether ``iter(obj)`` succeeds.

    Returns True when an iterator can be obtained from ``obj`` and False
    when ``iter`` raises ``TypeError``.
    """
    try:
        iter(obj)
    except TypeError:
        # iter() rejected the object, so it is not iterable
        return False
    return True

In [None]:
isiterable('a string')
isiterable([1, 2, 3])
isiterable(5)

In [None]:
if not isinstance(x, list) and isiterable(x):
    x = list(x)

#### import 模块

In [None]:
# some_module.py
# Module-level constant that importers can read as some_module.PI
PI = 3.14159

def f(x):
    """Return ``x`` plus 2."""
    return x + 2

def g(a, b):
    """Return the sum of ``a`` and ``b``."""
    return a + b

In [None]:
import some_module
result = some_module.f(5)
pi = some_module.PI

In [None]:
from some_module import f, g, PI
result = g(5, PI)

In [None]:
import some_module as sm
from some_module import PI as pi, g as gf

r1 = sm.f(pi)
r2 = gf(6, pi)

#### 二元运算符 和 比较

In [None]:
5 - 7
12 + 21.5
5 <= 2

In [None]:
a = [1, 2, 3]
b = a
# Note, the list function always creates a new list
c = list(a)
a is b
a is not c

In [None]:
a == c

In [None]:
a = None
a is None

#### 严格求值 vs. 惰性求值
- Python：严格求值（eager evaluation），表达式在赋值时立即计算
- Haskell 等函数式编程语言：惰性求值（lazy evaluation），到使用时才计算

> Python 可以通过生成器（generator）实现惰性求值

In [None]:
a = b = c = 5
d = a + b * c

#### 可变与不可变对象

In [None]:
a_list = ['foo', 2, [4, 5]]
a_list[2] = (3, 4)
a_list

In [None]:
a_tuple = (3, 5, (4, 5))
a_tuple[1] = 'four'

### 原生类型

#### 数字类型

In [None]:
ival = 17239871
ival ** 6

In [None]:
fval = 7.243
fval2 = 6.78e-5

In [None]:
3 / 2

In [None]:
from __future__ import division

In [None]:
3 / float(2)

In [None]:
3 // 2

In [None]:
cval = 1 + 2j
cval * (1 - 2j)

#### 字符串

In [None]:
a = 'one way of writing a string'
b = "another way"

In [None]:
c = """
This is a longer string that
spans multiple lines
"""

In [None]:
a = 'this is a string'
a[10] = 'f'
b = a.replace('string', 'longer string')
b

In [None]:
a = 5.6
s = str(a)
s

In [None]:
s = 'python'
list(s)
s[:3]

In [None]:
s = '12\\34'
print s

In [None]:
s = r'this\has\no\special\characters'
s

In [None]:
a = 'this is the first half '
b = 'and this is the second half'
a + b

In [None]:
template = '%.2f %s are worth $%d'

In [None]:
template % (4.5560, 'Argentine Pesos', 1)

#### 布尔值

In [None]:
True and True
False or True

In [None]:
a = [1, 2, 3]
if a:
    print 'I found something!'

b = []
if not b:
    print 'Empty!'

In [None]:
bool([]), bool([1, 2, 3])
bool('Hello world!'), bool('')
bool(0), bool(1)

#### 类型转换

In [None]:
s = '3.14159'
fval = float(s)
type(fval)
int(fval)
bool(fval)
bool(0)

#### None 空值

In [None]:
a = None
a is None
b = 5
b is not None

In [None]:
def add_and_maybe_multiply(a, b, c=None):
    """Return ``a + b``, multiplied by ``c`` when ``c`` is supplied.

    ``c`` is compared against ``None`` by identity, so falsy values such
    as ``0`` still take part in the multiplication.
    """
    total = a + b
    if c is None:
        return total
    return total * c

#### 日期和时间

In [None]:
from datetime import datetime, date, time
dt = datetime(2011, 10, 29, 20, 30, 21)
dt.day
dt.minute

In [None]:
dt.date()
dt.time()

In [None]:
dt.strftime('%m/%d/%Y %H:%M')


In [None]:
datetime.strptime('20091031', '%Y%m%d')


In [None]:
dt.replace(minute=0, second=0)

In [None]:
dt2 = datetime(2011, 11, 15, 22, 30)
delta = dt2 - dt
delta
type(delta)

In [None]:
dt
dt + delta

## 2. 控制流

#### If, elif, and else

In [None]:
if x < 0:
    # double quotes so the apostrophe in "It's" does not end the string
    print("It's negative")

In [None]:
# Exactly one branch runs: conditions are tested top to bottom and the first
# true one wins; `else` handles everything that is left.
if x < 0:
    # double quotes so the apostrophe in "It's" does not end the string
    print("It's negative")
elif x == 0:
    print('Equal to zero')
elif 0 < x < 5:
    # chained comparison: equivalent to (0 < x) and (x < 5)
    print('Positive but smaller than 5')
else:
    print('Positive and larger than 5')

In [None]:
a = 5; b = 7
c = 8; d = 4
if a < b or c > d:
    print 'Made it'

#### For 循环

In [None]:
for value in collection:
    # do something with value
    pass  # a comment alone is not a statement; the loop body needs one

In [None]:
sequence = [1, 2, None, 4, None, 5]
total = 0
for value in sequence:
    if value is None:
        continue
    total += value

In [None]:
sequence = [1, 2, 0, 4, 6, 5, 2, 1]
total_until_5 = 0
for value in sequence:
    if value == 5:
        break
    total_until_5 += value

In [None]:
# each item produced by the iterator is unpacked into three names
for a, b, c in iterator:
    # do something
    pass  # a comment alone is not a statement; the loop body needs one

#### While 循环

In [None]:
x = 256
total = 0
while x > 0:
    if total > 500:
        break
    total += x
    x = x // 2

`思考：为什么没有switch语句`

#### pass语句

In [None]:
if x < 0:
    print 'negative!'
elif x == 0:
    # TODO: put something smart here
    pass
else:
    print 'positive!'

In [None]:
def f(x, y, z):
    # TODO: implement this function!
    pass


#### 异常处理

In [None]:
float('1.2345')
float('something')

In [None]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` unchanged if the conversion fails.

    The bare ``except`` swallows every exception raised by ``float``,
    not only conversion errors.
    """
    try:
        converted = float(x)
    except:
        return x
    return converted

In [None]:
attempt_float('1.2345')
attempt_float('something')

In [None]:
float((1, 2))

In [None]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` unchanged when ``float`` raises ValueError.

    Any other exception (for example ``TypeError``) propagates to the caller.
    """
    try:
        converted = float(x)
    except ValueError:
        return x
    return converted

In [None]:
attempt_float((1, 2))

In [None]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` unchanged on TypeError or ValueError.

    A tuple of exception types lets one ``except`` clause handle both.
    """
    try:
        converted = float(x)
    except (TypeError, ValueError):
        return x
    return converted

In [None]:
f = open(path, 'w')

try:
    write_to_file(f)
finally:
    f.close()

In [None]:
f = open(path, 'w')

try:
    write_to_file(f)
except:
    print 'Failed'
else:
    print 'Succeeded'
finally:
    f.close()

#### range 和 xrange

In [None]:
range(10)

In [None]:
range(0, 20, 2)

In [None]:
seq = [1, 2, 3, 4]
for i in range(len(seq)):
    val = seq[i]

In [None]:
# Add up every integer below 10000 that is a multiple of 3 or of 5.
# NOTE: the name `sum` shadows the builtin of the same name from here on.
sum = 0
for i in xrange(10000):
    # % is the modulo operator
    if i % 3 == 0 or i % 5 == 0:
        sum += i


#### 三元运算符

In [None]:
x = 5
value = 'Non-negative' if x >= 0 else 'Negative'

## 3. 数据结构和序列

### 元组 Tuple

In [None]:
tup = 4, 5, 6
tup

In [None]:
nested_tup = (4, 5, 6), (7, 8)
nested_tup

In [None]:
tuple([4, 0, 2])
tup = tuple('string')
tup

In [None]:
tup[0]

In [None]:
tup = tuple(['foo', [1, 2], True])
tup[2] = False

# however
tup[1].append(3)
tup

In [None]:
(4, None, 'foo') + (6, 0) + ('bar',)

In [None]:
('foo', 'bar') * 4

#### 元组解包

In [None]:
tup = (4, 5, 6)
a, b, c = tup
b

In [None]:
tup = 4, 5, (6, 7)
a, b, (c, d) = tup
d

#### 元组方法

In [None]:
a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

### 列表 List

In [None]:
a_list = [2, 3, 7, None]

tup = ('foo', 'bar', 'baz')
b_list = list(tup)
b_list
b_list[1] = 'peekaboo'
b_list

#### 添加和删除元素

In [None]:
b_list.append('dwarf')
b_list

In [None]:
b_list.insert(1, 'red')
b_list

In [None]:
b_list.pop(2)
b_list

In [None]:
b_list.append('foo')
b_list.remove('foo')
b_list

In [None]:
'dwarf' in b_list

#### 连接与合并列表

In [None]:
[4, None, 'foo'] + [7, 8, (2, 3)]


In [None]:
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

In [None]:
everything = []
for chunk in list_of_lists:
    everything.extend(chunk)


In [None]:
everything = []
for chunk in list_of_lists:
    everything = everything + chunk

#### 排序

In [None]:
a = [7, 2, 5, 1, 3]
a.sort()
a

In [None]:
b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len)
b

#### 二分查找，以及维护一个排序的列表

In [None]:
import bisect
c = [1, 2, 2, 2, 3, 4, 7]
bisect.bisect(c, 2)
bisect.bisect(c, 5)
bisect.insort(c, 6)
c

#### 切片

In [None]:
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[1:5]

In [None]:
seq[3:4] = [6, 3]
seq

In [None]:
seq[:5]
seq[3:]

In [None]:
seq[-4:]
seq[-6:-2]

In [None]:
seq[::2]

In [None]:
seq[::-1]

### 内置的序列函数

#### enumerate 枚举

In [None]:

i = 0
for value in collection:
   # do something with value
   i += 1

In [None]:
# enumerate yields (index, value) pairs, so no manual counter is needed
for i, value in enumerate(collection):
    # do something with value
    pass  # a comment alone is not a statement; the loop body needs one

In [None]:
some_list = ['foo', 'bar', 'baz']
mapping = dict((v, i) for i, v in enumerate(some_list))
mapping

#### sorted 排序

In [None]:
sorted([7, 1, 2, 6, 0, 3, 2])
sorted('horse race')

In [None]:
sorted(set('this is just some string'))

#### zip

In [None]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zip(seq1, seq2)

In [None]:
seq3 = [False, True]
zip(seq1, seq2, seq3)

In [None]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('%d: %s, %s' % (i, a, b))

In [None]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),
            ('Schilling', 'Curt')]
first_names, last_names = zip(*pitchers)
first_names
last_names

In [None]:
zip(seq[0], seq[1], ..., seq[len(seq) - 1])

#### reversed 倒序

In [None]:
list(reversed(range(10)))

### 字典 Dict

In [None]:
empty_dict = {}
d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
d1

In [None]:
d1[7] = 'an integer'
d1
d1['b']

In [None]:
'b' in d1

In [None]:
d1[5] = 'some value'
d1['dummy'] = 'another value'
del d1[5]
ret = d1.pop('dummy')
ret

In [None]:
d1.keys()
d1.values()

In [None]:
d1.update({'b' : 'foo', 'c' : 12})
d1

#### 从序列中创建字典

In [None]:
mapping = {}
for key, value in zip(key_list, value_list):
    mapping[key] = value

In [None]:
mapping = dict(zip(range(5), reversed(range(5))))
mapping

#### 默认值

In [None]:
if key in some_dict:
    value = some_dict[key]
else:
    value = default_value

In [None]:
value = some_dict.get(key, default_value)

In [None]:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}

for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)

by_letter

In [None]:
by_letter.setdefault(letter, []).append(word)

In [None]:
from collections import defaultdict
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)


In [None]:
counts = defaultdict(lambda: 4)

#### 字典键（key）的有效数据类型：可哈希（hashable）

In [None]:
hash('string')
hash((1, 2, (2, 3)))
hash((1, 2, [2, 3])) # fails because lists are mutable

In [None]:
d = {}
d[tuple([1, 2, 3])] = 5
d

### 集合 Set

In [None]:
set([2, 2, 2, 1, 3, 3])
{2, 2, 2, 1, 3, 3}

In [None]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
a | b  # union (or)
a & b  # intersection (and)
a - b  # difference
a ^ b  # symmetric difference (xor)

In [None]:
a_set = {1, 2, 3, 4, 5}
{1, 2, 3}.issubset(a_set)
a_set.issuperset({1, 2, 3})

In [None]:
{1, 2, 3} == {3, 2, 1}

### 推导式 List, set, and dict comprehensions

In [None]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
[x.upper() for x in strings if len(x) > 2]

In [None]:
unique_lengths = {len(x) for x in strings}
unique_lengths

In [None]:
loc_mapping = {val : index for index, val in enumerate(strings)}
loc_mapping

In [None]:
# same mapping as the dict comprehension, built from (value, index) pairs
loc_mapping = dict((val, idx) for idx, val in enumerate(strings))

#### 嵌套列表推导式

In [None]:
all_data = [['Tom', 'Billy', 'Jefferson', 'Andrew', 'Wesley', 'Steven', 'Joe'],
            ['Susie', 'Casey', 'Jill', 'Ana', 'Eva', 'Jennifer', 'Stephanie']]


In [None]:
# Collect, across every inner list, the names containing two or more 'e's.
names_of_interest = []
for names in all_data:
    enough_es = [name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)

In [None]:
result = [name for names in all_data for name in names
          if name.count('e') >= 2]
result


In [None]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup]
flattened

In [None]:
flattened = []

for tup in some_tuples:
    for x in tup:
        flattened.append(x)

In [None]:
In [229]: [[x for x in tup] for tup in some_tuples]

## 4. 函数

In [None]:
def my_function(x, y, z=1.5):
    """Combine ``x + y`` with ``z``.

    Returns ``z * (x + y)`` when ``z`` is greater than 1, otherwise
    ``z / (x + y)``.
    """
    total = x + y
    if z > 1:
        return z * total
    return z / total

In [None]:
my_function(5, 6, z=0.7)
my_function(3.14, 7, 3.5)

### 名称空间，作用域，和本地函数

In [None]:
def func():
    """Fill a list named ``a`` that is local to this call with 0 through 4.

    The list is not returned, so it is discarded when the function exits.
    """
    a = []
    for i in range(5):
        a.append(i)

In [None]:
# `a` is created in the enclosing scope, outside the function
a = []
def func():
    """Append 0 through 4 to the list ``a`` defined outside the function."""
    # `a` is only mutated, never rebound, so no `global` statement is needed
    for i in range(5):
        a.append(i)


In [None]:
a = None
def bind_a_variable():
    """Rebind the module-level name ``a`` to a new empty list."""
    # without this declaration the assignment below would create a local `a`
    global a
    a = []
bind_a_variable()
print a

In [None]:
def outer_function(x, y, z):
    def inner_function(a, b, c):
        pass
    pass

### 返回多个值

In [None]:
def f():
    """Return three values at once, packed into a single tuple."""
    first, second, third = 5, 6, 7
    return first, second, third

# the returned tuple is unpacked into three separate names
a, b, c = f()

In [None]:
return_value = f()

In [None]:
def f():
    """Return the three values as a dict keyed by 'a', 'b' and 'c'."""
    return dict(a=5, b=6, c=7)

### 函数都是对象

In [None]:

states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south   carolina##', 'West virginia?']

In [None]:
import re  # Regular expression module

def clean_strings(strings):
    """Return a new list with each string normalised.

    Every value is stripped of surrounding whitespace, has the characters
    ``!``, ``#`` and ``?`` removed, and is then title-cased. The input
    sequence is not modified.
    """
    def _normalise(text):
        trimmed = text.strip()
        without_punct = re.sub('[!#?]', '', trimmed)  # remove punctuation
        return without_punct.title()

    return [_normalise(entry) for entry in strings]

In [None]:
In [15]: clean_strings(states)
Out[15]:
['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [None]:
def remove_punctuation(value):
    """Return ``value`` with every ``!``, ``#`` and ``?`` removed."""
    pattern = '[!#?]'
    return re.sub(pattern, '', value)

# functions are ordinary objects, so they can be stored in a list and
# applied one after another
clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    """Apply each callable in ``ops``, in order, to every string.

    Returns a new list of the transformed values; ``strings`` itself is
    not modified.
    """
    def _apply_all(text):
        for op in ops:
            text = op(text)
        return text

    return [_apply_all(entry) for entry in strings]

In [None]:
In [22]: clean_strings(states, clean_ops)
Out[22]:
['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [None]:
In [23]: map(remove_punctuation, states)
Out[23]:
['   Alabama ',
 'Georgia',
 'Georgia',
 'georgia',
 'FlOrIda',
 'south   carolina',
 'West virginia']

### 匿名函数 lambda

In [None]:
def short_function(x):
    """Return ``x`` multiplied by 2."""
    return x * 2

# the same computation written as an anonymous (lambda) function
equiv_anon = lambda x: x * 2

In [None]:
def apply_to_list(some_list, f):
    """Return a new list holding ``f(item)`` for each item of ``some_list``."""
    mapped = []
    for item in some_list:
        mapped.append(f(item))
    return mapped

ints = [4, 0, 1, 5, 6]
# a lambda is passed directly as the function argument
apply_to_list(ints, lambda x: x * 2)

In [None]:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']

In [None]:
strings.sort(key=lambda x: len(set(list(x))))
strings

### 闭包：返回函数的函数

In [None]:
def make_closure(a):
    """Return a zero-argument function that prints a message containing ``a``.

    The returned function keeps access to ``a`` after ``make_closure``
    has returned.
    """
    def closure():
        message = 'I know the secret: %d' % a
        print(message)
    return closure

closure = make_closure(5)

In [None]:
def make_watcher():
    """Return a function that reports whether it has seen a value before.

    Each watcher owns its own record of seen values; the first call with a
    given value returns False and every later call with it returns True.
    """
    have_seen = {}

    def has_been_seen(x):
        already_seen = x in have_seen
        if not already_seen:
            # remember the value so the next call reports True
            have_seen[x] = True
        return already_seen

    return has_been_seen

In [None]:
watcher = make_watcher()
vals = [5, 6, 1, 5, 1, 6, 3, 5]
[watcher(x) for x in vals]

In [None]:
def make_counter():
    """Return a function that yields 1, 2, 3, ... on successive calls.

    The running total is kept in a one-element list so the inner function
    can update it by mutation instead of rebinding a name.
    """
    state = [0]

    def counter():
        # bump the stored total, then hand back the new value
        state[0] = state[0] + 1
        return state[0]

    return counter

counter = make_counter()

In [None]:
def format_and_pad(template, space):
    """Return a formatter bound to ``template`` and a minimum width.

    The returned function renders its argument with ``template % x`` and
    right-justifies the result to ``space`` characters.
    """
    def formatter(x):
        rendered = template % x
        return rendered.rjust(space)

    return formatter

In [None]:
fmt = format_and_pad('%.4f', 15)
fmt(1.756)

### 扩展的调用语法：*args, **kwargs

In [None]:
a, b, c = args
d = kwargs.get('d', d_default_value)
e = kwargs.get('e', e_default_value)

In [None]:
def say_hello_then_call_f(f, *args, **kwargs):
    """Print the received arguments, then call ``f`` with them.

    ``args`` collects the extra positional arguments as a tuple and
    ``kwargs`` collects the extra keyword arguments as a dict. Returns
    whatever ``f(*args, **kwargs)`` returns.
    """
    print 'args is', args
    print 'kwargs is', kwargs
    print("Hello! Now I'm going to call %s" % f)
    # unpack the collected arguments back into a normal call
    return f(*args, **kwargs)

def g(x, y, z=1):
    """Return ``(x + y)`` divided by ``z``."""
    return (x + y) / z

In [None]:
In [8]:  say_hello_then_call_f(g, 1, 2, z=5.)
args is (1, 2)
kwargs is {'z': 5.0}
Hello! Now I'm going to call <function g at 0x2dd5cf8>
Out[8]: 0.6

## 5. 面向对象编程

In [None]:
class Phone(object):
    """Phone whose class attribute ``os`` is 'nokia'."""
    os = 'nokia'

class AndroidPhone(object):
    """Phone whose class attribute ``os`` is 'android'."""
    os = 'android'

class iPhone(object):
    """Phone whose class attribute ``os`` is 'ios'; adds ``touch_open``."""
    os = 'ios'
    
    def touch_open(self, finger):
        """Print the unlock message when ``finger`` equals 5; otherwise do nothing."""
        if finger == 5:
            print u"解锁成功"

# create one instance of each class
p1 = Phone()
p2 = AndroidPhone()
p3 = iPhone()

# the class attribute is read through each instance
print p1.os, p2.os, p3.os
p3.touch_open(5)

## 6. 输入和输出

In [2]:
user_input = raw_input("Please input your name: >>>")
print u"Hello, {}".format(user_input)

Please input your name: >>>mattsu
Hello, mattsu


In [1]:
raw_input(u'请输入你的名字：'.encode('utf-8'))

请输入你的名字：苏琦


'\xe8\x8b\x8f\xe7\x90\xa6'

#### 文件和操作系统

In [None]:
path = '.txt'
f = open(path)

In [None]:
for line in f:
    pass

In [None]:
lines = [x.rstrip() for x in open(path)]
lines

In [None]:
with open('tmp.txt', 'w') as handle:
    handle.writelines(x for x in open(path) if len(x) > 1)

open('tmp.txt').readlines()

In [None]:
import os

# delete the temporary file written by the previous cell
os.remove('tmp.txt')

## 7. 异常处理

In [4]:
try:
    f = open("nofile.txt")
    print f.read()
except IOError as ioe:
    print u"抓到了：{}".format(ioe)
except Exception as e:
    print e

抓到了：[Errno 2] No such file or directory: 'nofile.txt'


## 8. 实战
开发一套敏感词过滤程序：用户输入一句话，只要其中包含以下文件中的任意词汇，就把该词汇自动替换成 * 号后再输出。
- (words.txt) 每行为一个词汇
```
雾霾
主席
炸弹
等等
```