# Python特殊语法和字符串
## 一、Python特殊语法和语法糖

### 1. 使用random产生随机数

In [1]:
import random
random.seed(1234)  # Seed the generator so every run produces the same random numbers
print('choice(range(10)) =', random.choice(range(10)))  # one element picked from 0..9
print('randrange(0, 10, 2) =', random.randrange(0, 10, 2))  # one element picked from range(0, 10, 2)
print('random() =', random.random())  # float in [0.0, 1.0)
print('uniform(10,20) =', random.uniform(10,20))  # float between 10 and 20
# NOTE: the printed label says "randomint(10)", but the call is randint(10, 20),
# which returns an integer N with 10 <= N <= 20 (both ends included).
print('randomint(10) =', random.randint(10, 20))
a = [1,2,3,4]
random.shuffle(a)  # shuffles the list in place
print('shuffle([1,2,3,4]) then', a)
print('random.sample(a, 2) =', random.sample(a, 2))  # two elements chosen without replacement

choice(range(10)) = 7
randrange(0, 10, 2) = 0
random() = 0.007491470058587191
uniform(10,20) = 19.10975962449124
randomint(10) = 19
shuffle([1,2,3,4]) then [2, 4, 3, 1]
random.sample(a, 2) = [2, 4]


### 编程实践：创建一个包含10个随机数的列表，选出其中偶数项，再选出偶数项中最大的2项

In [2]:
import random

def get_random_numbers(count):
    """Draw ``count`` random integers and return the two largest even ones.

    Each integer is drawn with ``random.randint(0, 100)``.

    :param int count: how many random integers to draw
    :return list: at most two even values in ascending order; fewer when
        fewer than two even numbers were drawn
    """
    drawn = [random.randint(0, 100) for _ in range(count)]
    # Sorting ascending puts the largest even values at the tail.
    evens = sorted(value for value in drawn if value % 2 == 0)
    return evens[-2:]

get_random_numbers(10)

[82, 100]

In [3]:
def get_random_numbers(count):
    """Draw ``count`` random integers and return the two largest even ones.

    Same task as the loop-based version, written with comprehensions.

    :param int count: how many integers to draw with ``random.randint(0, 100)``
    :return list: at most two even values, largest first
    """
    drawn = [random.randint(0, 100) for _ in range(count)]
    evens = [value for value in drawn if value % 2 == 0]
    # Descending order puts the two largest values at the front.
    evens.sort(reverse=True)
    return evens[:2]
get_random_numbers(10)

[64, 62]

### 2. 列表推导式
列表推导式是Python的语法糖，用来快速生成列表

In [4]:
import math
series = range(10)
# Identity comprehension: copies every item of the range into a new list.
print([item for item in series])
# Mapping: math.pow returns floats, so the squares print as 0.0, 1.0, 4.0, ...
print([math.pow(item, 2) for item in series])
# Mapping plus filter: only items greater than 4 are squared.
print([math.pow(item, 2) for item in series if item > 4])

print('-' * 20)
booze = ['Beer', 'Wine']
soft_drinks = ['Water', 'Soda', 'Juice']
# Two for-clauses produce every (x, y) combination, with x varying slowest.
print([(x, y) for x in booze for y in soft_drinks])
# zip pairs items by position and stops at the shorter list, so 'Juice' is dropped.
print([x for x in zip(booze, soft_drinks)])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0.0, 1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0, 81.0]
[25.0, 36.0, 49.0, 64.0, 81.0]
--------------------
[('Beer', 'Water'), ('Beer', 'Soda'), ('Beer', 'Juice'), ('Wine', 'Water'), ('Wine', 'Soda'), ('Wine', 'Juice')]
[('Beer', 'Water'), ('Wine', 'Soda')]


### 编程实践：使用列表推导式把小写字母列表转换为大写字母列表

In [5]:
letters = list('qwertyuiopasdfghjklzxcvbnm')
def convert_letters(letter_list):
    """Upper-case the letters in ``letter_list`` and keep every second one.

    :param list letter_list: iterable of lowercase letter strings
    :return list: upper-cased letters at positions 0, 2, 4, ... of the input
    """
    # Read from the parameter, not the module-level ``letters`` list, so the
    # function works for whatever list the caller passes in.
    result_list = [x.upper() for x in letter_list]
    # The stride-2 slice keeps every other letter, as in the original exercise.
    return result_list[::2]

convert_letters(letters)

['Q', 'E', 'T', 'U', 'O', 'A', 'D', 'G', 'J', 'L', 'X', 'V', 'N']

### 3. 字典推导式和集合推导式

In [6]:
list_of_tuples = [('Mike','Python'), ('Jim','Ruby'), ('Brad','PHP')]

# Dict comprehension: unpack each 2-tuple into key and value.
d1 = {key: value for key, value in list_of_tuples}
print(d1)
# Swap keys and values to build the reverse mapping.
d2 = {val: key for key, val in d1.items()}
print(d2)

booze = ['Beer', 'Wine', 'Scotch']
soft_drinks = ['Water', 'Soda', 'Juice']
# zip pairs the two lists by position; each pair becomes one dict entry.
d3 = {key: val for key, val in zip(booze, soft_drinks) }
print(d3)
# Set comprehension: keeps the characters of 'Python' that do not occur in 'thon'.
s1 = {key for key in 'Python' if key not in 'thon'}
print(s1)

{'Mike': 'Python', 'Jim': 'Ruby', 'Brad': 'PHP'}
{'Python': 'Mike', 'Ruby': 'Jim', 'PHP': 'Brad'}
{'Beer': 'Water', 'Wine': 'Soda', 'Scotch': 'Juice'}
{'P', 'y'}


### 编程实践：使用字典推导式改进从价格字典中提取价格大于200的子集

In [7]:
prices = {'ACME': 45.23, 'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.20, 'FB': 10.75}
def get_prices(price_dict, min_price=200):
    """Return the entries of ``price_dict`` whose price is above ``min_price``.

    :param dict price_dict: mapping of ticker symbol to price
    :param min_price: exclusive lower bound for the price; defaults to 200,
        the threshold used in the exercise
    :return dict: new dict holding only the entries with ``price > min_price``
    """
    # Filter the argument, not the module-level ``prices`` dict, so the
    # function works for any mapping the caller passes in.
    return {key: val for key, val in price_dict.items() if val > min_price}

get_prices(prices)

{'AAPL': 612.78, 'IBM': 205.55}

### 4. 不可变类型和可变类型
在Python中数字、字符串、元组是不可变类型，列表、字典是可变类型。不可变类型的对象不能在内存中就地修改，Python也没有提供修改的API，而可变类型的对象在内存中可以被就地修改。

In [8]:
i = 2  # Create an int object with value 2
print(i, id(i))
i = i + 1  # Creates a new int object with value 3; i now refers to the new object
# The original object 2 is unchanged; i simply no longer refers to it
print(i, id(i))

2 140712937582368
3 140712937582400


In [9]:
s = 'abc'  # Create a str object with value 'abc'
print(s, id(s))
s = s.replace('c', 'd')  # replace builds a new str object 'abd'; s now refers to the new object
# The original 'abc' is unchanged; s simply no longer refers to it
print(s, id(s))

abc 2795243697520
abd 2795311710816


In [10]:
t = (1, 2, 3)  # Create a tuple object with value (1, 2, 3)
print(t, id(t))
t = t + (4, )  # Concatenation builds a new tuple (1, 2, 3, 4); t now refers to the new object
# The original (1, 2, 3) is unchanged; t simply no longer refers to it
print(t, id(t))
# The elements of a tuple cannot be reassigned

(1, 2, 3) 2795311288536
(1, 2, 3, 4) 2795311628840


In [11]:
l = [1, 2, 3]  # Create a list object with value [1, 2, 3]
print(l, id(l))
l.append(4)  # append mutates the object in place to [1, 2, 3, 4]; l still refers to the same object
print(l, id(l))

print(l, id(l[2])) 
l[2] = 10  # Reassign the third element: that slot now refers to a different int object
# The int 3 itself is not modified (ints are immutable); only the reference stored
# in the list changed, which does change the contents of the list
print(l, id(l[2]))

[1, 2, 3] 2795311485448
[1, 2, 3, 4] 2795311485448
[1, 2, 3, 4] 140712937582400
[1, 2, 10, 4] 140712937582624


但是，对于较小的int对象（CPython中为-5到256）和较短的str对象，为节省内存空间，具有相同值的变量会指向同一个对象（这是CPython的实现细节），但是该对象依然是不可变的

In [12]:
# Two names bound to the small int 2 report the same id in the recorded output.
i1 = 2
print(i1, id(i1))
i2 = 2
print(i2, id(i2))
# For the larger value 2000 the recorded output shows two different ids,
# i.e. two separate int objects with equal value.
i1 = 2000
print(i1, id(i1))
i2 = 2000
print(i2, id(i2))

2 140712937582368
2 140712937582368
2000 2795311308432
2000 2795311306928


## 二、字符串和正则表达式

### 1. 字符串

In [13]:
a = 'It\'s time' #转义字符
b = "It's time"
c = '''It's time''' 
d = """It's 
time""" #支持多行文本
e = "It's "\
"time" 
f = r'It\'s time'
g = """[\\ \n \' \t \"]""" #常用转义字符
h = "中文"
print(a, b, c)
print(d)
print(e)
print(f)
print(g)
print(h)

It's time It's time It's time
It's 
time
It's time
It\'s time
[\ 
 ' 	 "]
中文


### 2. 字符编码
任何数据类型都是对底层二进制数据的抽象，计算机只能处理数字，如果要处理文本，就必须先把文本转换为数字才行

计算机在设计时采用8个比特（bit）作为一个字节（byte），一个字节能表示的最大的整数范围是0-255（二进制00000000-11111111），如果要表示更大的整数，就必须用更多的字节。比如两个字节可以表示的最大整数是65535，4个字节可以表示的最大整数是4294967295

计算机处理文本的方式，就是使用一个整数代表一个文本字符

ASCII编码只有128个字符（编码0-127），使用1个字节，包括大小写英文字母、数字和一些符号。比如大写字母A的编码是65（二进制01000001），小写字母z的编码是122（二进制01111010）

使用非英文字符的国家，ASCII编码难以满足要求，中国制定了GB2312编码，使用2个字节，对汉字进行编码

但是不同国家不同标准，在多语言混合的文本中，就会出现乱码。Unicode编码把所有语言都统一到一套编码里，Unicode定义了字符与整数的对应关系，一般使用2个以上字节

为节省空间，UTF-8编码把Unicode字符根据不同的数字大小编码成1-4个字节，英文字母被编码成1个字节，汉字一般是3个字节，生僻的字符被编码成4个字节

### 3. Python 3中的字符编码
Python 3的重要改进是解决了Python 2中的字符编码问题：
- Python 3默认编码是UTF-8编码，Python 2是ASCII编码
- Python 3中字符串都是str类型，都是Unicode编码；Python 2中字符串有unicode和str两种类型，unicode是Unicode文本，str是字节串（默认按ASCII编码处理）

In [14]:
a = 'A'
b = '汉'
print(ord(a), bin(ord(a)), type(a), a.encode('utf-8'), len(a.encode('utf-8')))
print(ord(b), bin(ord(b)), type(b), b.encode('utf-8'), len(b.encode('utf-8')))

65 0b1000001 <class 'str'> b'A' 1
27721 0b110110001001001 <class 'str'> b'\xe6\xb1\x89' 3


在python代码文件中使用以下声明告知解释器使用utf-8的方式解码python代码文本

#-\*- coding: utf-8 -\*-

![](images/python3encode.jpg)

In [15]:
import sys
sys.getdefaultencoding()

'utf-8'

### 4. 使用标准库中的字符串函数

In [16]:
str1 = 'it is a beautifull day.'

print('str1 =', repr(str1))
print('str1.split() then', str1.split())
print('str1.capitalize() =', str1.capitalize())
print('str1.find("a") =', str1.find("a")) # returns -1 when the substring is not found
print('str1.index("a") =', str1.index("a")) # raises an exception when the substring is not found
print('str1.endswith("ay") =', str1.endswith("ay")) # False here: the string ends with "."
print('str1.startswith("it") =', str1.startswith("it"))
print('str1.title() =', str1.title())
print('str1.upper() =', str1.upper())
print('str1.lower() =', str1.lower())
print('str1.lstrip() =', str1.lstrip()) # no leading whitespace, so the result equals str1
print("str1.rstrip('.') =", str1.rstrip('.'))
print('str1.strip() =', str1.strip()) # no surrounding whitespace, so the result equals str1
print('str1.replace("beautifull","bad") =', str1.replace("beautifull", "bad"))
print('",".join(str1.split()) =', ",".join(str1.split()))
print('len(str1) =' ,len(str1))

str1 = 'it is a beautifull day.'
str1.split() then ['it', 'is', 'a', 'beautifull', 'day.']
str1.capitalize() = It is a beautifull day.
str1.find("a") = 6
str1.index("a") = 6
str1.endswith("ay") = False
str1.startswith("it") = True
str1.title() = It Is A Beautifull Day.
str1.upper() = IT IS A BEAUTIFULL DAY.
str1.lower() = it is a beautifull day.
str1.lstrip() = it is a beautifull day.
str1.rstrip('.') = it is a beautifull day
str1.strip() = it is a beautifull day.
str1.replace("beautifull","bad") = it is a bad day.
",".join(str1.split()) = it,is,a,beautifull,day.
len(str1) = 23


In [17]:
?str1.capitalize

Signature: str1.capitalize()
Docstring:
Return a capitalized version of the string.

More specifically, make the first character have upper case and the rest lower
case.
Type:      builtin_function_or_method


### 5. 字符串格式化

In [18]:
print('it is a %s day.' % 'beautifull')
print('%s. Your age is %d' % ('Julia', 18))
print('''Hello, %s
It's a %s day.
Let's go %s.''' % ('Julia', 'bad', 'home'))

print('%10s%10s' % ('Name', 'Age'))
print('-' * 20)
print('%10s%10.2f' % ('Julia', 1.23456))

it is a beautifull day.
Julia. Your age is 18
Hello, Julia
It's a bad day.
Let's go home.
      Name       Age
--------------------
     Julia      1.23


### 6. 使用正则表达式
正则表达式(regular expression)描述了一种字符串匹配的模式（pattern），可以用来检查一个串是否含有某种子串、将匹配的子串替换或者从某个串中取出符合某个条件的子串等

|表示法|描述|正则表达式示例|可匹配字符串|不可匹配字符串|
|---|---|---|---|---|
|.|匹配任意一个字符|a.a|aaa或aba|aab|
|^|匹配字符串开始位置|^aa|aaa|baa|
|\$|匹配字符串结束位置|aa\$|aaa|aab|
|\*|匹配0次或多次前面出现的正则表达式|a\*b|b或ab或aab|cb|
|+|匹配1次或多次前面出现的正则表达式|a+b|ab或aab|b|
|?|匹配0次或1次前面出现的正则表达式|a?b|b或ab|aab|
|{N}|匹配N次前面出现的正则表达式|a{2}b|aab|ab|
|{N,M}|匹配N至M次前面出现的正则表达式|a{1,2}b|ab或aab|aaab|
|\d|匹配一个数字|data\d|data0或data1|dataa|
|\w|匹配一个字母、数字或下划线|data\w|data0或dataa|data-|
|\s|匹配一个空白字符（空格、制表符、换行等）|data\sdata|data data|datadata|

In [19]:
import re
s = 'wise@163.com; soe@qq.com'
# Patterns are raw strings so that \w, \d and \s reach the regex engine
# unchanged; in a plain literal they are invalid string escapes, which newer
# Python versions warn about.
m1 = re.match(r'\w{2}', s)  # re.match only matches at the start of the string
if m1 is not None:
    print(m1.group())

m2 = re.search(r'\d+', s)  # re.search finds the first match anywhere in the string
if m2 is not None:
    print(m2.group())

# str.split() only splits on whitespace, so the ';' stays attached to the first address.
g1 = s.split()
print(g1)

# Splitting on runs of whitespace and/or ';' yields clean addresses.
g2 = re.split(r'[\s;]+', s)

print(g2)

wi
163
['wise@163.com;', 'soe@qq.com']
['wise@163.com', 'soe@qq.com']


In [20]:
# Raw string: \w, \d and \. are regex escapes, not valid str escapes.
# The three groups capture the user name, the numeric domain name and the suffix.
m3 = re.search(r'(\w+)@(\d+)\.(\w+)', s)
if m3 is not None:
    print(m3.groups())

('wise', '163', 'com')


In [21]:
# The quantifiers * + ? are greedy by default; appending ? makes them non-greedy,
# so (.+?) stops at the first '.' that lets the rest of the pattern match.
# Raw string: \w and \. are regex escapes, not valid str escapes.
m3 = re.search(r'(\w+)@(.+?)\.(\w+)', s)
if m3 is not None:
    print(m3.groups())

('wise', '163', 'com')


In [22]:
# findall returns one tuple of groups per match. The quantifiers * + ? are greedy
# by default; appending ? makes them non-greedy.
# Raw string: \w and \. are regex escapes, not valid str escapes.
g3 = re.findall(r'(\w+)@(.+?)\.(\w+)', s)
print(g3)

[('wise', '163', 'com'), ('soe', 'qq', 'com')]


In [23]:
re.sub('(wise|soe)', 'python', s)

'python@163.com; python@qq.com'

In [24]:
# Compile the pattern once and reuse it for every address. The raw string keeps
# \w and \. intact for the regex engine instead of relying on invalid str escapes.
r = re.compile(r'(\w+)@(\w+)\.(\w+)')
for g in g2:
    m = r.search(g)
    if m is not None:
        print(m.groups())

('wise', '163', 'com')
('soe', 'qq', 'com')


### 编程实践：从字符串中去掉不需要的字符

In [25]:
s = '--- Hello   world \n!==='
print(s)

--- Hello   world 
!===


In [26]:
# Trim the leading '-' and trailing '=' runs plus surrounding whitespace, then
# collapse every run of whitespace to a single space. \s already matches '\n',
# so no separate newline is needed in the pattern; the raw string avoids the
# invalid str escape '\s'.
re.sub(r'\s+', ' ', s.lstrip('-').rstrip('=').strip())

'Hello world !'

## 三、代码阅读：通过股票成交推导持仓

- 数据说明：
    - 委托（data/ch04/orders.csv），包含最终成交状态的委托数据：
        - order_code  委托代码
        - stock_code  标的代码        
        - order_date  委托日期
        - order_time  委托时间
        - operation   操作类型：1买入 2卖出 3融资买入 4融券卖出 5买券还券 6卖券还款
        - direction   操作方向：Direction.BUY Direction.SELL
        - status      委托状态：2待报 4已报 5废单 6部成 7成交 8部撤 9已撤 10待撤
        - volume      委托量
        - price       委托价
        - deal_volume 成交量
        - deal_price  成交价
- 通过股票委托的最终成交状态推导持仓：
    - 持仓：
        - stock_code  标的代码        
        - position_date  持仓日期
        - direction   操作方向：Direction.BUY Direction.SELL
        - volume      持仓量
        - price       持仓价
    - 根据委托成交顺序，将每个委托成交结果合并进当前持仓
    - 根据每个委托的买卖方向增减持仓量并更新持仓价，假设盈利和亏损不影响持仓价格

In [2]:
import sys
import os
import pandas as pd
import logging
sys.path.append(os.getcwd())
from ch01 import get_stock_cost, Direction
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(thread)d %(levelname)s %(module)s - %(message)s')
logger = logging.getLogger(__name__)

In [3]:
orders = pd.read_csv('data/ch04/orders.csv').to_dict(orient='records')

In [5]:
def get_positions(date):
    """
    Derive positions from the final fill state of the stock orders.

    Walks the module-level ``orders`` records in sequence and folds every
    filled order dated on or before ``date`` into one position per stock code.

    :param int date: cut-off date; orders whose ``order_date`` is greater are
        skipped (the example call passes 20210105 -- presumably YYYYMMDD)
    :return list: position dicts with the keys ``stock_code``,
        ``position_date``, ``direction``, ``volume`` and ``price``
    """
    positions = []
    for order in orders:
        # Ignore orders after the cut-off date and orders with nothing filled.
        if order['order_date'] > date or order['deal_volume'] == 0:
            continue
        # Look up an existing position for this stock (every position built
        # here carries position_date == date).
        arr = [p for p in positions if p['stock_code'] == order['stock_code'] and p['position_date'] == date]
        # Signed direction of the fill: -1 for a sell, +1 otherwise.
        o_direction = -1 if order['direction'] == Direction.SELL else 1
        # NOTE(review): get_stock_cost is imported from ch01; presumably it
        # returns the transaction cost of this fill -- confirm there.
        fee = get_stock_cost(str(order['stock_code']), order['deal_price'], order['deal_volume'], order['direction'])
        # A position for this stock already exists
        if arr:
            position = arr[0]
            # Signed direction of the current position: -1 for SELL, +1
            # otherwise (including the '' direction of a flat position).
            p_direction = -1 if position['direction'] == Direction.SELL else 1
            # Net signed volume after applying this fill.
            volume = p_direction * position['volume'] + o_direction * order['deal_volume']
            if volume == 0:
                # The fill closes the position exactly: reset it to flat.
                position['volume'] = 0
                position['price'] = 0
                position['direction'] = ''
            else:
                # The sign of the net volume gives the new direction.
                position['direction'] = Direction.SELL if volume < 0 else Direction.BUY
                # New price = (signed value of the old position + signed value
                # of the fill + fee) / net signed volume. This must run before
                # position['volume'] is overwritten below.
                position['price'] = (p_direction * position['volume'] * position['price'] +
                                     o_direction * order['deal_volume'] * order['deal_price'] +
                                     fee) / volume
                # The stored volume is always non-negative; direction holds the sign.
                position['volume'] = -1 * volume if volume < 0 else volume
            logger.debug(position)
        else:
            # First fill for this stock: open a new position. The price folds
            # the fee into the fill value before dividing by the signed volume.
            position = {'stock_code': order['stock_code'],
                        'position_date': date,
                        'direction': order['direction'],
                        'volume': order['deal_volume'],
                        'price': (o_direction * order['deal_price'] * order['deal_volume'] + fee) / (o_direction * order['deal_volume'])
                        }
            logger.debug(position)
            positions.append(position)
    return positions

get_positions(20210105)

2021-07-06 17:24:20,678 3064 DEBUG <ipython-input-5-75508e881401> - {'stock_code': 600864, 'position_date': 20210105, 'direction': 'BUY', 'volume': 10000, 'price': 8.451858999999999}
2021-07-06 17:24:20,679 3064 DEBUG <ipython-input-5-75508e881401> - {'stock_code': 600864, 'position_date': 20210105, 'direction': 'BUY', 'volume': 20000, 'price': 8.446857899999998}
2021-07-06 17:24:20,680 3064 DEBUG <ipython-input-5-75508e881401> - {'stock_code': 600584, 'position_date': 20210105, 'direction': 'BUY', 'volume': 2700, 'price': 42.669385199999994}
2021-07-06 17:24:20,681 3064 DEBUG <ipython-input-5-75508e881401> - {'stock_code': 600864, 'position_date': 20210105, 'direction': 'BUY', 'volume': 12400, 'price': 8.43900740645161}
2021-07-06 17:24:20,681 3064 DEBUG <ipython-input-5-75508e881401> - {'stock_code': 600584, 'position_date': 20210105, 'direction': 'SELL', 'volume': 900, 'price': 42.663370800000024}
2021-07-06 17:24:20,682 3064 DEBUG <ipython-input-5-75508e881401> - {'stock_code': 600

[{'stock_code': 600864,
  'position_date': 20210105,
  'direction': '',
  'volume': 0,
  'price': 0},
 {'stock_code': 600584,
  'position_date': 20210105,
  'direction': '',
  'volume': 0,
  'price': 0}]