### 1. 测算代码运算时间

#### 平凡方法

In [1]:
import time 
tic = time.time()
much_job = [x ** 2 for x in range(1, 1000000, 3)]
toc = time.time()
print("used {:.5}s".format(toc - tic))

used 0.14775s


#### jupyter环境

In [2]:
%%time
much_job = [x ** 2 for x in range(1, 1000000, 3)]

Wall time: 144 ms


### 2. 测算代码多次运行平均时间

#### 平凡方法

In [3]:
from timeit import timeit
g = lambda x:x ** 2 + 1
def main():
    return (g(2) ** 120)
timeit('main()',globals={'main':main},number=10)

1.777775409550486e-05

#### 快捷方法（jupyter环境）

In [4]:
%%timeit -n 10
g = lambda x:x ** 2 + 1
def main():
    return (g(2) ** 120)
main()

1.65 µs ± 180 ns per loop (mean ± std. dev. of 7 runs, 10 loops each)


### 3. 按调用函数分析代码运行时间

#### 平凡方法

In [5]:
def relu(x):
    return x if x > 0 else 0
def main():
    result = [relu(x) for x in range(-10000,10000,1)]
    return result

In [6]:
import profile
profile.run('main()')

         20006 function calls in 0.031 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.031    0.031 :0(exec)
        1    0.000    0.000    0.000    0.000 :0(setprofile)
    20000    0.000    0.000    0.000    0.000 <ipython-input-5-e084a4768f8a>:1(relu)
        1    0.000    0.000    0.031    0.031 <ipython-input-5-e084a4768f8a>:3(main)
        1    0.031    0.031    0.031    0.031 <ipython-input-5-e084a4768f8a>:4(<listcomp>)
        1    0.000    0.000    0.031    0.031 <string>:1(<module>)
        1    0.000    0.000    0.031    0.031 profile:0(main())
        0    0.000             0.000          profile:0(profiler)




#### 快捷方法（jupyter环境）

In [7]:
%prun main()

 

### 5. 用set而非list进行查找

#### 低速方法

In [8]:
data = (i ** 2 + 1 for i in range(1000000))
list_data = list(data)
set_data = set(data)

In [9]:
%%time
1098987 in list_data

Wall time: 12.5 ms


False

#### 高速方法

In [10]:
%%time
1098987 in set_data

Wall time: 0 ns


False

### 6. 用dict而非两个list进行匹配查找

#### 低速方法

In [11]:
list_a = [2 * i - 1 for i in range(1000000)]
list_b = [i ** 2 for i in list_a]
dict_ab = dict(zip(list_a,list_b))

In [12]:
%%time
print(list_b[list_a.index(876567)])

768369705489
Wall time: 8.52 ms


#### 高速方法

In [13]:
%%time
print(dict_ab.get(876567,None))

768369705489
Wall time: 0 ns


### 7. 优先使用for循环而不是while循环

#### 低速方法

In [18]:
%%time
s,i = 0,0
while i < 10000:
    i = i + 1
    s = s + i
print(s)

50005000
Wall time: 4.04 ms


#### 高速方法

In [19]:
%%time
s = 0
for i in range(1,10001):
    s = s + i
print(s)

50005000
Wall time: 1.5 ms


### 8. 在循环体中避免重复计算

#### 低速方法

In [20]:
a = [i ** 2 + 1 for i in range(2000)]

In [21]:
%%time
b = [i / sum(a) for i in a]

Wall time: 43.6 ms


#### 高速方法

In [22]:
%%time
sum_a = sum(a)
b = [i /sum_a for i in a]

Wall time: 0 ns


### 9. 用循环机制代替递归函数

#### 低速方法

In [23]:
%%time
def fib(n):
    return 1 if n in (1,2) else fib(n - 1) + fib(n - 2)
print(fib(30))

832040
Wall time: 290 ms


#### 高速方法

In [24]:
%%time
def fib(n):
    if n in (1,2):
        return 1
    a , b = 1, 1
    for i in range(2,n):
        a,b = b,a+b
    return b
print(fib(30))

832040
Wall time: 0 ns


### 10. 用缓存机制加速递归函数

#### 低速方法

In [25]:
%%time
def fib(n):
    return 1 if n in (1,2) else fib(n - 1) + fib(n - 2)
print(fib(30))

832040
Wall time: 283 ms


#### 高速方法

In [26]:
%%time
from functools import lru_cache
@lru_cache(100)
def fib(n):
    return 1 if n in (1,2) else fib(n - 1) + fib(n - 2)
print(fib(30))

832040
Wall time: 0 ns


### 11. 用numba加速Python函数

#### 低速方法

In [27]:
%%time
def my_power(x):
    return x ** 2
def my_powwe_sum(n):
    s = 0
    for i in range(1,n+1):
        s = s + my_power(i)
    return s
print(my_powwe_sum(1000000))

333333833333500000
Wall time: 513 ms


#### 高速方法

In [30]:
%%time
from numba import jit
@jit
def my_power(x):
    return x ** 2
@jit
def my_powwe_sum(n):
    s = 0
    for i in range(1,n+1):
        s = s + my_power(i)
    return s
print(my_powwe_sum(1000000))

333333833333500000
Wall time: 78.7 ms


### 12. 使用collections.Counter加速计数

#### 低速方法

In [32]:
data = [x ** 2 % 1989 for x in range(2000000)]

In [33]:
%%time
values_count = {}
for i in data:
    i_cnt = values_count.get(i,0)
    values_count[i] = i_cnt + 1
print(values_count.get(4,0))

8044
Wall time: 750 ms


#### 高速方法

In [34]:
%%time
from collections import Counter
values_count = Counter(data)
print(values_count.get(4,0))

8044
Wall time: 211 ms


### 13. 使用collections.ChainMap加速字典合并

#### 低速方法

In [41]:
dic_a = {i:i + 1 for i in range(1,1000000,2)}
dic_b = {i:2 * i + 1 for i in range(1,1000000,3)}
dic_c = {i:3 * i + 1 for i in range(1,1000000,5)}
dic_d = {i:4 * i + 1 for i in range(1,1000000,7)}

In [44]:
%%time
result = dic_a.copy()
result.update(dic_b)
result.update(dic_c)
result.update(dic_d)
print(result.get(9999,0))

10000
Wall time: 117 ms


#### 高速方法

In [45]:
%%time
from collections import ChainMap
chain = ChainMap(dic_a,dic_b,dic_c,dic_d)
###print(chain.get(9999,0))

10000
Wall time: 0 ns


## 使用numpy向量化加速
### 14. 使用np.array代替list

#### 低速方法

In [46]:
%%time
a = range(1,1000000,3)
b = range(1000000,1,-3)
c = [3 * a[i] - 2 * b[i] for i in range(0,len(a))]

Wall time: 173 ms


#### 高速方法

In [48]:
%%time
import numpy as np
array_a = np.arange(1,1000000,3)
array_b = np.arange(1000000,1,-3)
array_c = 3 * array_a - 2 * array_b

Wall time: 5.01 ms


### 15. 使用np.ufunc代替math.func

#### 低速方法

In [49]:
%%time
import math
a = range(1,1000000,3)
b = [math.log(x) for x in a]

Wall time: 66.2 ms


#### 高速方法

In [51]:
%%time
array_a = np.arange(1,1000000,3)
array_b = np.log(array_a)

Wall time: 6.02 ms


### 16. 使用np.where代替if

#### 低速方法

In [52]:
array_a = np.arange(-100000,100000)

In [54]:
%%time
# np.vectorize可以将普通函数转换成支持向量化的函数
relu = np.vectorize(lambda x:x if x > 0 else 0)
array_b = relu(array_a)

Wall time: 47.6 ms


#### 高速方法

In [55]:
%%time
relu = lambda x:np.where(x > 0 ,x,0)
array_b = relu(array_a)

Wall time: 3.02 ms


## 加速你的Pandas
### 17. 使用np.ufunc函数代替applymap

#### 低速方法

In [56]:
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randint(-10,11,size=(100000,26)),columns=list("abcdefghijklmnopqrstuvwxyz"))

%time dfresult = df.applymap(lambda x:np.sin(x) + np.cos(x))

Wall time: 8.38 s


In [57]:
dfresult.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,q,r,s,t,u,v,w,x,y,z
0,1.381773,1.242586,0.493151,-1.410446,-1.134858,-1.131113,0.103159,0.843858,1.381773,0.096916,...,-1.383093,-0.499012,1.239586,-0.499012,-0.301169,1.381773,1.381773,1.0,1.0,1.0
1,-0.848872,-1.325444,-1.325444,-0.848872,0.096916,-1.131113,0.103159,-0.675262,1.410889,1.239586,...,-1.323249,-0.29505,1.381773,-1.323249,1.381773,0.096916,-0.29505,-0.848872,1.410889,0.103159
2,1.0,1.410889,-1.410446,1.410889,1.242586,1.242586,-1.134858,-0.499012,-0.301169,-1.131113,...,-0.675262,-1.323249,-0.848872,0.493151,0.843858,-1.131113,-1.131113,1.410889,1.0,1.0
3,-1.134858,-0.301169,0.843858,-0.29505,-0.499012,-1.323249,-0.29505,0.493151,1.242586,-0.675262,...,1.242586,1.0,0.680755,0.843858,-0.29505,-0.29505,-0.29505,0.493151,-1.383093,0.493151
4,1.381773,1.242586,0.103159,-1.325444,-1.325444,1.239586,-1.410446,-0.675262,-1.323249,1.410889,...,0.680755,-1.410446,-1.410446,1.0,-1.325444,-0.848872,1.242586,1.381773,-1.383093,1.0


#### 高速方法

In [58]:
%%time
dfresult = np.sin(df) + np.cos(df)

Wall time: 310 ms


In [59]:
dfresult.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,q,r,s,t,u,v,w,x,y,z
0,1.381773,1.242586,0.493151,-1.410446,-1.134858,-1.131113,0.103159,0.843858,1.381773,0.096916,...,-1.383093,-0.499012,1.239586,-0.499012,-0.301169,1.381773,1.381773,1.0,1.0,1.0
1,-0.848872,-1.325444,-1.325444,-0.848872,0.096916,-1.131113,0.103159,-0.675262,1.410889,1.239586,...,-1.323249,-0.29505,1.381773,-1.323249,1.381773,0.096916,-0.29505,-0.848872,1.410889,0.103159
2,1.0,1.410889,-1.410446,1.410889,1.242586,1.242586,-1.134858,-0.499012,-0.301169,-1.131113,...,-0.675262,-1.323249,-0.848872,0.493151,0.843858,-1.131113,-1.131113,1.410889,1.0,1.0
3,-1.134858,-0.301169,0.843858,-0.29505,-0.499012,-1.323249,-0.29505,0.493151,1.242586,-0.675262,...,1.242586,1.0,0.680755,0.843858,-0.29505,-0.29505,-0.29505,0.493151,-1.383093,0.493151
4,1.381773,1.242586,0.103159,-1.325444,-1.325444,1.239586,-1.410446,-0.675262,-1.323249,1.410889,...,0.680755,-1.410446,-1.410446,1.0,-1.325444,-0.848872,1.242586,1.381773,-1.383093,1.0


### 18. 使用预分配存储代替动态扩容

#### 低速方法

In [60]:
%%time
df = pd.DataFrame(columns=list("abcdefghijklmnopqrstuvwxyz"))
for i in range(10000):
    df.loc[i,:] = range(i,i + 26)

Wall time: 16 s


In [61]:
df.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,q,r,s,t,u,v,w,x,y,z
0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,16.0,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0
1,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0
2,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,...,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0
3,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,...,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0
4,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,...,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0


#### 高速方法

In [63]:
%%time
df = pd.DataFrame(np.zeros((10000,26)),columns=list("abcdefghijklmnopqrstuvwxyz"))
for i in range(10000):
    df.loc[i,:] = range(i,i + 26)

Wall time: 2.49 s


### 19. 使用csv文件读写代替excel文件读写

#### 低速方法

In [65]:
%%time
df.to_excel("data.xlsx")

Wall time: 5.04 s


#### 高速方法

In [67]:
%%time
df.to_csv("data.csv")

Wall time: 283 ms


### 20. 使用pandas多进程工具pandarallel

In [68]:
df = pd.DataFrame(np.random.randint(-10,11,size=(100000,26)),
                  columns=list("abcdefghijklmnopqrstuvwxyz"))

In [70]:
%%time
result = df.apply(np.sum,axis=1)

Wall time: 9.57 s


In [71]:
result.head()

0   -41
1    35
2   -41
3    68
4     6
dtype: int64

#### 高速方法

In [None]:
%%time
from pandarallel import pandarallel
pandarallel.initiallize(nb_workers=4)
result = df.parallel_apply(np.sum,axis=1)

## 使用Dask进行加速
### 21. 使用dask加速dataframe

#### 低速方法

In [74]:
df = pd.DataFrame(np.random.randint(-10,11,size=(100000,26)),
                  columns=list("abcdefghijklmnopqrstuvwxyz"))
%time df.groupby('a').mean()

Wall time: 54.6 ms


Unnamed: 0_level_0,b,c,d,e,f,g,h,i,j,k,...,q,r,s,t,u,v,w,x,y,z
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-10,0.066513,0.038397,-0.051825,0.199119,0.032522,0.048258,0.089173,0.087285,-0.176878,-0.026437,...,-0.100923,0.032102,0.097146,0.048468,-0.052665,-0.096517,-0.086236,0.056022,0.026227,-0.02329
-9,0.076331,-0.168484,-0.003635,-0.052384,0.112038,-0.037631,-0.088518,-0.258499,-0.147317,0.097926,...,0.037631,0.012615,0.075903,-0.076545,-0.018602,0.085311,-0.002566,0.124439,-0.092795,0.050032
-8,-0.123756,-0.016376,0.136816,0.174337,0.072761,-0.063847,-0.147181,-0.08893,0.027985,-0.049544,...,0.004146,0.099502,0.060945,-0.024876,-0.148217,0.014718,-0.043118,-0.13806,-0.066128,0.065506
-7,-0.121046,0.136651,0.167018,-0.096584,-0.020666,0.056938,-0.025306,0.11957,0.119781,-0.141501,...,-0.070856,0.023408,-0.0814,-0.004639,-0.060523,-0.027836,0.063264,-0.094053,0.015183,0.0
-6,0.128793,0.047884,-0.070175,0.035707,-0.03839,-0.046852,-0.087926,0.009701,0.034056,-0.169866,...,0.106502,-0.103612,0.030341,-0.033024,0.030547,0.160784,-0.03323,0.107327,0.010526,-0.088338
-5,0.133834,0.078591,0.102564,-0.068793,-0.086095,-0.195747,-0.10444,-0.021055,-0.074838,-0.142172,...,0.026892,-0.06379,0.006671,0.202627,0.097769,0.002293,-0.029185,-0.089014,0.103815,-0.053784
-4,0.110806,0.085268,-0.054664,0.062263,-0.001688,0.040523,0.217391,-0.013086,0.115027,-0.03377,...,-0.131068,-0.001477,-0.120937,-0.013086,0.021106,-0.123892,-0.040101,0.072393,-0.007809,-0.066695
-3,-0.104084,-0.084607,-0.050471,0.013822,-0.080419,-0.062827,-0.004398,0.038953,-0.010052,0.059686,...,0.002723,0.12377,-0.066178,0.045026,-0.03644,0.035183,0.050052,0.024084,-0.014869,0.055707
-2,-0.078025,0.074818,-0.027576,-0.101753,0.055793,0.006199,-0.049808,0.164173,-0.010047,-0.010047,...,-0.006627,-0.036554,-0.087858,0.022232,0.000855,-0.245618,0.05451,-0.025866,-0.11522,-0.021163
-1,0.001064,-0.082145,0.017238,-0.034688,0.099808,0.018089,0.041073,-0.075122,0.166206,0.020004,...,-0.04469,0.092573,-0.077676,0.014046,0.179825,0.115982,0.026389,-0.012769,-0.018089,0.047244


#### 高速方法

In [None]:
import dask.dataframe as dd
df_dask = dd.from_pandas(df,npartitions=40)
%time df_dask.groupby('a').mean().compute()

### 22. 使用dask.delayed进行加速

#### 低速方法

In [76]:
import time
def muchjob(x):
    time.sleep(5)
    return x ** 2

In [78]:
%%time
result = [muchjob(i) for i in range(5)]
result

Wall time: 25 s


#### 高速方法

In [79]:
%%time
from dask import delayed,compute
from dask import threaded,multiprocessing
values = [delayed(muchjob)(i) for i in range(5)]
result = compute(*values,scheduler='multiprocessing')

Wall time: 10 s


## 应用多线程多进程加速
### 23. 应用多线程加速IO密集型任务

#### 低速方法

In [80]:
%%time
def writefile(i):
    with open(str(i) + '.txt','w') as f:
        s = ('hello %d' % i) * 10000000
        f.write(s)
    
# 串行任务
for i in range(10):
    writefile(i)

Wall time: 9.24 s


#### 高速方法

In [84]:
%%time
import threading

def writefile(i):
    with open(str(i) + '.txt','w') as f:
        s = ('hello %d' % i) * 10000000
        f.write(s)

# 多线程任务
thread_list = []
for i in range(10):
    t = threading.Thread(target=writefile,args=(i,))
    t.setDaemon(True)
    thread_list.append(t)
    
for t in thread_list:
    t.start() # 启动线程
    
for t in thread_list:
    t.join() # 等待子线程结束

Wall time: 9.24 s


### 24. 应用多进程加速CPU密集型任务

#### 低速方法

In [85]:
%%time
import time
def nuchjob(x):
    time.sleep(5)
    return x ** 2

# 串行任务
ans = [muchjob(i) for i in range(8)]
print(ans)

[0, 1, 4, 9, 16, 25, 36, 49]
Wall time: 40 s


#### 高速方法

In [None]:
%%time
import multiprocessing
data = range(8)

def muchjob(x):
    time.sleep(5)
    return x ** 2

# 多进程任务
pool = multiprocessing.Pool(processes=4)
result = []
for i in range(8):
    result.append(pool.apply_async(muchjob,(1,)))

pool.close()
pool.join()
ans = [res.get() for res in result]
print(ans)