In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

## 数组对象

### 1. 向量加法

In [2]:
def numpySum(n):
    """Return the element-wise sum of the squares and cubes of 0..n-1.

    Parameters
    ----------
    n : int
        Number of elements; forwarded to ``np.arange``.

    Returns
    -------
    numpy.ndarray
        Array whose i-th entry is ``i**2 + i**3``.
    """
    base = np.arange(n)
    return base ** 2 + base ** 3

In [6]:
tmp = numpySum(3)
print(tmp[0:-1])

[0 2]


## numpy基础

### 2.1 numpy数组对象

In [7]:
a = np.arange(5)
a.dtype

dtype('int32')

In [8]:
print(a)
print(a.shape)

[0 1 2 3 4]
(5,)


### 2.2 多维数组

In [9]:
a = np.array([np.arange(2), np.arange(2)])

In [11]:
print(a)
print("shape", a.shape)

[[0 1]
 [0 1]]
shape (2, 2)


In [13]:
# 数据类型，单个元素占用字节数
print('数据类型', a.dtype)
print(a.dtype.itemsize)

数据类型 int32
4


In [15]:
t = np.dtype('f8')
print(t.char)
print(t.type)

d
<class 'numpy.float64'>


### 2.3 自定义数据类型
> 定义一个对象：例如商品（id,name,desc...）

In [17]:
t = np.dtype([('name', np.str_, 40), ('numitems', np.int32), ('price', np.float32)])

In [19]:
t

dtype([('name', '<U40'), ('numitems', '<i4'), ('price', '<f4')])

In [20]:
t['name']

dtype('<U40')

In [21]:
item = np.array([('meaning of life dvd', 42, 3.14),
                ('butter', 13, 2.72)], dtype=t)

In [26]:
print(item[1])
print('name:', item[0]['name'])

('butter', 13,  2.72000003)
name: meaning of life dvd


### 2.5 多维数组的切片和索引

In [28]:
b = np.arange(24).reshape(2,3,4)

In [29]:
print(b.shape)

(2, 3, 4)


In [30]:
print(b[:,:,:])

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


In [35]:
print(b[0, 1, ::1])
print(b[0, 1, ::2])

[4 5 6 7]
[4 6]


### 2.6 改变数组维度

In [42]:
# ravel() flattens the array and returns a view of the original data whenever
# possible, so writing through the returned array also changes `b`.
# The flattened result is discarded here; `b` itself is printed unchanged.
b.ravel()
print(b)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


In [43]:
# flatten() also flattens the array, but it always returns a new copy,
# so changes to the result never affect `b`.
b.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [50]:
# Change the dimensions of the original array in place by assigning to .shape.
b.shape = (6,4)
print(b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [49]:
# 矩阵转置
b.transpose()

array([[ 0,  4,  8, 12, 16, 20],
       [ 1,  5,  9, 13, 17, 21],
       [ 2,  6, 10, 14, 18, 22],
       [ 3,  7, 11, 15, 19, 23]])

In [53]:
# reshape() returns a new array object with the requested shape and leaves the
# shape of `b` untouched; resize() changes `b` itself in place and returns None.
a = b.reshape(2, 12)
print(b, a)

a = b.resize(4,6)
print(b, a)  # a is None: resize() has no return value

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]] [[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]] None


### 2.8 组合数组

In [54]:
a = np.arange(9).reshape(3,3)

In [56]:
print(a)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [57]:
b = 2 * a
print(b)

[[ 0  2  4]
 [ 6  8 10]
 [12 14 16]]


In [58]:
# 水平组合, 增加第二个维度长度，其他不变
np.hstack((a, b))

array([[ 0,  1,  2,  0,  2,  4],
       [ 3,  4,  5,  6,  8, 10],
       [ 6,  7,  8, 12, 14, 16]])

In [59]:
np.concatenate((a, b), axis=1)

array([[ 0,  1,  2,  0,  2,  4],
       [ 3,  4,  5,  6,  8, 10],
       [ 6,  7,  8, 12, 14, 16]])

In [60]:
# 垂直组合, 增加第一个维度长度，其他不变
np.vstack((a, b))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16]])

In [64]:
np.concatenate((a, b), axis=0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16]])

In [65]:
# Depth-wise stacking: adds a third axis and pairs up the elements of `a` and `b`
# position by position along it.
np.dstack((a,b))

array([[[ 0,  0],
        [ 1,  2],
        [ 2,  4]],

       [[ 3,  6],
        [ 4,  8],
        [ 5, 10]],

       [[ 6, 12],
        [ 7, 14],
        [ 8, 16]]])

In [66]:
# Column stacking
# 1-D arrays are stacked as the columns of a 2-D array.
oned = np.arange(2)
print(oned)

[0 1]


In [67]:
twice_oned = 2 * oned
print(twice_oned)

[0 2]


In [68]:
np.column_stack((oned, twice_oned))

array([[0, 0],
       [1, 2]])

In [69]:
# 对于二维数组，效果与hstack相同
np.column_stack((a,b))

array([[ 0,  1,  2,  0,  2,  4],
       [ 3,  4,  5,  6,  8, 10],
       [ 6,  7,  8, 12, 14, 16]])

In [70]:
np.column_stack((a, b)) == np.hstack((a, b))

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]], dtype=bool)

### 2.9 数组的分割

### 2.10 分割数组

In [73]:
# 水平分割
a = np.arange(9).reshape((3,3))
print(a)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [74]:
np.hsplit(a, 3)

[array([[0],
        [3],
        [6]]), array([[1],
        [4],
        [7]]), array([[2],
        [5],
        [8]])]

In [76]:
np.split(a, 3, axis=1)

[array([[0],
        [3],
        [6]]), array([[1],
        [4],
        [7]]), array([[2],
        [5],
        [8]])]

In [77]:
# 垂直分割
np.vsplit(a, 3)

[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]

In [80]:
np.split(a, 3, axis=0)

[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]

In [81]:
# 深度分割
c = np.arange(27).reshape(3 , 3, 3)
print(c)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [82]:
np.dsplit(c, 3)

[array([[[ 0],
         [ 3],
         [ 6]],
 
        [[ 9],
         [12],
         [15]],
 
        [[18],
         [21],
         [24]]]), array([[[ 1],
         [ 4],
         [ 7]],
 
        [[10],
         [13],
         [16]],
 
        [[19],
         [22],
         [25]]]), array([[[ 2],
         [ 5],
         [ 8]],
 
        [[11],
         [14],
         [17]],
 
        [[20],
         [23],
         [26]]])]

### 2.11 数组的属性

In [86]:
# ndim 数组轴个数
b.ndim

2

In [87]:
# 元素总个数
b.size

9

In [90]:
# 一个元素所占字节数
print(b.dtype)
print(b.itemsize)

int32
4


In [91]:
# Total bytes consumed by the array's elements = itemsize * size
b.nbytes

36

In [92]:
# 数组转置
b.T

array([[ 0,  6, 12],
       [ 2,  8, 14],
       [ 4, 10, 16]])

In [93]:
# .flat is a flat iterator: it walks a multi-dimensional array as if it were 1-D.
f = b.flat

In [94]:
print(f)

<numpy.flatiter object at 0x0000012535407C20>


In [95]:
for item in f:
    print(item)

0
2
4
6
8
10
12
14
16


In [97]:
b.tolist()

[[0, 2, 4], [6, 8, 10], [12, 14, 16]]

## 3. 常用函数

### 3.2 读写文件

In [99]:
# 单位矩阵
i2 = np.eye(2)
print(i2)

[[ 1.  0.]
 [ 0.  1.]]


In [100]:
np.savetxt('eye.txt', i2)

In [102]:
i3 = np.loadtxt('eye.txt')

In [103]:
print(i3)

[[ 1.  0.]
 [ 0.  1.]]


### 3.4 csv文件

In [104]:
c, v = np.loadtxt('./tmp/apple.csv', delimiter=',', usecols=(6,7), unpack=True)

In [105]:
print(c)
print(v)

336.1
21144800.0


In [107]:
c, v = np.loadtxt('./tmp/stock.csv', delimiter=',', usecols=(2,4), unpack=True)

### 3.6 计算股票加权平均价格

In [113]:
# Volume-weighted average price (VWAP): the average of the prices in `c`,
# weighted by the corresponding volumes in `v`.
vwap = np.average(c, weights=v)
print('vwap =', vwap)

vwap = 22.4334945996


In [112]:
# Arithmetic mean price
print('mean =', np.mean(c))

mean = 20.5953810264


In [114]:
# Time-weighted average price (TWAP): each price is weighted by its position
# 0..len(c)-1 in the series, so the first element gets weight 0.
t = np.arange(len(c))
print('twap =', np.average(c, weights=t))

twap = 19.3735485918


### 3.7 取值范围

In [117]:
h, l = np.loadtxt('./tmp/stock.csv', delimiter=',', usecols=(1,3), unpack=True)

In [118]:
print('highest =', np.max(h))
print('lowest =', np.min(l))

highest = 36.35
lowest = 10.55


In [119]:
print('median =', np.median(h))

median = 21.54


In [121]:
# 极差: 最大值，最小值之差
print('spread high price =', np.ptp(h))

spread high price = 25.34


### 3.9 统计分析

In [123]:
c = np.loadtxt('./tmp/stock.csv', delimiter=',', usecols=(2,), unpack=True)

In [124]:
print('median =', np.median(c))

median = 21.06


In [125]:
# np.msort was deprecated in NumPy 1.24 and removed in NumPy 2.0;
# np.sort(c, axis=0) is its documented equivalent and returns a sorted copy.
sorted_close = np.sort(c, axis=0)
print('sorted =', sorted_close)

sorted = [ 10.79  10.91  10.92  10.96  10.97  11.    11.01  11.09  11.1   11.12
  11.23  11.26  11.34  11.42  11.47  11.48  11.5   11.61  11.67  11.76
  11.78  11.78  11.85  11.92  11.97  11.99  12.04  12.06  12.09  12.09
  12.12  12.13  12.17  12.2   12.21  12.22  12.27  12.31  12.36  12.5
  12.52  12.57  12.6   12.7   12.77  12.9   13.16  13.34  13.46  13.56
  13.85  13.95  13.97  13.99  14.01  14.02  14.1   14.11  14.28  14.3
  14.31  14.4   14.46  14.47  14.47  14.53  14.55  14.55  14.62  14.63
  14.64  14.65  14.75  14.85  14.87  14.93  14.95  14.97  14.97  15.01
  15.05  15.06  15.08  15.15  15.15  15.18  15.18  15.19  15.24  15.28
  15.28  15.29  15.3   15.3   15.3   15.31  15.34  15.38  15.38  15.4   15.4
  15.42  15.42  15.42  15.47  15.56  15.56  15.57  15.62  15.67  15.68
  15.75  15.77  15.77  15.9   15.9   15.94  15.98  16.01  16.07  16.08
  16.11  16.12  16.17  16.18  16.19  16.22  16.27  16.29  16.42  16.44
  16.46  16.48  16.66  16.74  16.81  16.82  16.96  16.99  17.29 

In [132]:
N = len(c)
# Odd N: the median is the single middle element of the sorted data.
print('middle =', sorted_close[N // 2])
# General form: average the two central elements at (N - 1) // 2 and N // 2.
# For odd N both indices coincide, so this matches np.median for any N >= 1.
print('middle =', (sorted_close[(N - 1) // 2] + sorted_close[N // 2]) / 2)

middle = 21.06
middle = 21.06


In [133]:
print('variance =', np.var(c))
print('variance from definition =', np.mean((c - c.mean()) ** 2))

variance = 20.5631094582
variance from definition = 20.5631094582


In [134]:
print('std =', np.std(c))

std = 4.53465648735


In [135]:
# Stock returns: simple return between consecutive entries, diff(c) / c[:-1],
# followed by the standard deviation of those returns.
returns = np.diff(c) / c[: -1]
print('standard deviation =', np.std(returns))

standard deviation = 0.0407481965878


In [136]:
logreturns = np.diff(np.log(c))
print('log returns =',  logreturns)

log returns = [ 0.00952388  0.00515687  0.00555676 -0.01416637 -0.00563626  0.02022879
  0.01438265  0.00711449 -0.052655   -0.00617286 -0.01560094 -0.00902533
 -0.01645375  0.05469763  0.05351595  0.00289077  0.00164813  0.0167385
 -0.02003747  0.00986444 -0.01732717  0.04991907  0.00079145 -0.00754421
  0.03869938  0.07708884  0.01024201  0.03045218 -0.03785832 -0.00567982
  0.00991861  0.02334194 -0.00206825 -0.02799343 -0.00106515  0.07853864
  0.10526199  0.0178707  -0.07160953  0.0322262  -0.02136833 -0.08631038
 -0.05816801  0.06018413 -0.05025145 -0.04217601 -0.05564349 -0.01017223
 -0.00948249  0.04502118 -0.00342141  0.00190223  0.03032237  0.00844507
  0.01595971 -0.01852241  0.03529354 -0.06354821 -0.06262984  0.04972643
  0.0575191  -0.06865807 -0.00581059  0.00812541  0.03558286 -0.00596794
  0.00819677  0.0187471  -0.01356574 -0.00592375 -0.05182112 -0.04233859
 -0.02311287 -0.00796483  0.05247105  0.02289875 -0.05908892 -0.04618487
  0.01890091 -0.00554727 -0.03261868  

In [138]:
# Filter array values: np.where returns the indices (not the values) at which c > 30.
print('filter with 30 =', np.where(c > 30))

filter with 30 = (array([ 36,  37,  38,  39,  40,  41, 525, 526, 527, 528, 529, 530, 531,
       532, 533], dtype=int64),)


In [140]:
# Volatility = std of the log returns / their mean / sqrt(1 / trading days), with 252 trading days.
# NOTE(review): dividing by the mean makes the sign follow the mean log return, and the
# recorded result is negative. Annualized volatility is conventionally std * sqrt(252),
# without the division by the mean; confirm which formula is intended.
annual_volatility = np.std(logreturns) / np.mean(logreturns)
annual_volatility = annual_volatility / np.sqrt(1. / 252.)
print(annual_volatility)

-566.536934909


In [141]:
print('月波动率 =', annual_volatility * np.sqrt( 1. / 12.))

月波动率 = -163.545125938


### 3.13 日期分析

In [143]:
import datetime

In [148]:
def _weekday_from_date(raw):
    """Convert a 'YYYY-MM-DD' field into its weekday index (Monday == 0).

    np.loadtxt hands converters either bytes or str depending on the NumPy
    version and the ``encoding`` argument, so both are accepted here; calling
    ``.decode`` unconditionally raises AttributeError when a str arrives.
    """
    text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
    return datetime.datetime.strptime(text, "%Y-%m-%d").date().weekday()


# Column 14 is parsed as a date and reduced to its weekday index; column 2 is read as a float.
dates, closes = np.loadtxt('./tmp/stock-date.csv', delimiter=',', usecols=(14,2), unpack=True,
                           converters={14: _weekday_from_date})

In [150]:
print('dates =', dates[:10])

dates = [ 4.  3.  2.  1.  0.  4.  3.  2.  1.  0.]


In [157]:
averages = np.zeros(5)
print(averages)

[ 0.  0.  0.  0.  0.]


In [161]:
# Mean closing price for each weekday index 0..4 found in `dates`.
for i in range(5):
    # Positions whose weekday index equals i.
    indices = np.where(dates == i)
    prices = np.take(closes, indices)
    avg = np.mean(prices)
    # Note: '%.4f' renders the integer day index as a float, e.g. 0.0000.
    print('day %.4f average %.4f' % (i, avg))
    averages[i] = avg

day 0.0000 average 20.4509
day 1.0000 average 20.5877
day 2.0000 average 20.5467
day 3.0000 average 20.7076
day 4.0000 average 20.6834


In [162]:
top = np.max(averages)
print('highest average', top)
print('top day of the week', np.argmax(averages))

highest average 20.707578125
top day of the week 3
