# 逻辑运算

In [47]:
import numpy as np
# Simulated data: a 500 x 504 array of draws from a normal distribution (mean 0, std 1).
# NOTE(review): no RNG seed is set in the visible cells, so the values differ on every run.
stock_day_rise = np.random.normal(loc=0, scale=1, size=(500, 504))
# Take the top-left 4 x 4 corner (first four rows, first four columns) as an independent
# copy, so later in-place edits to temp leave stock_day_rise untouched.
corner_view = stock_day_rise[:4, :4]
temp = corner_view.copy()

### 判断大于0.5的数据

In [48]:
# Build the boolean mask once, then show the raw values, the element-wise comparison
# result, and the values picked out by the mask (boolean indexing yields a flat 1-D array).
above_half = temp > 0.5
print(temp)
print('是否大于0.5', above_half)
print('返回大于0.5的值', temp[above_half])

[[ 1.36171335 -0.75264901 -0.91935814  1.23658101]
 [ 0.62084888  0.14994789 -1.15786689 -0.30186736]
 [-0.28859243 -0.11566026 -0.38985182 -0.31748186]
 [-0.36867646  0.81134775  0.76654088  0.12462615]]
是否大于0.5 [[ True False False  True]
 [ True False False False]
 [False False False False]
 [False  True  True False]]
返回大于0.5的值 [1.36171335 1.23658101 0.62084888 0.81134775 0.76654088]


### 去重

In [49]:
# Cap every entry greater than 1 at exactly 1 (this modifies temp in place),
# then list the distinct values that remain.
over_one = temp > 1
temp[over_one] = 1
np.unique(temp)

array([-1.15786689, -0.91935814, -0.75264901, -0.38985182, -0.36867646,
       -0.31748186, -0.30186736, -0.28859243, -0.11566026,  0.12462615,
        0.14994789,  0.62084888,  0.76654088,  0.81134775,  1.        ])

### 三元运算

In [50]:
# np.where as an element-wise ternary: argument 1 is the condition, argument 2 the value
# used where the condition holds, argument 3 the value used where it does not.
is_above_half = temp > 0.5
np.where(is_above_half, 1, 0)

array([[1, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 1, 1, 0]])

### 复合逻辑

In [51]:
# Display the current contents of temp as a reference for the compound conditions below.
temp

array([[ 1.        , -0.75264901, -0.91935814,  1.        ],
       [ 0.62084888,  0.14994789, -1.15786689, -0.30186736],
       [-0.28859243, -0.11566026, -0.38985182, -0.31748186],
       [-0.36867646,  0.81134775,  0.76654088,  0.12462615]])

In [52]:
# Logical AND via np.logical_and(): mark with 1 the entries that are both > 0.5 and < 1,
# and with 0 everything else.
in_band = np.logical_and(temp > 0.5, temp < 1)
np.where(in_band, 1, 0)

array([[0, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 1, 1, 0]])

In [53]:
# Logical OR via np.logical_or(): mark with 1 the entries that are > 0.5 or < -0.5,
# and with 0 everything else.
np.where(np.logical_or(temp>0.5, temp<-0.5), 1, 0)

array([[1, 1, 1, 1],
       [1, 0, 1, 0],
       [0, 0, 0, 0],
       [0, 1, 1, 0]])

# 统计运算

### 判断所有

In [54]:
# True only if every element of temp exceeds 0.5.
(temp > 0.5).all()

False

### 获取最值

In [55]:
# Display temp again so the extrema computed next can be checked by eye.
temp

array([[ 1.        , -0.75264901, -0.91935814,  1.        ],
       [ 0.62084888,  0.14994789, -1.15786689, -0.30186736],
       [-0.28859243, -0.11566026, -0.38985182, -0.31748186],
       [-0.36867646,  0.81134775,  0.76654088,  0.12462615]])

In [56]:
# Largest value over the whole array (no axis given).
overall_max = np.max(temp)
# Smallest value within each row (axis=1 reduces across the columns).
row_minima = np.min(temp, axis=1)
print('所有的最大值', overall_max)
print('每一行的最小值', row_minima)

所有的最大值 1.0
每一行的最小值 [-0.91935814 -1.15786689 -0.38985182 -0.36867646]


### 均值,中位数,方差,标准差

In [57]:
# Per-row statistics (axis=1): mean, median, variance and standard deviation,
# printed in that order using one message template per reducer.
row_stats = [
    ('平均值为{}', np.mean),
    ('中位数为{}', np.median),
    ('方差为{}', np.var),
    ('标准差为{}', np.std),
]
for template, reducer in row_stats:
    print(template.format(reducer(temp, axis=1)))

平均值为[ 0.08199821 -0.17223437 -0.27789659  0.33345958]
中位数为[ 0.1236755  -0.07595974 -0.30303715  0.44558351]
方差为[0.84620127 0.43026465 0.010134   0.23813574]
标准差为[0.91989199 0.65594562 0.10066776 0.48799153]


# 数组运算

### 数值运算

In [58]:
# Arithmetic between an array and a scalar is applied to every element.
a = np.array((1, 2, 3, 4))
2 * a

array([2, 4, 6, 8])

### 数组间的运算

In [59]:
a1 = np.array([[1, 2],
               [3, 4]])
b = np.array([[2],
              [2]])
# Both operands are 2-D. Their shapes (2, 2) and (2, 1) differ only on an axis where
# one of them has length 1, so that axis is stretched and the product is defined.
np.multiply(a1, b)

array([[2, 4],
       [6, 8]])

In [60]:
a1 = np.array([[1, 2, 3], [4, 5, 6]])
b1 = np.array([[2, 2], [2, 2]])
# Both operands are 2-D, but their second axes have lengths 3 and 2: they differ and
# neither is 1, so the shapes cannot be broadcast together.
# The failure is the point of this cell, so catch the ValueError and print it
# instead of letting it abort a "Restart & Run All".
try:
    a1 * b1
except ValueError as err:
    print('ValueError:', err)

ValueError: operands could not be broadcast together with shapes (2,3) (2,2) 

In [None]:
a1 = np.array([[1, 2, 3],
               [4, 5, 6]])
b1 = np.array([[2, 2, 1],
               [2, 2, 1]])
# Both operands are 2-D with the identical shape (2, 3), so the product is taken
# element by element.
np.multiply(a1, b1)

# 矩阵

### 数组转为矩阵

In [None]:
# Goal: compute each student's final grade as coursework : final exam = 3 : 7.
# One row per student; each row holds two marks
# (presumably coursework first, then final exam, matching the weight order below).
score = np.array([
    [80, 86],
    [81, 89],
    [83, 85],
    [80, 91],
    [88, 99],
    [83, 86],
    [82, 93],
    [83, 80],
])
# Weights applied to the two marks.
percent = np.array([0.3, 0.7])

In [None]:
# Convert the score array to a NumPy matrix. np.asmatrix is used because the np.mat
# alias was removed in NumPy 2.0; on older releases the two names are equivalent.
score = np.asmatrix(score)

In [None]:
# Convert the weights to a NumPy matrix; a 1-D array of length 2 becomes a (1, 2) row matrix.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
percent = np.asmatrix(percent)

In [None]:
# Matrix product of marks and weights. np.matmul accepts arrays or matrices, but the
# inner dimensions must agree: score has 2 columns, so the weights must form a (2, 1)
# column. Multiplying by the (1, 2) row form raises a shape-mismatch ValueError.
# Reshaping to a column works for both the 1-D array and the (1, 2) matrix form of percent.
np.matmul(score, np.reshape(percent, (-1, 1)))

In [None]:
# Display the weights in their current form.
percent

# 七.合并，分割

In [73]:
# Two consecutive groups of ten rows (rows 0-9 and rows 10-19), each limited to its
# first 100 columns; print both shapes to confirm they match.
a1, a2 = stock_day_rise[:10, :100], stock_day_rise[10:20, :100]
for part in (a1, a2):
    print(part.shape)

(10, 100)
(10, 100)


### 1.行列合并

In [66]:
# Join along axis 0: the rows of a2 are appended below the rows of a1,
# so the two inputs must have the same number of columns.
a3 = np.concatenate((a1, a2), axis=0)
a3

array([[ 1.36171335, -0.75264901, -0.91935814, ...,  0.86443416,
        -0.10921353, -0.09809656],
       [ 0.62084888,  0.14994789, -1.15786689, ..., -0.43745488,
        -0.74152443,  2.20551435],
       [-0.28859243, -0.11566026, -0.38985182, ..., -0.77349638,
        -0.04469627,  2.74616938],
       ...,
       [ 0.93418362,  1.31750959, -0.88193732, ...,  0.3161574 ,
         0.30255802, -0.58145726],
       [-0.12875864, -0.88235529,  0.51071495, ...,  0.05603176,
        -1.28161413,  1.06353022],
       [ 0.34156779, -0.56952418,  0.24740587, ..., -1.58681964,
         1.15860151,  0.15913326]])

In [67]:
# Shape of the joined array: the row counts of a1 and a2 add up, the column count is unchanged.
a3.shape

(20, 100)

### 2.分割

In [72]:
# Keep only the first ten columns of the first 20 rows, then cut along axis 0
# into two equal groups of ten rows each.
first_ten_cols = a3[:20, :10]
np.split(first_ten_cols, 2, axis=0)

[array([[ 1.36171335e+00, -7.52649006e-01, -9.19358136e-01,
          1.23658101e+00, -1.10552903e+00,  1.14198013e+00,
         -1.82959710e+00, -7.27162669e-01, -8.14419641e-01,
         -2.49629526e+00],
        [ 6.20848877e-01,  1.49947890e-01, -1.15786689e+00,
         -3.01867360e-01,  4.48621684e-01, -4.32817066e-01,
          7.12384978e-01, -8.99135847e-01, -5.25142545e-01,
          1.22181967e+00],
        [-2.88592433e-01, -1.15660257e-01, -3.89851816e-01,
         -3.17481864e-01,  3.94036684e-01,  1.26396320e+00,
          2.80016040e-01,  2.20819687e+00,  4.49947474e-01,
         -3.42204340e-02],
        [-3.68676463e-01,  8.11347752e-01,  7.66540883e-01,
          1.24626145e-01, -8.54334353e-01, -5.37482819e-01,
         -1.90282327e-01,  7.81765283e-01, -1.05640929e+00,
         -2.16309618e+00],
        [ 1.06074779e+00,  2.77788676e-01,  5.97844776e-01,
         -6.57085403e-01,  4.56572314e-01,  1.21268900e+00,
         -4.60042936e-01,  4.17935884e-01, -2.792029

# 八.IO操作与数据处理

In [75]:
# Parse a comma-separated file into a float array; fields that are empty or cannot be
# read as numbers come back as nan.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell will not
# run elsewhere without the file in that exact location.
csv_path = 'D:/AI/data/numpy_test/test.csv'
np.genfromtxt(csv_path, delimiter=',')

array([[  nan,   nan,   nan,   nan],
       [  1. , 123. ,   1.4,  23. ],
       [  2. , 110. ,   nan,  18. ],
       [  3. ,   nan,   2.1,  19. ]])

# pandas

In [76]:
import pandas as pd

In [77]:
# Generate the stock data again from a standard normal distribution:
# 500 stocks (rows) by 504 trading days (columns).
# NOTE(review): this replaces the earlier stock_day_rise with fresh, unseeded random values.
stock_day_rise = np.random.normal(loc=0, scale=1, size=(500, 504))

In [80]:
# Row labels: the prefix '股票' followed by the row number, one label per row of stock_day_rise.
n_rows = stock_day_rise.shape[0]
row_index = ['股票' + str(row_no) for row_no in range(n_rows)]
row_index

['股票0',
 '股票1',
 '股票2',
 '股票3',
 '股票4',
 '股票5',
 '股票6',
 '股票7',
 '股票8',
 '股票9',
 '股票10',
 '股票11',
 '股票12',
 '股票13',
 '股票14',
 '股票15',
 '股票16',
 '股票17',
 '股票18',
 '股票19',
 '股票20',
 '股票21',
 '股票22',
 '股票23',
 '股票24',
 '股票25',
 '股票26',
 '股票27',
 '股票28',
 '股票29',
 '股票30',
 '股票31',
 '股票32',
 '股票33',
 '股票34',
 '股票35',
 '股票36',
 '股票37',
 '股票38',
 '股票39',
 '股票40',
 '股票41',
 '股票42',
 '股票43',
 '股票44',
 '股票45',
 '股票46',
 '股票47',
 '股票48',
 '股票49',
 '股票50',
 '股票51',
 '股票52',
 '股票53',
 '股票54',
 '股票55',
 '股票56',
 '股票57',
 '股票58',
 '股票59',
 '股票60',
 '股票61',
 '股票62',
 '股票63',
 '股票64',
 '股票65',
 '股票66',
 '股票67',
 '股票68',
 '股票69',
 '股票70',
 '股票71',
 '股票72',
 '股票73',
 '股票74',
 '股票75',
 '股票76',
 '股票77',
 '股票78',
 '股票79',
 '股票80',
 '股票81',
 '股票82',
 '股票83',
 '股票84',
 '股票85',
 '股票86',
 '股票87',
 '股票88',
 '股票89',
 '股票90',
 '股票91',
 '股票92',
 '股票93',
 '股票94',
 '股票95',
 '股票96',
 '股票97',
 '股票98',
 '股票99',
 '股票100',
 '股票101',
 '股票102',
 '股票103',
 '股票104',
 '股票105',
 '股票106',
 '股票107',
 '股票108',
 '股票109',
 '股票110',


In [82]:
# Column labels: one date per column of stock_day_rise, beginning at 2017-01-01 and
# advancing by business day (freq='B'), so weekend dates are skipped.
n_cols = stock_day_rise.shape[1]
col_index = pd.date_range(start='2017-01-01', periods=n_cols, freq='B')
col_index

DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
               '2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11',
               '2017-01-12', '2017-01-13',
               ...
               '2018-11-23', '2018-11-26', '2018-11-27', '2018-11-28',
               '2018-11-29', '2018-11-30', '2018-12-03', '2018-12-04',
               '2018-12-05', '2018-12-06'],
              dtype='datetime64[ns]', length=504, freq='B')

In [85]:
# Assemble the DataFrame: the array supplies the values, row_index the row labels
# and col_index the column labels.
all_ = pd.DataFrame(data=stock_day_rise, index=row_index, columns=col_index)
all_

Unnamed: 0,2017-01-02 00:00:00,2017-01-03 00:00:00,2017-01-04 00:00:00,2017-01-05 00:00:00,2017-01-06 00:00:00,2017-01-09 00:00:00,2017-01-10 00:00:00,2017-01-11 00:00:00,2017-01-12 00:00:00,2017-01-13 00:00:00,...,2018-11-23 00:00:00,2018-11-26 00:00:00,2018-11-27 00:00:00,2018-11-28 00:00:00,2018-11-29 00:00:00,2018-11-30 00:00:00,2018-12-03 00:00:00,2018-12-04 00:00:00,2018-12-05 00:00:00,2018-12-06 00:00:00
股票0,0.133137,1.584540,0.100433,1.022169,-0.016971,-0.790629,1.183126,-1.306960,1.364011,1.716279,...,-1.033473,0.298783,0.437777,-1.149691,-1.257037,0.906471,1.679898,0.728118,0.658797,-1.612211
股票1,0.038089,0.326967,-1.059358,-0.926832,-1.524691,-1.565462,0.243586,-0.263704,-0.107301,-0.485632,...,0.539972,-0.230067,-0.465933,-1.030764,1.199836,0.323747,-0.107355,0.176630,0.638482,0.422074
股票2,-1.582097,-0.884105,-0.387187,-0.942433,0.729428,-1.613717,-0.762966,-0.542804,0.407201,-0.990858,...,-1.003135,-0.137990,0.936975,-0.391260,0.250922,0.191642,-0.057105,0.451836,-2.546850,-0.347859
股票3,-1.029166,-0.722709,1.877114,-0.686140,1.554497,1.090195,1.447301,0.872898,1.139058,0.098317,...,-1.218524,-0.763920,-0.237946,-0.267997,-0.476971,0.404235,2.908936,0.566554,0.274778,-0.738352
股票4,-0.301991,1.192359,-0.169533,0.169893,-0.595420,1.570204,0.470439,-0.504576,-1.735147,-0.171897,...,0.506348,-0.686703,0.196355,-0.181634,-0.735060,0.563674,-0.680451,-2.077410,-0.715412,0.235961
股票5,-1.078853,0.918069,-1.340880,0.977926,-1.191167,-0.030686,-1.556254,-0.914179,-1.563103,-0.647446,...,1.134139,1.120328,0.078162,1.132025,0.215569,1.161802,0.002277,0.471219,0.082379,0.171915
股票6,-0.076850,-0.390790,0.666079,-0.162949,-0.440306,-0.996123,-0.230250,-0.398863,-0.630199,-0.037464,...,-0.011845,0.129995,-0.279203,-0.909852,1.060488,-0.376770,0.168073,-1.258989,-0.060426,-0.306667
股票7,-0.267812,0.643934,1.134102,0.505612,-0.133643,0.570186,-1.312049,0.909859,0.187266,0.438855,...,-0.460783,0.501347,-0.894848,-0.916579,0.147073,0.044248,-0.542264,1.296299,0.060925,0.224367
股票8,0.850798,-1.069398,-0.333574,0.482959,0.599667,-1.113464,0.469359,1.809391,-0.967316,0.572937,...,0.984279,1.525247,1.715308,0.486673,-2.131670,-0.718259,-1.771867,-0.026897,-2.485593,0.203747
股票9,-0.584283,-0.966437,0.291961,1.554879,1.587577,0.567569,0.340175,-1.569596,-1.245403,-0.517646,...,2.363816,0.612723,1.203972,1.218732,-0.941521,0.122744,2.017210,-1.148595,0.378138,-0.016347


In [86]:
# A DataFrame can be transposed: row labels become column labels and vice versa.
all_.transpose()

Unnamed: 0,股票0,股票1,股票2,股票3,股票4,股票5,股票6,股票7,股票8,股票9,...,股票490,股票491,股票492,股票493,股票494,股票495,股票496,股票497,股票498,股票499
2017-01-02,0.133137,0.038089,-1.582097,-1.029166,-0.301991,-1.078853,-0.076850,-0.267812,0.850798,-0.584283,...,-0.681479,-0.856772,-1.632359,0.247509,0.465601,-0.445630,1.289646,0.430451,0.160700,1.008341
2017-01-03,1.584540,0.326967,-0.884105,-0.722709,1.192359,0.918069,-0.390790,0.643934,-1.069398,-0.966437,...,0.010581,0.321827,0.956732,-0.471282,-1.317941,0.728133,1.455034,0.647271,-0.684567,0.671089
2017-01-04,0.100433,-1.059358,-0.387187,1.877114,-0.169533,-1.340880,0.666079,1.134102,-0.333574,0.291961,...,-1.506499,2.259181,-1.312995,-1.143476,0.281488,-0.003824,-0.141427,-0.969337,0.146534,0.069754
2017-01-05,1.022169,-0.926832,-0.942433,-0.686140,0.169893,0.977926,-0.162949,0.505612,0.482959,1.554879,...,0.633121,0.869580,0.075181,-0.189674,0.137845,-1.754316,0.277137,-0.258558,0.215478,-1.126643
2017-01-06,-0.016971,-1.524691,0.729428,1.554497,-0.595420,-1.191167,-0.440306,-0.133643,0.599667,1.587577,...,0.533556,0.959313,0.992051,-0.213244,-0.711286,0.579625,-1.508836,-0.881624,0.265865,0.717733
2017-01-09,-0.790629,-1.565462,-1.613717,1.090195,1.570204,-0.030686,-0.996123,0.570186,-1.113464,0.567569,...,-0.121731,-0.126004,-0.176858,0.396388,0.808827,0.271645,-0.365131,0.699854,-1.494794,-0.382912
2017-01-10,1.183126,0.243586,-0.762966,1.447301,0.470439,-1.556254,-0.230250,-1.312049,0.469359,0.340175,...,0.262852,-0.538653,0.749445,0.897113,-2.085242,-0.282586,-1.320233,0.442070,0.998710,1.023206
2017-01-11,-1.306960,-0.263704,-0.542804,0.872898,-0.504576,-0.914179,-0.398863,0.909859,1.809391,-1.569596,...,0.601485,0.514915,1.450026,-0.208186,-0.119220,0.169601,0.209496,-0.358638,-2.096519,0.228514
2017-01-12,1.364011,-0.107301,0.407201,1.139058,-1.735147,-1.563103,-0.630199,0.187266,-0.967316,-1.245403,...,0.140690,1.434359,1.647349,-0.044608,-0.059663,0.557450,-1.653842,1.504459,-0.148449,-0.916702
2017-01-13,1.716279,-0.485632,-0.990858,0.098317,-0.171897,-0.647446,-0.037464,0.438855,0.572937,-0.517646,...,1.688992,1.501124,-1.239914,0.534253,0.303433,1.048344,-0.028221,0.167689,0.438943,1.646259


In [87]:
# The index attribute of a DataFrame returns its row labels.
all_.index

Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9',
       ...
       '股票490', '股票491', '股票492', '股票493', '股票494', '股票495', '股票496', '股票497',
       '股票498', '股票499'],
      dtype='object', length=500)

In [88]:
# The columns attribute of a DataFrame returns its column labels.
all_.columns

DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
               '2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11',
               '2017-01-12', '2017-01-13',
               ...
               '2018-11-23', '2018-11-26', '2018-11-27', '2018-11-28',
               '2018-11-29', '2018-11-30', '2018-12-03', '2018-12-04',
               '2018-12-05', '2018-12-06'],
              dtype='datetime64[ns]', length=504, freq='B')

In [89]:
# The values attribute of a DataFrame returns the underlying data as a NumPy array.
all_.values

array([[ 0.1331369 ,  1.58453966,  0.10043294, ...,  0.72811789,
         0.65879695, -1.61221111],
       [ 0.03808944,  0.32696689, -1.05935782, ...,  0.17663006,
         0.63848234,  0.4220743 ],
       [-1.58209724, -0.88410502, -0.38718706, ...,  0.45183602,
        -2.54685018, -0.34785896],
       ...,
       [ 0.43045102,  0.64727064, -0.96933731, ..., -0.41494572,
        -0.49667333, -0.1523514 ],
       [ 0.16070017, -0.68456653,  0.1465344 , ..., -1.86424558,
        -1.43372332, -1.02508138],
       [ 1.00834097,  0.67108865,  0.0697536 , ...,  0.58383631,
         0.74678774,  1.22255206]])