© Валерий Студенников, курс "Инструменты анализа данных"

# numpy

Библиотека для работы с многомерными массивами.

Есть прекрасные руководства на русском языке:
- https://pyprog.pro/short_guide.html
- https://pyprog.pro/reference_manual.html

In [None]:
import numpy as np
import pandas as pd
import sklearn.datasets

In [32]:
!hostname

MacBook-Pro-Valeriy-2.local


![оси numpy](https://www.safaribooksonline.com/library/view/elegant-scipy/9781491922927/assets/elsp_0105.png)

### Creating arrays

In [3]:
# 1-D integer array built from a plain Python sequence; show shape, dtype and contents.
a = np.array((1, 2, 3))
(a.shape, a.dtype, a)

((3,), dtype('int64'), array([1, 2, 3]))

In [5]:
# 2x3 array from nested sequences; the explicit dtype makes the integer entries floats too.
b = np.array([[1.5, 2, 3], [4, 5, 6]], dtype=np.float64)
(b.shape, b.dtype, b)

((2, 3), dtype('float64'), array([[1.5, 2. , 3. ],
        [4. , 5. , 6. ]]))

In [6]:
# 3-D array: 2 blocks x 2 rows x 3 columns. No dtype is given, so it is inferred
# from the data (float, because of the 1.5 entry).
c = np.array([
    [[1.5, 2, 3], [4, 5, 6]],
    [[3, 2, 1], [4, 5, 6]],
])
(c.shape, c.dtype, c)

((2, 2, 3), dtype('float64'), array([[[1.5, 2. , 3. ],
         [4. , 5. , 6. ]],
 
        [[3. , 2. , 1. ],
         [4. , 5. , 6. ]]]))

### creating specific arrays

In [9]:
# Integer matrix of zeros with 3 rows and 4 columns.
z = np.zeros(shape=(3, 4), dtype=int)
(z.shape, z.dtype)

((3, 4), dtype('int64'))

In [11]:
# 2x3x4 block of ones stored as 16-bit integers.
o = np.ones(shape=(2, 3, 4), dtype="int16")
(o.shape, o.dtype)

((2, 3, 4), dtype('int16'))

In [4]:
np.arange(10, 25, step=5)  # from 10 up to (not including) 25 in steps of 5

array([10, 15, 20])

In [16]:
np.linspace(start=0, stop=10, num=11)  # 11 evenly spaced points, both endpoints included

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [19]:
np.full(shape=(2, 2), fill_value=7)  # 2x2 array where every element is 7

array([[7, 7],
       [7, 7]])

In [22]:
# NOTE: load_boston() was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs with scikit-learn < 1.2.
data = sklearn.datasets.load_boston()

# Append the target as one extra column next to the features.
# reshape(-1, 1) turns the 1-D target into a column vector, so the stacked
# result is a plain ndarray instead of going through the discouraged np.matrix class.
np.hstack((data.data, data.target.reshape(-1, 1))).shape, data.data.shape

((506, 14), (506, 13))

In [21]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere (float by default).
np.eye( 3 )

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [27]:
# np.empty only allocates the 3x2 array without initializing it: the values shown
# are arbitrary leftovers and must be overwritten before the array is used.
np.empty((3,2))

array([[1.39069238e-309, 1.39069238e-309],
       [1.39069238e-309, 1.39069238e-309],
       [1.39069238e-309, 1.39069238e-309]])

### Случайные распределения

https://numpy.org/doc/stable/reference/random/index.html

In [22]:
np.random.random(size=(2, 2))  # 2x2 uniform samples from [0, 1); unseeded, so values change on every run

array([[0.93424742, 0.13314525],
       [0.60290273, 0.73372887]])

In [26]:
np.random.normal(loc=0, scale=1, size=(10,))  # 10 draws from a normal distribution with mean 0 and std 1

array([ 0.03829468,  0.90056936,  0.36617154,  0.17117851,  0.89558506,
       -1.23339977, -0.71579775,  0.2560889 ,  0.35422933, -0.23749679])

In [264]:
np.random.exponential(scale=100, size=(5, 5))  # 5x5 draws from an exponential distribution with scale (mean) 100

array([[ 43.04690365,  47.47819829,  14.37591629,  56.0621001 ,
        203.59365562],
       [ 13.39087697,  33.02057811,  22.89687185, 137.32886412,
         15.50816473],
       [ 35.87968883,  48.46345935, 194.91313515,   4.6328604 ,
        236.92743141],
       [244.74870303,  50.40232066, 110.15717494, 180.10962556,
        267.31295515],
       [ 60.41060526,  55.21529336,  36.10179194,  68.19501591,
        309.86183671]])

### Сохранение / загрузка

In [30]:
# np.save writes a single array to a binary .npy file.
np.save( '/tmp/ones.npy', o )
# np.savez bundles arrays into one .npz archive; an array passed positionally
# (as here) is stored under the automatic key 'arr_0'.
np.savez( '/tmp/ones.npz', o )

In [33]:
# Read the array back from the .npy file; shape and dtype (int16) come back intact.
np.load('/tmp/ones.npy')

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int16)

In [36]:
 # Write `b` as space-delimited text. Note: despite the file name "ones.txt",
 # the array saved here is `b`, not the array of ones `o`.
 np.savetxt("/tmp/ones.txt", b, delimiter=" ")

In [37]:
 # Parse the text file written by np.savetxt back into a float array.
 np.loadtxt("/tmp/ones.txt")

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [38]:
# Read the same text file with genfromtxt, passing the single-space delimiter explicitly.
np.genfromtxt("/tmp/ones.txt", delimiter=' ')

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

### Типы данных

* np.int64 — Signed 64-bit integer types
* np.float64 — Standard double-precision floating point (np.float32 is single precision)
* np.complex128 — Complex numbers represented by two 64-bit floats (128 bits in total)
* np.bool_ — Boolean type storing True and False values
* np.object_ — Python object type
* np.bytes_ — Fixed-length byte string type (formerly np.string_)
* np.str_ — Fixed-length unicode string type (formerly np.unicode_)

## Информация о массиве

In [6]:
# Summary of `b`, one value per line: shape, length of the first axis,
# number of axes, total element count, dtype object and dtype name.
print(b.shape, len(b), b.ndim, b.size, b.dtype, b.dtype.name, sep="\n")
b.astype(int)  # converted copy with integer dtype

(2, 3)
2
2
6
float64
float64


array([[1, 2, 3],
       [4, 5, 6]])

### Арифметические операции

In [10]:
# Operands for the arithmetic examples: the 3x3 identity and a 3x3 matrix of ones.
e1 = np.identity(3)
o1 = np.ones(shape=(3, 3))
o1 + e1  # element-wise sum; the element-wise difference would be o1 - e1

array([[2., 1., 1.],
       [1., 2., 1.],
       [1., 1., 2.]])

In [11]:
# Function form of the + operator: element-wise addition.
np.add( e1, o1 )

array([[2., 1., 1.],
       [1., 2., 1.],
       [1., 1., 2.]])

In [12]:
# Function form of the - operator: element-wise subtraction (o1 minus e1).
np.subtract( o1, e1 )

array([[0., 1., 1.],
       [1., 0., 1.],
       [1., 1., 0.]])

In [13]:
e1 * o1 * 2 # element-wise product, then scaled by 2; function form: np.multiply( e1, o1 ) * 2

array([[2., 0., 0.],
       [0., 2., 0.],
       [0., 0., 2.]])

In [14]:
e1 / o1 / 2 # element-wise quotient, then halved; function form: np.divide( e1, o1 ) / 2

array([[0.5, 0. , 0. ],
       [0. , 0.5, 0. ],
       [0. , 0. , 0.5]])

In [84]:
# e**x evaluated element-wise on 10 evenly spaced points of [-2*pi, 2*pi].
np.exp(np.linspace(start=-2 * np.pi, stop=2 * np.pi, num=10))

array([1.86744273e-03, 7.54460979e-03, 3.04807938e-02, 1.23144711e-01,
       4.97513941e-01, 2.00999393e+00, 8.12052740e+00, 3.28075445e+01,
       1.32544960e+02, 5.35491656e+02])

In [86]:
np.sqrt(2 * o1)  # element-wise square root: every entry becomes sqrt(2)

array([[1.41421356, 1.41421356, 1.41421356],
       [1.41421356, 1.41421356, 1.41421356],
       [1.41421356, 1.41421356, 1.41421356]])

In [92]:
# Dot product of two 1-D vectors: 1*3 + 2*4.
np.dot( np.array([1,2]), np.array([3,4]) )

11

In [93]:
# np.sin / np.cos / np.log, etc.

### Сравнение

In [94]:
# Comparison is element-wise and yields a boolean array of the same shape.
o1 == e1

array([[ True, False, False],
       [False,  True, False],
       [False, False,  True]])

In [97]:
# Element-wise "greater than": True wherever o1's entry exceeds e1's entry.
o1 > e1

array([[False,  True,  True],
       [ True, False,  True],
       [ True,  True, False]])

In [99]:
# Comparison against a scalar: the scalar is compared with every element.
e1 >= 1

array([[ True, False, False],
       [False,  True, False],
       [False, False,  True]])

### Агрегатные функции

In [24]:
# Plain Python list of three 1-D arrays: [0, 1, 2] shifted by 0, 3 and 6.
[np.arange(3) + 3 * x for x in range(3)]

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]

In [29]:
# 3x3 matrix holding 0..8 in row-major order; equivalent to
# np.array([np.arange(3) + 3 * x for x in range(3)]).
a1 = np.reshape(np.arange(9), (3, 3))
a1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [30]:
# Grand total, then per-column totals (axis=0) and per-row totals (axis=1).
np.sum(a1), np.sum(a1, axis=0), np.sum(a1, axis=1)

(36, array([ 9, 12, 15]), array([ 3, 12, 21]))

In [109]:
# Overall minimum, then the minimum of each column (axis=0) and of each row (axis=1).
np.min(a1), np.min(a1, axis=0), np.min(a1, axis=1)

(0, array([0, 1, 2]), array([0, 3, 6]))

In [110]:
# Overall maximum, then the maximum of each column (axis=0) and of each row (axis=1).
np.max(a1), np.max(a1, axis=0), np.max(a1, axis=1)

(8, array([6, 7, 8]), array([2, 5, 8]))

In [114]:
# A vector of four ones next to its running (cumulative) sum.
np.ones(4), np.ones(4).cumsum()

(array([1., 1., 1., 1.]), array([1., 2., 3., 4.]))

In [116]:
# Running sum down the rows (axis=0): row k holds the sum of rows 0..k.
np.ones((4, 4)).cumsum(axis=0)

array([[1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.]])

In [123]:
# Arithmetic mean and median over all elements of a1.
np.mean(a1), np.median(a1)

(4.0, 4.0)

In [35]:
# Correlation coefficients between four length-100 series: a linear ramp on
# [0, 1], its sine, its cosine and uniform random noise (unseeded, so the
# last row/column changes on every run).
l = np.linspace(start=0, stop=1, num=100)
matrix = [l, np.sin(l), np.cos(l), np.random.random(size=100)]
np.corrcoef(matrix)

array([[ 1.        ,  0.99737292, -0.97223691,  0.0241623 ],
       [ 0.99737292,  1.        , -0.95307816,  0.0184285 ],
       [-0.97223691, -0.95307816,  1.        , -0.04688893],
       [ 0.0241623 ,  0.0184285 , -0.04688893,  1.        ]])

In [144]:
# Standard deviation over all elements, then separately for each column (axis=0).
np.std(a1), np.std(a1, axis=0)

(2.581988897471611, array([2.44948974, 2.44948974, 2.44948974]))

### Misc functions

In [163]:
# Mirror image of a1: every element x becomes 8 - x, so the values run from 8 down to 0.
a2 = np.subtract(8, a1)
a2

array([[8, 7, 6],
       [5, 4, 3],
       [2, 1, 0]])

In [164]:
# np.sort returns a sorted copy; with the last axis (the default) each row is sorted independently.
np.sort(a2, axis=-1)

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [167]:
# The .sort() method works in place: a2 itself is modified (unlike np.sort).
# axis = 0 sorts each column independently, so whole rows end up reordered here.
a2.sort( axis = 0 )
a2

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

### Подмножества, срезы, индексация

In [173]:
# Show a1 again as the reference for the indexing examples that follow.
a1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [170]:
# Comma-separated indices and an explicit tuple are the same thing: both pick row 0, column 0.
a1[0,0], a1[(0,0)]

(0, 0)

In [172]:
# Two spellings of "first row": the trailing full slice is optional.
a1[ 0, : ], a1[ 0 ]

(array([0, 1, 2]), array([0, 1, 2]))

In [177]:
# Rows 0..2 (all of them) and columns 1..2; slice end points are exclusive.
a1[ 0:3, 1:3 ]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [181]:
# Six evenly spaced floats from 0 to 5 inclusive.
c1 = np.linspace(start=0, stop=5, num=6)
c1

array([0., 1., 2., 3., 4., 5.])

In [184]:
# Negative end index counts from the back: everything except the last element.
c1[ :-1 ]

array([0., 1., 2., 3., 4.])

In [187]:
# Ellipsis (...) stands for "all remaining axes", so both expressions select row 1.
a1[ 1, ... ], a1[ 1 ]

(array([3, 4, 5]), array([3, 4, 5]))

In [215]:
a1[ ::-1 ] # Step -1 reverses the first axis: row order is flipped, columns keep their order

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [214]:
a1[ a1 < 5 ] # Boolean indexing: the mask a1 < 5 picks the matching elements as a flat 1-D array

array([0, 1, 2, 3, 4])

In [193]:
a1[[1, 0, 1, 0],[0, 1, 2, 0]] # Index lists are paired up as (row, column): selects elements (1,0), (0,1), (1,2) and (0,0)

array([3, 1, 5, 0])

### Манипуляции с матрицами

In [196]:
a1.T # Transpose (rows become columns); function form: np.transpose( a1 )

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [205]:
a1.ravel() # Flatten the array to 1-D, reading it row by row

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [40]:
# Reshape the 16 elements of the 4x4 identity into 2 x 2 x ?; the -1 asks NumPy
# to infer the last dimension (4 here). The data itself is unchanged.
np.eye(4).reshape((2, 2, -1))

array([[[1., 0., 0., 0.],
        [0., 1., 0., 0.]],

       [[0., 0., 1., 0.],
        [0., 0., 0., 1.]]])

In [257]:
# Floats 0.0 .. 15.0 laid out row by row in a 4x4 matrix.
np.arange(16, dtype=float).reshape((4, 4))

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.],
       [12., 13., 14., 15.]])

In [226]:
np.resize(a1, (3,2)) # Return a new array with the new shape; the target is smaller than a1, so only the first 6 elements (in flattened order) are kept

array([[0, 1],
       [2, 3],
       [4, 5]])

### Adding/Removing Elements

In [230]:
# Append one more row ([7, 8, 9]) below the 2x3 input; axis=0 keeps the result two-dimensional.
np.append(arr=[[1, 2, 3], [4, 5, 6]], values=[[7, 8, 9]], axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [233]:
# Insert a new column filled with 5 before column index 1; a1 itself is not modified.
np.insert(a1, obj=1, values=5, axis=1)

array([[0, 5, 1, 2],
       [3, 5, 4, 5],
       [6, 5, 7, 8]])

In [237]:
# Return a copy of a1 without row 1 (axis=0 selects rows).
np.delete(a1, obj=[1], axis=0)

array([[0, 1, 2],
       [6, 7, 8]])

## Combining Arrays

In [239]:
    # Join along axis 0: the rows of a2 are appended below the rows of a1.
    np.concatenate((a1,a2), axis=0) # Concatenate arrays

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

In [240]:
# Join along axis 1: the columns of a2 are placed to the right of the columns of a1.
np.concatenate((a1,a2), axis=1)

array([[0, 1, 2, 2, 1, 0],
       [3, 4, 5, 5, 4, 3],
       [6, 7, 8, 8, 7, 6]])

In [247]:
# The result is float although a1 is integer, because o1 is a float array.
np.vstack((a1,o1,a1)) # Stack arrays vertically (row-wise)

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

In [246]:
# The result is float although a1 is integer, because o1 is a float array.
np.hstack((a1,o1,a1)) # Stack arrays horizontally (column-wise)

array([[0., 1., 2., 1., 1., 1., 0., 1., 2.],
       [3., 4., 5., 1., 1., 1., 3., 4., 5.],
       [6., 7., 8., 1., 1., 1., 6., 7., 8.]])

In [252]:
# Each 1-D input becomes one column of the 3x2 result.
np.column_stack((np.array([1, 2, 3]), np.array([2, 3, 4])))

array([[1, 2],
       [2, 3],
       [3, 4]])

In [261]:
# Cut the 4x4 matrix into 2 equal groups of columns (left half, right half).
np.hsplit(np.arange(16.0).reshape(4, 4), indices_or_sections=2)

[array([[ 0.,  1.],
        [ 4.,  5.],
        [ 8.,  9.],
        [12., 13.]]), array([[ 2.,  3.],
        [ 6.,  7.],
        [10., 11.],
        [14., 15.]])]

In [262]:
 # Cut the 4x4 matrix into 2 equal groups of rows (top half, bottom half).
 np.vsplit( np.arange(16.0).reshape(4, 4), 2 ) # Split the array vertically

[array([[0., 1., 2., 3.],
        [4., 5., 6., 7.]]), array([[ 8.,  9., 10., 11.],
        [12., 13., 14., 15.]])]

### OLAP-куб

![olap-cube](http://www.olap.ru/basic/img/alpero2i2.gif)