In [1]:
import numpy as np
import pandas as pd
import random

In [4]:
# Shared random generator for the random arrays drawn from `rng` below.
# It is seeded so that rerunning the notebook from a fresh kernel is repeatable.
rng = np.random.default_rng(12345)

In [5]:
np.ones((10, 5)).shape

(10, 5)

In [6]:
np.ones((3,4,5), dtype=np.float64).strides

(160, 40, 8)

In [7]:
# Advanced Array Manipulation
# Reshaping Arrays
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [8]:
arr.reshape((4,2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [9]:
# a multidimensional array can also be reshaped
arr.reshape((4,2)).reshape((2,4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [10]:
arr = np.arange(15)
arr.reshape((5,-1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [11]:
other_arr = np.ones((3,5))
other_arr

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [12]:
other_arr.shape

(3, 5)

In [13]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [14]:
# going from a higher-dimensional array to one dimension (flattening / raveling)
arr = np.arange(15).reshape((5,3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [15]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [16]:
# flatten behaves like ravel, except that it always returns a copy of the data
# (ravel only copies when it has to)
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [17]:
arr = np.arange(12).reshape((3,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [18]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [19]:
arr.ravel('f')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [20]:
# Concatenating and Splitting Arrays
arr1 = np.array([[1,2,3], [4,5,6]])
arr1

array([[1, 2, 3],
       [4, 5, 6]])

In [21]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
arr2

array([[ 7,  8,  9],
       [10, 11, 12]])

In [22]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [25]:
np.hstack((arr1,arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [26]:
# np.split slices an array into multiple arrays along an axis
arr = rng.standard_normal((5, 2))
arr

array([[-1.42382504,  1.26372846],
       [-0.87066174, -0.25917323],
       [-0.07534331, -0.74088465],
       [-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306]])

In [27]:
first, second, third = np.split(arr, [1,3])

In [28]:
first

array([[-1.42382504,  1.26372846]])

In [29]:
second

array([[-0.87066174, -0.25917323],
       [-0.07534331, -0.74088465]])

In [30]:
third

array([[-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306]])

In [31]:
arr = np.arange(6)
arr1 = arr.reshape((3,2))
arr2 = rng.standard_normal((3,2))
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [ 2.34740965,  0.96849691],
       [-0.75938718,  0.90219827],
       [-0.46695317, -0.06068952]])

In [32]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [ 2.34740965,  0.96849691,  3.        ],
       [-0.75938718,  0.90219827,  4.        ],
       [-0.46695317, -0.06068952,  5.        ]])

In [33]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [34]:
# Repeating Elements: tile and repeat
arr = np.arange(3)
arr

array([0, 1, 2])

In [35]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [36]:
arr.repeat([2,3,4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [37]:
arr = rng.standard_normal((2,2))
arr

array([[ 0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899]])

In [38]:
arr.repeat(2, axis=0)

array([[ 0.78884434, -1.25666813],
       [ 0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899],
       [ 0.57585751,  1.39897899]])

In [39]:
arr.repeat([2,3], axis=0)

array([[ 0.78884434, -1.25666813],
       [ 0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899],
       [ 0.57585751,  1.39897899],
       [ 0.57585751,  1.39897899]])

In [40]:
arr.repeat([2,3], axis=1)

array([[ 0.78884434,  0.78884434, -1.25666813, -1.25666813, -1.25666813],
       [ 0.57585751,  0.57585751,  1.39897899,  1.39897899,  1.39897899]])

In [41]:
# tile: stacks copies of the whole array along an axis (repeat duplicates individual elements)
arr

array([[ 0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899]])

In [42]:
np.tile(arr, 2)

array([[ 0.78884434, -1.25666813,  0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899,  0.57585751,  1.39897899]])

In [43]:
np.tile(arr, (2,1))

array([[ 0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899],
       [ 0.78884434, -1.25666813],
       [ 0.57585751,  1.39897899]])

In [44]:
# Fancy indexing equivalents: take and put
# Ten multiples of 100 (0 through 900), so each value reveals the position it came from.
arr = np.arange(0, 1000, 100)
arr

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [45]:
inds = [7,1,2,6]
arr[inds]

array([700, 100, 200, 600])

In [46]:
arr.take(inds)

array([700, 100, 200, 600])

In [47]:
arr.put(inds, 42)

In [48]:
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [49]:
arr.put(inds, [40,41,42,43])
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [50]:
inds = [2,0,2,1]
arr = rng.standard_normal((2,4))
arr

array([[ 1.32229806, -0.29969852,  0.90291934, -1.62158273],
       [-0.15818926,  0.44948393, -1.34360107, -0.08168759]])

In [51]:
arr.take(inds, axis=1)

array([[ 0.90291934,  1.32229806,  0.90291934, -0.29969852],
       [-1.34360107, -0.15818926, -1.34360107,  0.44948393]])

In [52]:
# Broadcasting - how operations work between arrays of different shapes

# combining a scalar value with an array
arr = np.arange(5)
arr

array([0, 1, 2, 3, 4])

In [53]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [62]:
arr = rng.standard_normal((4,3))
arr

array([[ 0.53025239,  0.53672097,  0.61835001],
       [-0.79501746,  0.30003095, -1.60270159],
       [ 0.26679883, -1.26162378, -0.07127081],
       [ 0.47404973, -0.41485376,  0.0977165 ]])

In [63]:
arr.mean(0)

array([ 0.11902087, -0.20993141, -0.23947647])

In [64]:
demeaned = arr - arr.mean(0)
demeaned

array([[ 0.41123151,  0.74665238,  0.85782649],
       [-0.91403833,  0.50996235, -1.36322512],
       [ 0.14777796, -1.05169237,  0.16820566],
       [ 0.35502886, -0.20492235,  0.33719297]])

In [65]:
demeaned.mean(0)

array([0.00000000e+00, 6.93889390e-17, 2.77555756e-17])

In [66]:
arr

array([[ 0.53025239,  0.53672097,  0.61835001],
       [-0.79501746,  0.30003095, -1.60270159],
       [ 0.26679883, -1.26162378, -0.07127081],
       [ 0.47404973, -0.41485376,  0.0977165 ]])

In [68]:
row_means = arr.mean(1)
row_means

array([ 0.56177446, -0.69922937, -0.35536525,  0.05230416])

In [69]:
row_means.shape

(4,)

In [70]:
row_means.reshape((4,1))

array([[ 0.56177446],
       [-0.69922937],
       [-0.35536525],
       [ 0.05230416]])

In [73]:
# Turn the four row means into a (4, 1) column so they broadcast across the
# columns of each row; the row means of the result should then be ~0.
demeaned = arr - row_means.reshape(4, 1)
demeaned.mean(axis=1)

array([3.70074342e-17, 7.40148683e-17, 0.00000000e+00, 0.00000000e+00])

In [75]:
arr - arr.mean(1).reshape((4,1))

array([[-0.03152207, -0.02505349,  0.05657556],
       [-0.09578809,  0.99926031, -0.90347222],
       [ 0.62216408, -0.90625853,  0.28409445],
       [ 0.42174557, -0.46715792,  0.04541234]])

In [77]:
arr = np.zeros((4,4))
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [78]:
arr_3d = arr[:, np.newaxis, :]
arr_3d

array([[[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]]])

In [79]:
arr_3d.shape

(4, 1, 4)

In [83]:
arr_1d = rng.standard_normal(3)
arr_1d[:, np.newaxis]

array([[ 0.5343599 ],
       [-1.06580785],
       [-0.18147274]])

In [84]:
arr_1d[np.newaxis, :]

array([[ 0.5343599 , -1.06580785, -0.18147274]])

In [85]:
arr = rng.standard_normal((3,4,5))
arr

array([[[ 1.62195180e+00, -3.17391946e-01, -8.15814967e-01,
          3.86579017e-01, -2.23638926e-01],
        [-7.01690809e-01, -1.79571318e+00,  8.18325622e-01,
         -5.71032902e-01,  7.85525063e-04],
        [-1.06364272e+00,  1.30171450e+00,  7.47872942e-01,
          9.80875909e-01, -1.10418688e-01],
        [ 4.67918531e-01,  8.90607150e-01,  1.02300937e+00,
          3.12383389e-01, -6.19046857e-02]],

       [[-3.59479647e-01, -7.48643984e-01, -9.65478907e-01,
          3.60034657e-01, -2.44552532e-01],
        [-1.99585661e+00, -1.55247617e-01,  1.06383087e+00,
         -2.75171567e-01, -1.85333593e+00],
        [-1.24341928e-01,  7.84974522e-01,  2.01998597e-01,
         -4.28074443e-01,  1.84828890e+00],
        [ 1.89995289e+00, -9.84250348e-02,  8.13445440e-01,
          3.92494389e-01,  7.81442900e-01]],

       [[ 1.45327152e+00,  8.20186045e-01,  8.77053446e-02,
         -6.53505648e-01, -8.11886879e-01],
        [-2.55381724e-02,  1.15818454e+00,  3.00520870e-01,


In [87]:
depth_means = arr.mean(2)
depth_means

array([[ 0.130337  , -0.44986515,  0.37128039,  0.52640275],
       [-0.39162408, -0.64315617,  0.45656913,  0.75778212],
       [ 0.17915408,  0.34869908, -0.58807595, -0.296688  ]])

In [88]:
depth_means.shape

(3, 4)

In [91]:
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

array([[ 4.44089210e-17, -2.22044605e-17,  4.44089210e-17,
        -4.44089210e-17],
       [-4.99600361e-17,  8.88178420e-17,  0.00000000e+00,
        -4.44089210e-17],
       [ 4.44089210e-17,  0.00000000e+00, -1.11022302e-17,
         0.00000000e+00]])

In [94]:
def demean_axis(arr, axis=0):
    """Return ``arr`` with its mean along ``axis`` subtracted.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array of any dimensionality.
    axis : int, default 0
        Axis along which the means are computed and removed.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr`` whose mean along ``axis`` is
        zero up to floating-point error.
    """
    # keepdims=True leaves a length-1 axis where the mean was taken, so the
    # result broadcasts against ``arr`` for any number of dimensions; this
    # generalizes hand-written indexers such as [:, :, np.newaxis].
    means = arr.mean(axis=axis, keepdims=True)
    return arr - means

In [95]:
# setting array value by broadcasting
arr = np.zeros((4,3))
arr

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [97]:
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [98]:
col = np.array([1.28, -0.42, 0.44, 1.6])
col

array([ 1.28, -0.42,  0.44,  1.6 ])

In [100]:
arr[:] = col[:, np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [101]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [102]:
# ufunc instance methods: reduce, accumulate, outer, reduceat
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [103]:
np.add.reduce(arr)

45

In [104]:
arr.sum()

45

In [107]:
# A separate generator with its own seed, so the values here do not depend on
# how many draws were already taken from the shared `rng`.
my_rng = np.random.default_rng(12346)

arr = my_rng.standard_normal((5, 5))
arr

array([[-0.903889  ,  0.15713146,  0.89761199, -0.76219554, -0.17625556],
       [ 0.05303172, -1.62844028, -0.17753333,  1.96360352,  1.78125478],
       [-0.87971984, -1.69847913, -1.81891091,  0.11895453, -0.44409513],
       [ 0.76911421, -0.03433778,  0.39252776,  0.75891811, -0.07045967],
       [ 1.04984775,  1.02967072, -0.42005533,  0.78626627,  0.96124929]])

In [109]:
# arr[::2] is a view of every other row (0, 2, 4), so sorting it in place
# along axis 1 reorders those rows of `arr` and leaves the odd rows untouched.
arr[::2].sort(axis=1)
# Compare each element with its right-hand neighbour: a row that is all True
# is strictly increasing, i.e. one of the rows that was just sorted.
arr[:, 1:] > arr[:, :-1]

array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [111]:
arr = np.arange(15).reshape((3,5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [115]:
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [116]:
# Repeat 0 once and 1 and 2 twice each, giving [0, 1, 1, 2, 2].
arr = np.repeat(np.arange(3), [1, 2, 2])
arr

array([0, 1, 1, 2, 2])

In [117]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [119]:
x, y = rng.standard_normal((3,4)), rng.standard_normal(5)
x,y

(array([[ 0.62848817,  0.60119653,  0.95075786, -0.86924667],
        [-0.52900707,  0.0456841 , -1.02755181, -1.2292893 ],
        [-0.88335847, -0.07089346,  0.37405335, -0.02459374]]),
 array([ 0.07726066, -0.68391322, -0.72083767,  1.12062282, -0.05481416]))

In [122]:
result = np.subtract.outer(x,y)
result.shape

(3, 4, 5)

In [124]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [126]:
np.add.reduceat(arr, [0,5,8])

array([10, 18, 17])

In [127]:
dtype = [('x', np.float64), ('y', np.int32)]
dtype

[('x', numpy.float64), ('y', numpy.int32)]

In [None]:
# Structured array: each element is one (x, y) record laid out according to
# the `dtype` field list defined in the previous cell. NumPy has no `np.arr`;
# `np.array` is the constructor. The two records are sample values.
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr