In [1]:
import numpy as np

# Numpy versus lists


In [2]:
x = np.random.rand(10000)

In [3]:
x_list = list(x)

In [21]:
speed_1 = %timeit -o sum(x_list)

1.02 ms ± 40.5 µs per loop (mean ± std. dev. of 10 runs, 1000 loops each)


In [24]:
speed_2 = %timeit -o x.sum()

10.1 µs ± 88.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [25]:
speed_1.average / speed_2.average

101.1261632569898

In [26]:
s_1 = %timeit -o sum([x_i > 0.75 for x_i in x_list])

35.9 ms ± 656 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [29]:
s_2 = %timeit -o  (x > 0.75).sum()

23.8 µs ± 465 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [30]:
s_1.average / s_2.average

1507.8470061375006

## operations on numpy arrays versus lists

In [255]:
x = np.random.rand(10000)
y = np.random.rand(10000)
y_list = list(y)

In [256]:
x / y
x + y
x - y
x * y

x + 2

np.log(x)

array([-1.81564987, -1.03679686, -0.44100887, ..., -1.37019947,
       -0.00286044, -0.25525425])

**try any of that with ordinary Python!**

## indexing & slicing

In [270]:
div(10, 3)

NameError: name 'div' is not defined

In [258]:
x[1:10:2]

array([0.35458866, 0.16087861, 0.71390282, 0.34401385, 0.58799122])

In [265]:
x[::1000]

array([0.16273212, 0.98621573, 0.66241368, 0.30606467, 0.95782101,
       0.49760055, 0.77203739, 0.90508333, 0.75727087, 0.97479727])

## Making random and other arrays

In [65]:
x = np.random.rand(3, 5)
x = np.random.uniform(low=-2, high=2, size=(2,3))
x = np.random.normal(loc = 100, scale = 15, size = (100, 10))

In [66]:
np.empty((10, 2))

array([[4.63644028e-310, 0.00000000e+000],
       [6.90946697e-310, 6.90946697e-310],
       [6.90946697e-310, 6.37344683e-322],
       [4.63644083e-310, 6.90950529e-310],
       [6.90946697e-310, 6.90946697e-310],
       [6.90946697e-310, 6.90946697e-310],
       [6.90946751e-310, 6.90946751e-310],
       [6.90946751e-310, 6.90946751e-310],
       [6.90946751e-310, 6.90946751e-310],
       [6.90946751e-310, 6.90946751e-310]])

In [70]:
np.full((10, 2), 0)

array([[0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0]])

In [81]:
np.ones((10, 2)) * 3.14

array([[3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14],
       [3.14, 3.14]])

check size shape

In [77]:
np.ones((10, 2)).size, np.ones((10, 3)).shape


(20, (10, 3))

In [132]:
np.linspace(0, 1, 25)

array([0.        , 0.04166667, 0.08333333, 0.125     , 0.16666667,
       0.20833333, 0.25      , 0.29166667, 0.33333333, 0.375     ,
       0.41666667, 0.45833333, 0.5       , 0.54166667, 0.58333333,
       0.625     , 0.66666667, 0.70833333, 0.75      , 0.79166667,
       0.83333333, 0.875     , 0.91666667, 0.95833333, 1.        ])

## make array of specified dtype

In [91]:
np.array([1, 2, 3], dtype = np.float128).dtype

dtype('float128')

In [94]:
np.array([0, 1, 2, 3], dtype = np.bool8)

array([False,  True,  True,  True])

## reshape, transpose

In [79]:
x = np.ones(10000)

In [80]:
z = x.reshape((10, 1000))

In [81]:
x = np.random.rand(3, 2, 4)

In [85]:
x

array([[[0.5494263 , 0.01698511, 0.85795534, 0.42320594],
        [0.55047931, 0.41220911, 0.84478579, 0.60075333]],

       [[0.2302786 , 0.42395786, 0.22419792, 0.90082652],
        [0.85560879, 0.83345547, 0.93190253, 0.2135532 ]],

       [[0.64754069, 0.02811885, 0.30531282, 0.40275349],
        [0.64962335, 0.16625403, 0.95069189, 0.8083735 ]]])

In [86]:
x.T.shape

(4, 2, 3)

In [87]:
x.shape

(3, 2, 4)

In [84]:
x.flatten()

array([0.5494263 , 0.01698511, 0.85795534, 0.42320594, 0.55047931,
       0.41220911, 0.84478579, 0.60075333, 0.2302786 , 0.42395786,
       0.22419792, 0.90082652, 0.85560879, 0.83345547, 0.93190253,
       0.2135532 , 0.64754069, 0.02811885, 0.30531282, 0.40275349,
       0.64962335, 0.16625403, 0.95069189, 0.8083735 ])

# Operations

In [97]:
x = np.random.rand(10)
x.sort()

In [98]:
x

array([0.15233967, 0.21420509, 0.22726768, 0.31588655, 0.3187258 ,
       0.5542647 , 0.56468808, 0.75126383, 0.81608951, 0.87602804])

In [129]:
x = np.random.rand(10, 3)

In [106]:
x

array([[0.34920708, 0.25713402, 0.50945346],
       [0.30264711, 0.81323428, 0.7616083 ],
       [0.94466006, 0.36170023, 0.40151379],
       [0.31780388, 0.28790996, 0.59329335],
       [0.38446161, 0.64933621, 0.02488351],
       [0.34397985, 0.07331255, 0.82796832],
       [0.84189833, 0.45777263, 0.06162499],
       [0.31921441, 0.74843694, 0.81453866],
       [0.73286473, 0.1195986 , 0.87293823],
       [0.8718671 , 0.42517184, 0.88216548]])

In [107]:
x.sum(axis = 0)

array([5.40860415, 4.19360727, 5.7499881 ])

In [108]:
x.sum(axis = 1)

array([1.11579456, 1.87748969, 1.70787408, 1.1990072 , 1.05868133,
       1.24526072, 1.36129595, 1.88219001, 1.72540157, 2.17920442])

In [109]:
x.cumsum(axis = 1)

array([[0.34920708, 0.6063411 , 1.11579456],
       [0.30264711, 1.11588139, 1.87748969],
       [0.94466006, 1.30636029, 1.70787408],
       [0.31780388, 0.60571385, 1.1990072 ],
       [0.38446161, 1.03379781, 1.05868133],
       [0.34397985, 0.4172924 , 1.24526072],
       [0.84189833, 1.29967096, 1.36129595],
       [0.31921441, 1.06765135, 1.88219001],
       [0.73286473, 0.85246334, 1.72540157],
       [0.8718671 , 1.29703894, 2.17920442]])

In [111]:
x.cumsum(axis = 0)

array([[0.34920708, 0.25713402, 0.50945346],
       [0.65185418, 1.0703683 , 1.27106177],
       [1.59651424, 1.43206853, 1.67257556],
       [1.91431812, 1.71997849, 2.26586891],
       [2.29877973, 2.3693147 , 2.29075242],
       [2.64275958, 2.44262725, 3.11872074],
       [3.48465791, 2.90039988, 3.18034573],
       [3.80387232, 3.64883682, 3.99488439],
       [4.53673705, 3.76843542, 4.86782262],
       [5.40860415, 4.19360727, 5.7499881 ]])

In [112]:
np.median(x, axis =0)

array([0.36683434, 0.39343604, 0.67745083])

In [113]:
np.mean(x, axis=0)

array([0.54086042, 0.41936073, 0.57499881])

In [119]:
np.percentile(x, q = 75, axis = 0)

array([0.81463993, 0.60144531, 0.82461091])

In [134]:
x.max(axis=0)

array([0.86624705, 0.96447847, 0.93109241])

In [136]:
x.max(axis=0)

array([0.86624705, 0.96447847, 0.93109241])

In [133]:
x.argmax(axis=0)

array([8, 1, 3])

In [138]:
x = np.random.rand(5, 3)

In [139]:
x

array([[0.42330513, 0.99989283, 0.97732186],
       [0.87247612, 0.46074616, 0.4588436 ],
       [0.10378252, 0.56367027, 0.17764736],
       [0.92087631, 0.74728261, 0.29489878],
       [0.74874233, 0.27227413, 0.54627241]])

In [140]:
x.sort(axis = 0)

In [141]:
x

array([[0.10378252, 0.27227413, 0.17764736],
       [0.42330513, 0.46074616, 0.29489878],
       [0.74874233, 0.56367027, 0.4588436 ],
       [0.87247612, 0.74728261, 0.54627241],
       [0.92087631, 0.99989283, 0.97732186]])

In [142]:
x.sort(axis = 1)

In [143]:
x

array([[0.10378252, 0.17764736, 0.27227413],
       [0.29489878, 0.42330513, 0.46074616],
       [0.4588436 , 0.56367027, 0.74874233],
       [0.54627241, 0.74728261, 0.87247612],
       [0.92087631, 0.97732186, 0.99989283]])

## Stacking

In [154]:
y = np.random.rand(3, 3)

In [155]:
x,y

(array([[0.10378252, 0.17764736, 0.27227413],
        [0.29489878, 0.42330513, 0.46074616],
        [0.4588436 , 0.56367027, 0.74874233],
        [0.54627241, 0.74728261, 0.87247612],
        [0.92087631, 0.97732186, 0.99989283]]),
 array([[0.44483739, 0.04873773, 0.60624188],
        [0.67955354, 0.25210546, 0.29088925],
        [0.16100528, 0.0989465 , 0.26050908]]))

In [156]:
np.vstack((x,y))

array([[0.10378252, 0.17764736, 0.27227413],
       [0.29489878, 0.42330513, 0.46074616],
       [0.4588436 , 0.56367027, 0.74874233],
       [0.54627241, 0.74728261, 0.87247612],
       [0.92087631, 0.97732186, 0.99989283],
       [0.44483739, 0.04873773, 0.60624188],
       [0.67955354, 0.25210546, 0.29088925],
       [0.16100528, 0.0989465 , 0.26050908]])

In [157]:
np.r_[x,y]

array([[0.10378252, 0.17764736, 0.27227413],
       [0.29489878, 0.42330513, 0.46074616],
       [0.4588436 , 0.56367027, 0.74874233],
       [0.54627241, 0.74728261, 0.87247612],
       [0.92087631, 0.97732186, 0.99989283],
       [0.44483739, 0.04873773, 0.60624188],
       [0.67955354, 0.25210546, 0.29088925],
       [0.16100528, 0.0989465 , 0.26050908]])

In [158]:
x

array([[0.10378252, 0.17764736, 0.27227413],
       [0.29489878, 0.42330513, 0.46074616],
       [0.4588436 , 0.56367027, 0.74874233],
       [0.54627241, 0.74728261, 0.87247612],
       [0.92087631, 0.97732186, 0.99989283]])

In [159]:
z = np.random.rand(5, 2)

In [160]:
z

array([[0.34406779, 0.55884704],
       [0.53408019, 0.44358631],
       [0.17663029, 0.45049878],
       [0.05177155, 0.20239246],
       [0.17794898, 0.1535549 ]])

In [161]:
np.hstack((x,z))

array([[0.10378252, 0.17764736, 0.27227413, 0.34406779, 0.55884704],
       [0.29489878, 0.42330513, 0.46074616, 0.53408019, 0.44358631],
       [0.4588436 , 0.56367027, 0.74874233, 0.17663029, 0.45049878],
       [0.54627241, 0.74728261, 0.87247612, 0.05177155, 0.20239246],
       [0.92087631, 0.97732186, 0.99989283, 0.17794898, 0.1535549 ]])

In [162]:
np.c_[x,z]

array([[0.10378252, 0.17764736, 0.27227413, 0.34406779, 0.55884704],
       [0.29489878, 0.42330513, 0.46074616, 0.53408019, 0.44358631],
       [0.4588436 , 0.56367027, 0.74874233, 0.17663029, 0.45049878],
       [0.54627241, 0.74728261, 0.87247612, 0.05177155, 0.20239246],
       [0.92087631, 0.97732186, 0.99989283, 0.17794898, 0.1535549 ]])

## Broadcasting

In [163]:
x = np.random.rand(3)

In [164]:
x

array([0.47589425, 0.44904161, 0.42677872])

In [165]:
x + 2.0

array([2.47589425, 2.44904161, 2.42677872])

In [166]:
x = np.random.rand(3,3)

In [167]:
x

array([[0.29313961, 0.60525532, 0.92060447],
       [0.84181844, 0.19451664, 0.18107197],
       [0.57427806, 0.72421555, 0.14815484]])

In [168]:
y = np.random.rand(3)

In [169]:
x,y

(array([[0.29313961, 0.60525532, 0.92060447],
        [0.84181844, 0.19451664, 0.18107197],
        [0.57427806, 0.72421555, 0.14815484]]),
 array([0.47861914, 0.45133118, 0.50109621]))

In [170]:
x + y

array([[0.77175875, 1.0565865 , 1.42170068],
       [1.32043757, 0.64584783, 0.68216818],
       [1.0528972 , 1.17554673, 0.64925105]])

In [171]:
x = np.random.rand(3).reshape(3,1)

In [172]:
x

array([[0.91594325],
       [0.50122855],
       [0.62829616]])

In [173]:
y = np.random.rand(3)

In [174]:
y

array([0.63241116, 0.82136346, 0.57678145])

In [176]:
x + y

array([[1.54835441, 1.73730671, 1.4927247 ],
       [1.13363971, 1.32259201, 1.07801   ],
       [1.26070733, 1.44965962, 1.20507761]])

# Linear algebra


In [182]:
x = np.random.rand(3)
y = np.random.rand(3)

In [183]:
x


array([7.61412255e-04, 9.27213139e-01, 8.97002692e-01])

In [184]:
y

array([0.90937211, 0.85564921, 0.25729675])

In [185]:
np.inner(x, y)

1.0248574702817965

In [186]:
(x * y).sum()

1.0248574702817965

In [187]:
np.dot(x, y)

1.0248574702817965

In [192]:
x,y

(array([7.61412255e-04, 9.27213139e-01, 8.97002692e-01]),
 array([0.90937211, 0.85564921, 0.25729675]))

In [193]:
x = np.random.rand(5, 3)
y = np.random.rand(3, 2)

In [194]:
x @ y

array([[0.54468881, 0.6201116 ],
       [0.52274352, 0.34161451],
       [0.29735866, 0.20968712],
       [0.61452947, 0.60240043],
       [0.47580991, 0.30878094]])

In [196]:
np.dot(x, y)

array([[0.54468881, 0.6201116 ],
       [0.52274352, 0.34161451],
       [0.29735866, 0.20968712],
       [0.61452947, 0.60240043],
       [0.47580991, 0.30878094]])

In [198]:
np.inner(x,y.T)

array([[0.54468881, 0.6201116 ],
       [0.52274352, 0.34161451],
       [0.29735866, 0.20968712],
       [0.61452947, 0.60240043],
       [0.47580991, 0.30878094]])

## K nearest neighbour?

In [199]:
N = 10000
d = 3

In [200]:
X = np.random.rand(N, d)

In [201]:
X.shape

(10000, 3)

In [202]:
X


array([[0.42994727, 0.27386473, 0.14331578],
       [0.4800494 , 0.97140645, 0.686839  ],
       [0.46966512, 0.63718608, 0.73839586],
       ...,
       [0.37891008, 0.05096002, 0.18228879],
       [0.63055285, 0.89769851, 0.84190147],
       [0.34079586, 0.40214373, 0.28193742]])

In [203]:
k = 5

In [204]:
centroids = np.random.rand(k, d)

In [205]:
centroids

array([[0.34089751, 0.38467361, 0.52212234],
       [0.42000783, 0.87736966, 0.54706381],
       [0.27429459, 0.92507199, 0.71500211],
       [0.92673441, 0.81630153, 0.26794779],
       [0.29104025, 0.65177466, 0.82808544]])

In [206]:
X

array([[0.42994727, 0.27386473, 0.14331578],
       [0.4800494 , 0.97140645, 0.686839  ],
       [0.46966512, 0.63718608, 0.73839586],
       ...,
       [0.37891008, 0.05096002, 0.18228879],
       [0.63055285, 0.89769851, 0.84190147],
       [0.34079586, 0.40214373, 0.28193742]])

In [207]:
X[0]

array([0.42994727, 0.27386473, 0.14331578])

In [208]:
centroids[0]

array([0.34089751, 0.38467361, 0.52212234])

# Euclidean distance

In [210]:
np.sqrt(((X[0] - centroids[0])**2).sum())

0.40460211796316875

In [211]:
def euclidean(x, y):
    return np.sqrt(((x - y)**2).sum())

In [213]:
euclidean(X[0], centroids[0])

0.40460211796316875

In [222]:
np.array([euclidean(X[0], centroids[k]) for k in range(5)])

array([0.40460212, 0.72617454, 0.88041122, 0.74603504, 0.79436802])

In [223]:
np.sqrt(((X[0] - centroids)**2).sum(axis=1))

array([0.40460212, 0.72617454, 0.88041122, 0.74603504, 0.79436802])

In [242]:
#%%timeit -o 

distances = np.empty((N, k))

for i in range(N):
    for j in range(k):
        distances[i,j] = euclidean(X[i], centroids[j])

In [243]:
distances

array([[0.40460212, 0.72617454, 0.88041122, 0.74603504, 0.79436802],
       [0.6251002 , 0.17884354, 0.21277943, 0.63170791, 0.3972902 ],
       [0.35653595, 0.31106584, 0.34870506, 0.67993829, 0.20040931],
       ...,
       [0.47780389, 0.90426917, 1.02897989, 0.94509078, 0.8864269 ],
       [0.67033831, 0.36286597, 0.37917371, 0.65097758, 0.41944991],
       [0.24081945, 0.54991474, 0.682218  , 0.71766732, 0.60255197]])

In [244]:
X.reshape((N, 1, d))

array([[[0.42994727, 0.27386473, 0.14331578]],

       [[0.4800494 , 0.97140645, 0.686839  ]],

       [[0.46966512, 0.63718608, 0.73839586]],

       ...,

       [[0.37891008, 0.05096002, 0.18228879]],

       [[0.63055285, 0.89769851, 0.84190147]],

       [[0.34079586, 0.40214373, 0.28193742]]])

In [245]:
centroids.shape

(5, 3)

In [246]:
X.reshape((N, 1, d)).shape

(10000, 1, 3)

In [247]:
(X.reshape((N, 1, d)) - centroids).shape

(10000, 5, 3)

In [248]:
#%%timeit

distances2 = np.sqrt(
    ((X.reshape((N, 1, d)) - centroids)**2).sum(axis = 2)
)

In [251]:
(distances == distances2).all()

True

In [252]:
distances.argmin(axis = 1)

array([0, 1, 4, ..., 0, 1, 0])