# Aggregations with NumPy Arrays

NumPy provides various options for calculating summary statistics such as min, max, sum, average, standard deviation, and median.

<b>Sum operations comparison</b>

Let's compare the built-in sum() with np.sum() on an array of 100 random values.

In [1]:
import numpy as np

In [4]:
np.random.random(100)  #create np array of 100 random values


array([0.02048692, 0.20375205, 0.01546757, 0.03386346, 0.92049175,
       0.40257568, 0.46337998, 0.68242872, 0.03551958, 0.61329148,
       0.78487223, 0.4719417 , 0.20101109, 0.98739064, 0.42517067,
       0.95664446, 0.42709763, 0.40186752, 0.88847401, 0.05474129,
       0.92118898, 0.87341187, 0.82152509, 0.53789545, 0.79316108,
       0.20475856, 0.26160011, 0.85983521, 0.44274448, 0.97183487,
       0.7228159 , 0.0506053 , 0.45890372, 0.2592248 , 0.28960949,
       0.48998687, 0.70902505, 0.93856778, 0.55463926, 0.64562595,
       0.45130238, 0.56029762, 0.20181685, 0.5569976 , 0.40616021,
       0.64538093, 0.79039332, 0.17626017, 0.52589668, 0.48086486,
       0.69459972, 0.16620457, 0.17749516, 0.57204257, 0.10194869,
       0.32493049, 0.77030011, 0.36107187, 0.81651586, 0.12425743,
       0.6005592 , 0.42885603, 0.34124061, 0.84796753, 0.78294385,
       0.14346173, 0.83537958, 0.86159704, 0.88942616, 0.85348195,
       0.53815507, 0.89353929, 0.78015501, 0.13788366, 0.00667

In [5]:
nparray = np.random.random(100)  #create np array of 100 random values
print("type : ", type(nparray)) 
sum(nparray)   #apply sum on it

type :  <class 'numpy.ndarray'>


52.88739250666828

In [6]:
np.sum(nparray)  # apply np.sum() on array

52.88739250666825

In [7]:
%timeit sum(nparray)

12.4 µs ± 335 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [8]:
%timeit np.sum(nparray)

2.87 µs ± 35.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


np.sum() performs much better than the built-in sum() function: in the timings above it is roughly four times faster (2.87 µs vs. 12.4 µs per loop).

<b>Finding minimum and maximum</b>

In [15]:
nparray

array([0.93590421, 0.64662614, 0.78043065, 0.3195217 , 0.09856843,
       0.07461706, 0.53835071, 0.38534825, 0.11614307, 0.48195876,
       0.26809185, 0.07647077, 0.70845964, 0.24385294, 0.08436315,
       0.54485213, 0.99366136, 0.30353153, 0.06906205, 0.21052785,
       0.09705089, 0.35816377, 0.36192618, 0.41553726, 0.74889155,
       0.91036602, 0.3865395 , 0.74393723, 0.7767306 , 0.64918206,
       0.4911143 , 0.8249897 , 0.49765776, 0.03279451, 0.71130514,
       0.90291026, 0.09869709, 0.67953225, 0.65387181, 0.84521436,
       0.40202118, 0.75429394, 0.28321498, 0.53004401, 0.72722108,
       0.81737237, 0.8150976 , 0.67350288, 0.06019803, 0.30967881,
       0.46892584, 0.31767079, 0.80798928, 0.92225873, 0.05661287,
       0.87771533, 0.17993659, 0.00140134, 0.49407007, 0.47412474,
       0.21991786, 0.55340462, 0.45036666, 0.55994158, 0.16525676,
       0.92881969, 0.15780931, 0.47306187, 0.51940651, 0.23006248,
       0.7594677 , 0.19150656, 0.27603523, 0.88736462, 0.58580

In [11]:
%timeit np.min(nparray)

2.82 µs ± 78.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [13]:
np.min(nparray)

0.013624986923240656

In [12]:
np.max(nparray)

0.999228888110364

Another way to compute the min and max is to call the corresponding methods on the array object itself:

In [14]:
nparray.min()

0.013624986923240656

In [15]:
nparray.max()

0.999228888110364

<b>Multidimensional aggregates</b>

Computing aggregates over the complete array 

In [16]:
matrix = np.random.random((4, 4))
matrix

array([[0.06615341, 0.50608896, 0.56658548, 0.35768035],
       [0.65706018, 0.65492035, 0.23211526, 0.02694843],
       [0.3911608 , 0.74756944, 0.09507736, 0.90465574],
       [0.56196113, 0.01229393, 0.6331811 , 0.53751733]])

In [17]:
matrix.sum()

6.950969238929778

In [18]:
np.sum(matrix)

6.950969238929778

In [19]:
matrix.min()

0.012293932507297933

In [20]:
np.min(matrix)

0.012293932507297933

In [21]:
matrix.max()

0.9046557441572195

In [22]:
np.max(matrix)

0.9046557441572195

Computing aggregates within each column (axis=0)

In [23]:
matrix

array([[0.06615341, 0.50608896, 0.56658548, 0.35768035],
       [0.65706018, 0.65492035, 0.23211526, 0.02694843],
       [0.3911608 , 0.74756944, 0.09507736, 0.90465574],
       [0.56196113, 0.01229393, 0.6331811 , 0.53751733]])

In [35]:
n1, n2 = np.vsplit(matrix,[1])

In [43]:
n1

array([[0.06615341, 0.50608896, 0.56658548, 0.35768035]])

In [38]:
n1.sum()

1.496508196067441

In [45]:
matrix.sum(axis=0)

array([1.67633551, 1.92087268, 1.52695919, 1.82680185])

In [41]:
np.sum(matrix, axis=0)

array([1.67633551, 1.92087268, 1.52695919, 1.82680185])

In [44]:
matrix.max(axis=0)

array([0.65706018, 0.74756944, 0.6331811 , 0.90465574])

In [46]:
np.max(matrix, axis=0)

array([0.65706018, 0.74756944, 0.6331811 , 0.90465574])

In [47]:
matrix.min(axis=0)

array([0.06615341, 0.01229393, 0.09507736, 0.02694843])

In [48]:
np.min(matrix, axis=0)

array([0.06615341, 0.01229393, 0.09507736, 0.02694843])

Computing aggregates within each row (axis=1)

In [49]:
matrix

array([[0.06615341, 0.50608896, 0.56658548, 0.35768035],
       [0.65706018, 0.65492035, 0.23211526, 0.02694843],
       [0.3911608 , 0.74756944, 0.09507736, 0.90465574],
       [0.56196113, 0.01229393, 0.6331811 , 0.53751733]])

In [50]:
matrix.sum(axis=1)

array([1.4965082 , 1.57104421, 2.13846334, 1.74495349])

In [51]:
np.sum(matrix, axis=1)

array([1.4965082 , 1.57104421, 2.13846334, 1.74495349])

In [52]:
matrix.min(axis=1)

array([0.06615341, 0.02694843, 0.09507736, 0.01229393])

In [53]:
np.min(matrix, axis=1)

array([0.06615341, 0.02694843, 0.09507736, 0.01229393])

In [54]:
matrix.max(axis=1)

array([0.56658548, 0.65706018, 0.90465574, 0.6331811 ])

In [55]:
np.max(matrix, axis=1)

array([0.56658548, 0.65706018, 0.90465574, 0.6331811 ])

<b>Other aggregate functions</b>

In [57]:
matrix

array([[0.06615341, 0.50608896, 0.56658548, 0.35768035],
       [0.65706018, 0.65492035, 0.23211526, 0.02694843],
       [0.3911608 , 0.74756944, 0.09507736, 0.90465574],
       [0.56196113, 0.01229393, 0.6331811 , 0.53751733]])

In [58]:
matrix.prod()  # compute product of all values

1.0800781610878671e-09

In [60]:
np.mean(matrix)

0.4344355774331111

In [59]:
matrix.mean()  # compute average of all values

0.4344355774331111

In [61]:
matrix.std() # standard deviation

0.26810258669730813

In [62]:
matrix.var()  # variance

0.07187899699378764

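<b>NaN-safe aggregate functions</b>

The nan-prefixed functions compute the same aggregates while ignoring NaN (missing) values. The example array below contains no NaN values, so the results match those of the regular aggregate functions.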

In [63]:
x = np.array([1, 5, 3, 4, 5])
x

array([1, 5, 3, 4, 5])

In [64]:
np.nansum(x)  # NaN safe sum , ignore missing values

18

In [65]:
np.nanmax(x) # NaN safe max , ignore missing values

5

In [66]:
np.nanmin(x) # NaN safe min , ignore missing values

1

In [67]:
np.nanstd(x) # NaN safe std , ignore missing values

1.4966629547095764