# NumPy

NumPy, short for Numerical Python, is an open-source Python library that provides support for large, multi-dimensional arrays and matrices.

It also has a collection of high-level mathematical functions to operate on these arrays. It was created by Travis Oliphant in 2005.

NumPy is a general-purpose array-processing package.
It provides a high-performance multidimensional array object and tools for working with these arrays.
It is the fundamental package for scientific computing with Python.


*  Provides efficient storage
*  Provides better ways of handling data for processing
*  Fast
*  Easy to learn
*  Uses relatively less memory to store data
*  Written in C

------------------------------------------------------------------------------

In [1]:
import numpy as np                               # pip install numpy

In [172]:
arr = np.array([2, 4, 6, 8, 3])

In [173]:
arr

array([2, 4, 6, 8, 3])

In [174]:
arr2 = np.array([[1,6,4]], np.int8)                # Two-dimensional array stored as 8-bit integers. When a dtype is specified, the values must fit within its range (int8: -128 to 127). Other integer dtypes include int32 and int64.

In [175]:
arr2

array([[1, 6, 4]], dtype=int8)

In [176]:
arr2[0,1]

np.int8(6)

In [177]:
arr2.shape

(1, 3)

In [178]:
arr.shape

(5,)

In [179]:
arr2.dtype

dtype('int8')

In [180]:
arr.dtype

dtype('int64')

In [182]:
arr.argsort()

array([0, 4, 1, 2, 3])

In [183]:
np.argsort(arr)

array([0, 4, 1, 2, 3])

In [181]:
np.sort(arr)

array([2, 3, 4, 6, 8])

----------------------------------------------------------------------------------------

## NumPy Array

**There are 5 general mechanisms for creating arrays:**

* Conversion from other Python structures (e.g., lists, tuples)
* Intrinsic NumPy array creation functions (e.g., arange, ones, zeros, etc.)
* Reading arrays from disk, either from standard or custom formats
* Creating arrays from raw bytes through the use of strings or buffers
* Use of special library functions (e.g., random)

**Creating an array by conversion from other structures**

In [171]:
listarr = np.array([[6, 2, 4], [9, 6, 3], [10, 5, 15]])

In [67]:
listarr

array([[ 6,  2,  4],
       [ 9,  6,  3],
       [10,  5, 15]])

In [13]:
listarr.size

9

In [63]:
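np.array({9, 5, 7})                        # a set is not converted element by element: NumPy wraps it in an array of dtype=object, which is inefficient; pass a list or tuple to get a numeric (int or float) array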

array({9, 5, 7}, dtype=object)

**Creating an array with intrinsic NumPy array creation functions**

In [15]:
zeroes = np.zeros((2,5)) 

In [16]:
zeroes                                 # prints a 2 x 5 array with all elements set to 0

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [17]:
zeroes.shape
zeroes.dtype

dtype('float64')

In [18]:
rng = np.arange(15)

In [19]:
rng                                     # prints an array from 0-14

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [20]:
lin_space = np.linspace(2, 4, 8)

In [21]:
lin_space                               # prints 8 evenly spaced values from 2 to 4 (both endpoints included)

array([2.        , 2.28571429, 2.57142857, 2.85714286, 3.14285714,
       3.42857143, 3.71428571, 4.        ])

In [22]:
emp = np.empty((5, 8))

In [23]:
emp                                     # prints an uninitialized 5 x 8 array; the values are whatever was already in memory, not random numbers

array([[6.23042070e-307, 4.67296746e-307, 1.69121096e-306,
        1.86920328e-306, 1.89146896e-307, 7.56571288e-307,
        3.11525958e-307, 1.24610723e-306],
       [8.45593934e-307, 9.34600963e-307, 2.22518251e-306,
        1.33511969e-306, 1.78019761e-306, 9.34608432e-307,
        1.33512376e-306, 1.11261570e-306],
       [3.56027436e-307, 1.37961641e-306, 8.34424342e-308,
        1.51317924e-306, 1.33508031e-306, 1.60219170e-306,
        8.90124610e-308, 8.45603440e-307],
       [7.56570609e-307, 6.23054972e-307, 7.56592338e-307,
        7.56601165e-307, 1.02359984e-306, 3.56043054e-307,
        1.37961641e-306, 9.45697982e-308],
       [1.78020169e-306, 7.56601165e-307, 1.02359984e-306,
        1.33510679e-306, 2.22522597e-306, 8.01097889e-307,
        1.02360867e-306, 1.94702875e-308]])

In [24]:
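emp_like = np.empty_like(lin_space)                      # creates an uninitialized array with the same shape and dtype as an existing array; values are not copied, so any resemblance to lin_space in the output is leftover memory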

In [25]:
emp_like                                       

array([2.        , 2.28571429, 2.57142857, 2.85714286, 3.14285714,
       3.42857143, 3.71428571, 4.        ])

In [26]:
ident = np.identity(4)

In [38]:
ident                                      # prints a 4 x 4 identity matrix

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [34]:
ary = np.arange(50)

In [35]:
ary

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [36]:
ary.reshape(2, 25)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [37]:
ary.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [150]:
#dir(np)                                 # all functions of NumPy

In [151]:
len(dir(np))

535

-----------------------------------------------------------------------------------------

## Array Slicing

In [124]:
arrs = np.array([3, 6, 4, 8, 9, 2])

In [125]:
arrs[3:6]

array([8, 9, 2])

In [126]:
ar = arrs[2:7]
ar

array([4, 8, 9, 2])

In [128]:
ar[0] = 1                                       # a slice is a view: changes to the sub-array are reflected in the original array because both point to the same memory
arrs

array([3, 6, 1, 8, 9, 2])

In [136]:
arc = arrs[2:7].copy()                                           
arc

array([1, 8, 9, 2])

In [139]:
arc[3]= 4
arc

array([1, 8, 9, 4])

In [140]:
arrs

array([3, 6, 1, 8, 9, 2])

-----------------------------------------------------------------------------------------

## NumPy Axis

1D Array  -->  1 Axis  [axis0]

2D Array  -->  2 Axes [axis0, axis1]

Axis numbering starts at zero.

arr = [[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]]

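axis0  -->  runs down the rows (i.e. along each column):     (1, 4, 7), (2, 5, 8), (3, 6, 9)

axis1  -->  runs across the columns (i.e. along each row):     (1, 2, 3), (4, 5, 6), (7, 8, 9)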


In [43]:
arr_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
arrax = np.array(arr_list)
arrax

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [44]:
arrax.sum(axis=0)

array([12, 15, 18])

In [45]:
arrax.sum(axis=1)

array([ 6, 15, 24])

In [64]:
arrax.T                                  # Transpose of matrix

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [47]:
arrax.flat

<numpy.flatiter at 0x29fee04f950>

In [48]:
for i in arrax.flat : 
    print(i)

1
2
3
4
5
6
7
8
9


In [51]:
arrax.ndim                                     # No of dimensions

2

In [52]:
arrax.size

9

In [68]:
arrax.nbytes                                    # Total bytes consumed

72

In [58]:
arrax.argmax()                                  # gives the index of the max element in the flattened array

np.int64(8)

In [60]:
arrax.argmin()

np.int64(0)

In [154]:
np.sort(arrax)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [70]:
listarr.argsort()                               # gives, for each row, the indices that would sort that row

array([[1, 2, 0],
       [2, 1, 0],
       [1, 0, 2]])

In [71]:
arrax.argmax(axis=0)

array([2, 2, 2])

In [72]:
arrax.argmin(axis=1)

array([0, 0, 0])

-----------------------------------------------------------------------------------------

## Numpy Matrix Operations

In [77]:
arr1 = np.array([[1, 4, 7], [2, 5, 4], [5, 3, 9]])
arr2 = np.array([[2, 1, 4], [3, 4, 3], [1, 3, 2]])

In [81]:
arr1

array([[1, 4, 7],
       [2, 5, 4],
       [5, 3, 9]])

In [82]:
arr2

array([[2, 1, 4],
       [3, 4, 3],
       [1, 3, 2]])

In [83]:
arr1 + arr2

array([[ 3,  5, 11],
       [ 5,  9,  7],
       [ 6,  6, 11]])

In [106]:
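arr1 - arr2                                     # NOTE: the cells were run out of order (see the In [..] numbers). This output, and the outputs of the other cells numbered above In [93], reflect arr1[1,2] = 0, which is assigned in In [93] further down.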

array([[-1,  3,  3],
       [-1,  1, -3],
       [ 4,  0,  7]])

In [84]:
arr1 * arr2

array([[ 2,  4, 28],
       [ 6, 20, 12],
       [ 5,  9, 18]])

In [108]:
arr1 ** arr2

array([[   1,    4, 2401],
       [   8,  625,    0],
       [   5,   27,   81]])

In [109]:
arr1 / arr2

array([[0.5       , 4.        , 1.75      ],
       [0.66666667, 1.25      , 0.        ],
       [5.        , 1.        , 4.5       ]])

In [145]:
arr1.dot(arr2)                                                # dot product of two matrices

array([[21, 38, 30],
       [19, 22, 23],
       [28, 44, 47]])

In [153]:
np.dot(arr1, arr2)

array([[21, 38, 30],
       [19, 22, 23],
       [28, 44, 47]])

In [147]:
arr1.dot(arr2.transpose())

array([[34, 40, 27],
       [ 9, 26, 17],
       [49, 54, 32]])

In [148]:
arr1.dot(arr2.T)

array([[34, 40, 27],
       [ 9, 26, 17],
       [49, 54, 32]])

In [152]:
np.cross(arr1, arr2)

array([[  9,  10,  -7],
       [ 15,  -6,  -7],
       [-21,  -1,  12]])

In [85]:
np.sqrt(arr1)

array([[1.        , 2.        , 2.64575131],
       [1.41421356, 2.23606798, 2.        ],
       [2.23606798, 1.73205081, 3.        ]])

In [86]:
arr1.sum()

np.int64(40)

In [87]:
arr2.max()

np.int64(4)

In [88]:
arr1.min()

np.int64(1)

In [156]:
np.sort(arr1)

array([[1, 4, 7],
       [0, 2, 5],
       [3, 5, 9]])

In [157]:
np.sort(arr2, axis=0)

array([[1, 1, 2],
       [2, 3, 3],
       [3, 4, 4]])

In [159]:
%%time
np.sort(arr1, axis=1, kind='mergesort')

CPU times: total: 0 ns
Wall time: 0 ns


array([[1, 4, 7],
       [0, 2, 5],
       [3, 5, 9]])

In [89]:
np.where(arr1>4)

(array([0, 1, 2, 2]), array([2, 1, 0, 2]))

In [90]:
type(np.where(arr1>4))

tuple

In [91]:
np.count_nonzero(arr2)

9

In [92]:
np.nonzero(arr2)

(array([0, 0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2, 0, 1, 2]))

In [93]:
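arr1[1,2] = 0                                         # NOTE: this cell (In [93]) was executed before the cells numbered In [106] and higher shown earlier in this section, so their outputs already contain this 0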

In [99]:
arr1.tolist()                                         # creates python list of numpy array

[[1, 4, 7], [2, 5, 0], [5, 3, 9]]

[NumPy methods and attributes](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html)

-----------------------------------------------------------------------------------------

## Space Efficiency

In [None]:
import sys

py_arr = [5, 4, 7]
np_arr = np.array(py_arr)


In [96]:
sys.getsizeof(1) * len(py_arr)

84

In [97]:
np_arr.itemsize * np_arr.size

24

----------------------------------------------------------------------------------------

## Time Efficiency

In [103]:
arr_np = np.arange(1000000)
list_py = list(range(1000000))

In [104]:
%time for _ in range(10) : [item *3 for item in list_py]

CPU times: total: 344 ms
Wall time: 761 ms


In [105]:
%time for _ in range(10) : arr_np = arr_np *3

CPU times: total: 0 ns
Wall time: 44.1 ms
