# NumPy

NumPy is a general-purpose array-processing package. It provides a high-performance multidimensional array object, and tools for working with these arrays. It is the fundamental package for scientific computing with Python.

## Why is NumPy fast?
- Vectorization describes the absence of any explicit looping, indexing, etc., in the code - these things are taking place, of course, just “behind the scenes” in optimized, pre-compiled C code. Vectorized code has many advantages, among which are:

- vectorized code is more concise and easier to read

- fewer lines of code generally means fewer bugs

- the code more closely resembles standard mathematical notation (making it easier, typically, to correctly code mathematical constructs)

- vectorization results in more “Pythonic” code. Without vectorization, our code would be littered with inefficient and difficult-to-read `for` loops.

## What is an array?
- An array is a data structure that stores values of the same data type. In Python, this is the main difference between arrays and lists: while Python lists can contain values of different data types, arrays in Python can only contain values of the same data type.

In [1]:
import numpy as np

In [2]:
my_lst=[1,2,3,4,5]

arr=np.array(my_lst)

In [3]:
print(arr)

[1 2 3 4 5]


In [4]:
type(arr)

numpy.ndarray

In [12]:
## Multinested array
my_lst1=[1,2,3,4,5]
my_lst2=[2,3,4,5,6]
my_lst3=[9,7,6,8,9]

arr1=np.array([my_lst1,my_lst2,my_lst3])

In [13]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [9, 7, 6, 8, 9]])

In [7]:
## check the shape of the array

arr.shape

(3, 5)

## Indexing

In [9]:
my_lst=[1,2,3,4,5]

arr=np.array(my_lst)

In [10]:
## Accessing the array elements

arr

array([1, 2, 3, 4, 5])

In [11]:
arr[3]

4

In [14]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [9, 7, 6, 8, 9]])

In [15]:
arr1[1:,:2]

array([[2, 3],
       [9, 7]])

In [16]:
arr1[:,3:]

array([[4, 5],
       [5, 6],
       [8, 9]])

In [17]:
arr

array([1, 2, 3, 4, 5])

In [18]:
arr[3:]=100

In [19]:
arr

array([  1,   2,   3, 100, 100])

In [24]:
arr[2:]=100

In [25]:
arr

array([  1,   2, 100, 100, 100])

## Creating Arrays

### From list / tuple

In [26]:
a = np.array([1, 2, 3])

In [27]:
a

array([1, 2, 3])

### Zeros/Ones

In [29]:
np.zeros((2, 3))


array([[0., 0., 0.],
       [0., 0., 0.]])

In [30]:
np.ones((3, 2))

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

### Range / Random

In [36]:
np.random.randint(1, 10, size=(3, 3))


array([[4, 2, 9],
       [4, 3, 6],
       [1, 4, 5]])

In [31]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [32]:
np.arange(0, 10, 2)

array([0, 2, 4, 6, 8])

In [33]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [34]:
np.random.rand(3, 3)

array([[0.15076019, 0.08682067, 0.90004063],
       [0.70664481, 0.47422101, 0.59558624],
       [0.60037933, 0.82522903, 0.73629568]])

In [35]:
np.random.randn(3, 3)

array([[ 0.79601987,  1.3656323 , -0.12959906],
       [ 0.58313045,  0.53964368, -1.11627258],
       [-0.0639479 , -0.6626367 ,  0.78479084]])

### Array Info & Properties

In [41]:
arr.itemsize # memory taken by one element, in bytes

4

In [37]:
arr.shape

(5,)

In [38]:
arr.ndim

1

In [39]:
arr.size #how many elements

5

In [40]:
arr.dtype

dtype('int32')

### Indexing and Slicing

In [53]:
arr = arr1[1:3, 0:2]
arr

array([[2, 3],
       [9, 7]])

In [42]:
arr[0]

1

In [45]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [9, 7, 6, 8, 9]])

In [44]:
arr1[1, 2]

4

In [46]:
arr[:2]

array([1, 2])

In [48]:
arr1[:, 1]

array([2, 3, 7])

### Reshaping and Transpose

In [62]:
arr1

array([[1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [9, 7, 6, 8, 9]])

In [56]:
arr1.reshape(5, 3)

array([[1, 2, 3],
       [4, 5, 2],
       [3, 4, 5],
       [6, 9, 7],
       [6, 8, 9]])

In [58]:
arr1.flatten() # convert to one dimension; always returns a (safe) copy

array([1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 9, 7, 6, 8, 9])

In [59]:
arr1.ravel() # convert to one dimension; fast, returns a view when possible

array([1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 9, 7, 6, 8, 9])

In [63]:
arr1.T

array([[1, 2, 9],
       [2, 3, 7],
       [3, 4, 6],
       [4, 5, 8],
       [5, 6, 9]])

### Mathematical Operations

In [73]:
np.sqrt(a)

array([[1.38629436, 1.79175947],
       [2.07944154, 2.30258509]])

In [65]:
arr + 10

array([[12, 13],
       [19, 17]])

In [66]:
arr * 2

array([[ 4,  6],
       [18, 14]])

In [70]:
arr1 = ([4,6],[8,10])
arr2 = ([10,12],[14,16])
a= np.array(arr1)
b= np.array(arr1)
a+b

array([[ 8, 12],
       [16, 20]])

In [72]:
a.dot(b)

array([[ 64,  84],
       [112, 148]])

In [74]:
np.exp(a)


array([[   54.59815003,   403.42879349],
       [ 2980.95798704, 22026.46579481]])

In [75]:
np.log(a)

array([[1.38629436, 1.79175947],
       [2.07944154, 2.30258509]])

### Aggregation Functions

In [76]:
np.sum(a)

28

In [77]:
np.mean(a)


7.0

In [79]:
np.median(a) #Median of all values

7.0

In [83]:
np.std(a)

2.23606797749979

In [81]:
np.median(a, axis=0) #Column-wise median

array([6., 8.])

In [82]:
np.median(a, axis=1) #Row-wise median

array([5., 9.])

In [84]:
np.var(a)

5.0

In [85]:
np.min(a)

4

In [86]:
np.max(a)

10

In [89]:
np.sum(a, axis=0)  # column-wise

array([12, 16])

In [88]:
np.sum(a, axis=1)  # row-wise

array([10, 18])

In [90]:
a > 5

array([[False,  True],
       [ True,  True]])

In [91]:
a[a > 5]


array([ 6,  8, 10])

In [93]:
np.where(a > 5, 1, 0) # Performs conditional element-wise selection: 1 where the condition is True, 0 where it is False.

array([[0, 1],
       [1, 1]])

### Statistical Functions (Very Important)

In [96]:
np.percentile(a, 60) #Returns the 60th percentile of the data.


7.6

In [97]:
np.corrcoef(a, b)

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [98]:
np.unique(a)

array([ 4,  6,  8, 10])

### Linear Algebra



In [99]:
np.dot(a, b)

array([[ 64,  84],
       [112, 148]])

In [100]:
a @ b

array([[ 64,  84],
       [112, 148]])

In [101]:
np.linalg.det(a) #Determinant

-7.999999999999998

In [102]:
np.linalg.inv(a) #Inverse of matrix

array([[-1.25,  0.75],
       [ 1.  , -0.5 ]])

In [103]:
np.linalg.eig(a) #Eigenvalues & Eigenvectors

EigResult(eigenvalues=array([-0.54983444, 14.54983444]), eigenvectors=array([[-0.79681209, -0.49436913],
       [ 0.60422718, -0.86925207]]))

### Stack, Split & Join

In [104]:
np.vstack((a, b)) #Stacks arrays row-wise (top to bottom)

array([[ 4,  6],
       [ 8, 10],
       [ 4,  6],
       [ 8, 10]])

In [105]:
np.hstack((a, b)) #Stacks arrays column-wise (side by side)

array([[ 4,  6,  4,  6],
       [ 8, 10,  8, 10]])

In [106]:
np.concatenate((a, b), axis=0) # General stacking function; axis=0 is the same as vstack

array([[ 4,  6],
       [ 8, 10],
       [ 4,  6],
       [ 8, 10]])

In [108]:
arr = np.array([1, 2, 3, 4, 5, 6])
np.split(arr, 3)

[array([1, 2]), array([3, 4]), array([5, 6])]

### Handling Missing / Special Values

In [109]:
np.nan

nan

In [110]:
np.isnan(arr)

array([False, False, False, False, False, False])

In [111]:
np.isinf(arr)

array([False, False, False, False, False, False])

In [112]:
np.nanmean(arr)

3.5

### Broadcasting 

In [113]:
arr = np.array([[1,2,3],[4,5,6]])
arr + np.array([10,20,30])


array([[11, 22, 33],
       [14, 25, 36]])

#### Some conditions very useful in Exploratory Data Analysis 

In [114]:
val=2

arr[arr<3]

array([1, 2])

In [115]:
# Create arrays and reshape

np.arange(0,10).reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [116]:
arr1=np.arange(0,10).reshape(2,5)

In [117]:
arr2=np.arange(0,10).reshape(2,5)

In [118]:
arr1 * arr2

array([[ 0,  1,  4,  9, 16],
       [25, 36, 49, 64, 81]])