# Numpy

NumPy is a general-purpose array-processing package. It provides a high-performance multidimensional array object and tools for working with these arrays. It is the fundamental package for scientific computing with Python.

In [2]:
#Importing numpy
import numpy as np

In [3]:
# Plain Python list holding the integers 0 through 9
myList = list(range(10))

In [4]:
# Build a NumPy array from the Python list defined above
arr = np.array(myList)

In [5]:
# Checking the Python type of the object (the array class itself, not the dtype of its elements)
type(arr)

numpy.ndarray

In [6]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
# Checking the shape of the array: one entry per dimension, so a single entry means the array is 1-D
arr.shape

(10,)

In [8]:
# Arrange the same 10 elements as 2 rows of 5.
# reshape returns the reshaped array; arr itself keeps its 1-D shape.
arr.reshape(2, 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

### A 1D array is printed with one pair of opening and closing brackets, while a 2D array has two nested levels of brackets.

In [9]:
# Creating a 2D array: each list becomes one row, so three lists of
# five elements produce an array with 3 rows and 5 columns
myList1 = [2, 4, 6, 8, 10]
myList2 = [3, 6, 9, 12, 15]
myList3 = [4, 8, 12, 16, 20]

arr2 = np.array([
    myList1,
    myList2,
    myList3,
])

In [10]:
arr2

array([[ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20]])

In [11]:
# Checking the shape of the newly created 2D array: (number of rows, number of columns)
arr2.shape

(3, 5)

In [12]:
# Reshaping the 3x5 array into 5 rows of 3: the 15 elements are read
# row by row and laid out again in the new shape
arr2.reshape(5, 3)

array([[ 2,  4,  6],
       [ 8, 10,  3],
       [ 6,  9, 12],
       [15,  4,  8],
       [12, 16, 20]])

## Indexing the NumPy Arrays

### Indexing 1D Array

In [13]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
# Indexing is zero-based, so index 6 selects the seventh element
arr[6]

6

### Indexing 2D Array

In [15]:
arr2

array([[ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20]])

In [16]:
# A bare colon on each axis selects every row and every column
arr2[:, :]

array([[ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20]])

In [17]:
# Row at index 2, columns from index 1 through the end
arr2[2, 1:]

array([ 8, 12, 16, 20])

In [18]:
# Rows 0 and 1 (the stop index 2 is excluded), all columns
arr2[0:2, :]

array([[ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15]])

In [19]:
# Rows 0-1 and columns 0-1: the top-left 2x2 corner
arr2[0:2, 0:2]

array([[2, 4],
       [3, 6]])

In [20]:
# Row at index 1, columns 1 through 3 (the stop index 4 is excluded)
arr2[1, 1:4]

array([ 6,  9, 12])

In [23]:
# Creating a 1D array with arange: the start (0) is included, the stop (10) is not
arr3 = np.arange(0, 10)
arr3

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
# The third argument to arange is the step: values from 0 up to (not including) 100 in steps of 11
arr3 = np.arange(0, 100, 11)
arr3

array([ 0, 11, 22, 33, 44, 55, 66, 77, 88, 99])

In [25]:
# Creating 50 equally spaced values from 0 to 1 with linspace (both endpoints are included)
arr4 = np.linspace(0, 1, num=50)
arr4

array([0.        , 0.02040816, 0.04081633, 0.06122449, 0.08163265,
       0.10204082, 0.12244898, 0.14285714, 0.16326531, 0.18367347,
       0.20408163, 0.2244898 , 0.24489796, 0.26530612, 0.28571429,
       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,
       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,
       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,
       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,
       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,
       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,
       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])

In [28]:
# Broadcasting a scalar: assigning a single value to a slice sets every
# element of that slice (here, everything from index 5 onward becomes 99)
arr5 = np.arange(1, 15)
arr5
arr5[5:] = 99
arr5

array([ 1,  2,  3,  4,  5, 99, 99, 99, 99, 99, 99, 99, 99, 99])

#### Assigning an array to another variable does not copy it: both names refer to the same array in memory. Thus, any operation performed through one variable also changes what the other variable sees.

In [29]:
# Plain assignment does not copy the data: arr6 is a second name for the same array as arr5
arr6 = arr5
arr6

array([ 1,  2,  3,  4,  5, 99, 99, 99, 99, 99, 99, 99, 99, 99])

In [31]:
# Writing through arr6 modifies the one shared array, so arr5 shows the same change
arr6[5:] = 101
arr6

array([  1,   2,   3,   4,   5, 101, 101, 101, 101, 101, 101, 101, 101,
       101])

In [32]:
arr5

array([  1,   2,   3,   4,   5, 101, 101, 101, 101, 101, 101, 101, 101,
       101])

In [33]:
# To avoid sharing, use copy(): it creates an independent array,
# so modifying arr7 leaves arr6 unchanged
arr7 = arr6.copy()
print(arr6)
arr7[5:] = 1001
print(arr7)
print(arr6)

[  1   2   3   4   5 101 101 101 101 101 101 101 101 101]
[   1    2    3    4    5 1001 1001 1001 1001 1001 1001 1001 1001 1001]
[  1   2   3   4   5 101 101 101 101 101 101 101 101 101]


#### Some useful element-wise operations and conditions for exploratory data analysis


In [38]:
# Element-wise comparison: yields a boolean array with the same shape as arr2
val = 9
arr2 > val

array([[False, False, False, False,  True],
       [False, False, False,  True,  True],
       [False, False,  True,  True,  True]])

In [39]:
# Element-wise power: every element is squared
arr2**2

array([[  4,  16,  36,  64, 100],
       [  9,  36,  81, 144, 225],
       [ 16,  64, 144, 256, 400]], dtype=int32)

In [40]:
# Element-wise division: every element is divided by 4, giving a float array
arr2 / 4

array([[0.5 , 1.  , 1.5 , 2.  , 2.5 ],
       [0.75, 1.5 , 2.25, 3.  , 3.75],
       [1.  , 2.  , 3.  , 4.  , 5.  ]])

In [41]:
# Boolean-mask indexing: keeps only the elements smaller than val,
# returned as a flat 1-D array
arr2[arr2 < val]

array([2, 4, 6, 8, 3, 6, 4, 8])

In [8]:
# Creating an array in which every element is 1
np.ones(4, dtype=int)  # dtype defaults to float when omitted

array([1, 1, 1, 1])

In [9]:
# Same array of ones, with the float dtype requested explicitly
np.ones(4, dtype=float)

array([1., 1., 1., 1.])

In [11]:
# Passing a tuple as the shape gives a 2-D array: 2 rows by 5 columns of ones
np.ones((2, 5), dtype=int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [13]:
# Uniform random distribution
# Values are drawn from the half-open interval [0, 1): 0 is possible, 1 is not
np.random.rand(3, 3)

array([[0.10564048, 0.44684731, 0.13403931],
       [0.0320445 , 0.32533534, 0.91930747],
       [0.24989854, 0.68054504, 0.69087483]])

In [17]:
# Standard normal distribution (mean 0, standard deviation 1): a 3x3 array of samples
arr8 = np.random.randn(3, 3)
arr8

array([[-1.10494644, -0.81090447, -0.00453452],
       [ 2.42931377, -1.41522235,  1.4101435 ],
       [-2.03309515,  0.98609031, -0.75600542]])

In [18]:
# Creating an array of 8 random integers from 10 (inclusive) up to 100 (exclusive)
np.random.randint(10, 100, size=8)

array([79, 11, 38, 65, 14, 79, 20, 85])

In [19]:
# The same 8 random integers, arranged as 2 rows of 4
np.random.randint(10, 100, size=8).reshape(2, 4)

array([[39, 40, 37, 43],
       [95, 84, 70, 98]])