In [3]:
!pip3 install numpy



## We are going to learn numpy now!

### NumPy is a general-purpose array processing package. It provides a high-performance multidimensional array object and
### tools for working with these arrays.

### It is the fundamental package for scientific computing with Python. It contains among other things:
    - a powerful N-dimensional array object.
    - sophisticated functions.
    - tools for integrating C/C++ and Fortran code.
    - useful linear algebra, Fourier Transform and random number capabilities.

### NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data types can be defined, which allows NumPy to integrate seamlessly and speedily with a wide variety of databases.

In [10]:
import numpy as np

In [33]:
# Build a 2-D ndarray (2 rows x 3 columns) from a nested Python list.
arr = np.array([[1, 2, 3],
                [4, 2, 5]]) # unlike a plain Python list, an ndarray has a fixed shape and a single element dtype

In [37]:
arr

array([[1, 2, 3],
       [4, 2, 5]])

In [14]:
type(arr)

numpy.ndarray

### This array has:
- rank = 2 (as it is 2-D)
- the first dimension (rows) has length 2 and the second dimension (columns) has length 3.
- overall shape can be expressed as : (2,3)

In [17]:
arr.ndim #Dimension

2

In [19]:
arr.shape #shows (row,col) of array

(2, 3)

In [21]:
arr.size # total number of elements (2 rows * 3 columns = 6)

6

In [23]:
arr1 = np.array([1,2,3])

In [27]:
arr1.shape

(3,)

In [29]:
# (3,) denotes that the array is 1-D and holds 3 elements (a single axis, so there are no separate rows and columns)

In [35]:
arr.dtype #datatype of elements in array

dtype('int32')

In [39]:
# Passing dtype makes NumPy store the integer literals as floating-point values.
a = np.array(
    [[1, 2, 3],
     [5, 8, 7]],
    dtype='float',
)
print("Array created using passed list:\n", a)

Array created using passed list:
 [[1. 2. 3.]
 [5. 8. 7.]]


In [57]:
# np.array accepts a tuple of tuples just like a list of lists.
b = np.array((
    (1, 2, 3),
    (4, 5, 6),
))
print("\nArray created using passed tuple:\n", b)


Array created using passed tuple:
 [[1 2 3]
 [4 5 6]]


In [49]:
# zero ("null") matrix: 3 rows x 4 columns, every entry 0.0
c = np.zeros((3,4))

In [53]:
c

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [59]:
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [61]:
np.eye(3) #identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [65]:
#arange function
np.arange?

[1;31mDocstring:[0m
arange([start,] stop[, step,], dtype=None, *, like=None)

Return evenly spaced values within a given interval.

``arange`` can be called with a varying number of positional arguments:

* ``arange(stop)``: Values are generated within the half-open interval
  ``[0, stop)`` (in other words, the interval including `start` but
  excluding `stop`).
* ``arange(start, stop)``: Values are generated within the half-open
  interval ``[start, stop)``.
* ``arange(start, stop, step)`` Values are generated within the half-open
  interval ``[start, stop)``, with spacing between values given by
  ``step``.

For integer arguments the function is roughly equivalent to the Python
built-in :py:class:`range`, but returns an ndarray rather than a ``range``
instance.

When using a non-integer step, such as 0.1, it is often better to use
`numpy.linspace`.


Parameters
----------
start : integer or real, optional
    Start of interval.  The interval includes this value.  The default
    st

In [67]:
# linspace
np.linspace?

[1;31mSignature:[0m      
[0mnp[0m[1;33m.[0m[0mlinspace[0m[1;33m([0m[1;33m
[0m    [0mstart[0m[1;33m,[0m[1;33m
[0m    [0mstop[0m[1;33m,[0m[1;33m
[0m    [0mnum[0m[1;33m=[0m[1;36m50[0m[1;33m,[0m[1;33m
[0m    [0mendpoint[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mretstep[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mdtype[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0maxis[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mCall signature:[0m  [0mnp[0m[1;33m.[0m[0mlinspace[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mType:[0m            _ArrayFunctionDispatcher
[1;31mString form:[0m     <function linspace at 0x0000020AE95A7B00>
[1;31mFile:[0m            c:\users\mishr\anaconda3\lib\site-packages\numpy\core\function_base.py
[1;31mDocstring:[0m      
Return evenly spaced 

In [69]:
# arange: integers starting at 0, advancing by 5, stopping before 30
f = np.arange(0, 30, step=5)
print("\n A sequential array with steps of 5:\n", f)

# linspace: 10 evenly spaced values from 0 to 5, both endpoints included
g = np.linspace(0, 5, num=10)
print("\nA sequential array with 10 values between 0 and 5: \n", g)


 A sequential array with steps of 5:
 [ 0  5 10 15 20 25]

A sequential array with 10 values between 0 and 5: 
 [0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]


In [71]:
# we can deduce that in arange(0, n, step), n is not inclusive and step is the difference between adjacent numbers
# on the other hand, in linspace(0, n, num) the third argument is the number of values, not the step; the values are equally spaced, n is inclusive, and the step works out to n / (num - 1)

In [73]:
np.random.random((2,2)) #generate array of random values

array([[0.74511698, 0.71033913],
       [0.27912686, 0.84411297]])

In [77]:
# A 3x4 matrix holding 1..12, rearranged into two blocks of 2 rows x 3 columns.
arr = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
newarr = arr.reshape((2, 2, 3))

In [79]:
newarr

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [81]:
# The same 3x4 matrix, this time rearranged into 4 rows x 3 columns.
arr = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
newarr = arr.reshape((4, 3))

In [83]:
newarr


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [85]:
arr = np.array([[1,2,3], [4,5,6]])
flarr = arr.flatten() # collapse the 2x3 array into a new 1-D array of 6 elements; arr itself is not modified

In [87]:
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [89]:
flarr

array([1, 2, 3, 4, 5, 6])

# Array Indexing & Slicing
- Syntax: `arr[row_start : row_stop : row_step, col_start : col_stop : col_step]` — each `stop` index is exclusive, so pass (last wanted index + 1).

In [133]:
# 4x4 example matrix; the mix of ints and floats is stored as a float array.
arr = np.array([
    [-1, 2, 0, 4],
    [4, -0.5, 6, 0],
    [2.6, 0, 7, 8],
    [3, -7, 4, 2.0],
])

In [125]:
arr

array([[-1. ,  2. ,  0. ,  4. ],
       [ 4. , -0.5,  6. ,  0. ],
       [ 2.6,  0. ,  7. ,  8. ],
       [ 3. , -7. ,  4. ,  2. ]])

In [96]:
arr[1:, :3] # slicing: rows 1 to the end, columns 0 through 2

array([[ 4. , -0.5,  6. ],
       [ 2.6,  0. ,  7. ],
       [ 3. , -7. ,  4. ]])

In [98]:
arr[:3, :3]

array([[-1. ,  2. ,  0. ],
       [ 4. , -0.5,  6. ],
       [ 2.6,  0. ,  7. ]])

In [100]:
arr[1:2, 1:2] # slicing keeps both dimensions, so this is a 1x1 array and not a scalar

array([[-0.5]])

In [104]:
arr[1:, :3:2] #step size SLICING

array([[4. , 6. ],
       [2.6, 7. ],
       [3. , 4. ]])

In [106]:
arr[1::2, :3:2]

array([[4., 6.],
       [3., 4.]])

In [137]:
#integer array indexing
temp = arr[[0, 1, 2, 3], [3, 2, 1, 0]]
print("\nElements at indices (0,3), (1,2), (2,1), (3,0):\n", temp)


Elements at indices (0,3), (1,2), (2,1), (3,0):
 [4. 6. 0. 3.]


In [139]:
# boolean array indexing
cond = arr>0
temp = arr[cond]
print(temp)

[2.  4.  4.  6.  2.6 7.  8.  3.  4.  2. ]


# Basic Operations
    - NumPy offers a BROADCASTING feature that makes array operations easy.
    - A scalar (or smaller array) is broadcast across the larger array, so the operation is applied to each element individually.

# What is NumPy Array Broadcasting?
Broadcasting provides a means of vectorizing array operations, eliminating the need for explicit Python loops. The looping instead happens inside NumPy's compiled C code, which is much faster than the equivalent loop written in Python.

It does this without making needless copies of data, which usually leads to efficient algorithm implementations. There are, however, cases where broadcasting is a bad idea because it leads to inefficient use of memory that slows down the computation.

The resulting array returned after broadcasting will have the same number of dimensions as the array with the greatest number of dimensions.

In [145]:
a = np.array([1,2,5,3])

In [147]:
a

array([1, 2, 5, 3])

In [149]:
a+1 #add 1 to every element

array([2, 3, 6, 4])

In [151]:
a-1

array([0, 1, 4, 2])

In [153]:
a*10

array([10, 20, 50, 30])

In [155]:
a**2

array([ 1,  4, 25,  9])

In [157]:
a*=2 #modify existing array

In [159]:
a

array([ 2,  4, 10,  6])

In [161]:
#Transpose of array
a = np.array([[1,2,3], [3,4,5], [9,6,0]])
print("\nOriginal array:\n", a)
print("Transpose of array:\n", a.T)


Original array:
 [[1 2 3]
 [3 4 5]
 [9 6 0]]
Transpose of array:
 [[1 3 9]
 [2 4 6]
 [3 5 0]]


# Unary Operators
    - These are provided as methods of the ndarray class. They include sum, min, max, etc.
    - These functions can also be applied row-wise and column-wise by setting the axis parameter.
    - In a 2-D array, axis = 0 gives one result per column (column-wise) and axis = 1 gives one result per row (row-wise).

In [164]:
arr = np.array([[1,5,6],
               [4,7,2],
               [3,1,9]])

In [166]:
arr

array([[1, 5, 6],
       [4, 7, 2],
       [3, 1, 9]])

In [168]:
arr.max()

9

In [172]:
arr.max(axis = 1) #Row wise maximum elements

array([6, 7, 9])

In [174]:
arr.min()

1

In [176]:
arr.min(axis = 0) #Column wise minimum element

array([1, 1, 2])

In [178]:
arr.sum()

38

In [181]:
#cumulative sum along each row
arr.cumsum(axis = 1)

array([[ 1,  6, 12],
       [ 4, 11, 13],
       [ 3,  4, 13]])

# Binary Operators

In [184]:
a = np.array([[1,2],
             [3,4]])
b = np.array([[5,6],
             [7,8]])

In [186]:
a

array([[1, 2],
       [3, 4]])

In [188]:
b

array([[5, 6],
       [7, 8]])

In [190]:
a+b

array([[ 6,  8],
       [10, 12]])

In [192]:
a*b # element-wise product (each a[i][j] * b[i][j]), not a Cartesian or matrix product

array([[ 5, 12],
       [21, 32]])

In [194]:
#matrix multiplication
a.dot(b)

array([[19, 22],
       [43, 50]])

In [196]:
# all these can also be done using np.add, np.subtract, np.multiply, np.divide, etc. : these are universal functions (ufuncs); np.sum is a related reduction function (equivalent to np.add.reduce), not a ufunc itself

In [198]:
# create an array of sine values
a = np.array([0, np.pi/2, np.pi])
print(np.sin(a)) # sin(pi) prints as ~1.2e-16 rather than exactly 0 because of floating-point rounding

[0.0000000e+00 1.0000000e+00 1.2246468e-16]


In [200]:
#exponential values
b = np.array([0,1,2,3])

In [202]:
b

array([0, 1, 2, 3])

In [206]:
np.exp(b) #e^num

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692])

In [210]:
#square root of array values
np.sqrt(b)

array([0.        , 1.        , 1.41421356, 1.73205081])

# Sorting array

In [221]:
a = np.array([[1, 4, 2],
             [3, 4, 6],
             [0, -1, 5]])

In [223]:
a

array([[ 1,  4,  2],
       [ 3,  4,  6],
       [ 0, -1,  5]])

In [225]:
#sorted array
np.sort(a, axis = 1) # row wise

array([[ 1,  2,  4],
       [ 3,  4,  6],
       [-1,  0,  5]])

In [227]:
np.sort(a, axis = None) # axis=None flattens the array and sorts all of its elements together

array([-1,  0,  1,  2,  3,  4,  4,  5,  6])

In [229]:
# specify sort algorithms
np.sort(a, axis = 0, kind = 'mergesort')

array([[ 0, -1,  2],
       [ 1,  4,  5],
       [ 3,  4,  6]])

In [245]:
# sorting of a structured array
## each field is declared as a (name, type) pair; 'S10' is a byte string of at most 10 characters
dtypes = [('name', 'S10'), ('grad_year', int), ('cgpa', float)]
## one tuple per record, with values in the same order as the fields in dtypes
values = [('Ramya', 2025, 8.5 ), ('Richa', 2027, 8.4), ('Rashika', 2024, 8.2)]

## creating the structured array
arr = np.array(values, dtype = dtypes)

In [239]:
arr

array([(b'Ramya', 2025, 8.5), (b'Richa', 2027, 8.4),
       (b'Rashika', 2024, 8.2)],
      dtype=[('name', 'S10'), ('grad_year', '<i4'), ('cgpa', '<f8')])

In [241]:
np.sort(arr, order = 'name')

array([(b'Ramya', 2025, 8.5), (b'Rashika', 2024, 8.2),
       (b'Richa', 2027, 8.4)],
      dtype=[('name', 'S10'), ('grad_year', '<i4'), ('cgpa', '<f8')])

In [247]:
np.sort(arr, order = ['grad_year', 'cgpa']) #grad year will first be considered, if same then cgpa will be looked into

array([(b'Rashika', 2024, 8.2), (b'Ramya', 2025, 8.5),
       (b'Richa', 2027, 8.4)],
      dtype=[('name', 'S10'), ('grad_year', '<i4'), ('cgpa', '<f8')])

# Stacking and Splitting
## Several arrays can be stacked together along different axes.

- **np.vstack**: To stack along vertical axis
- **np.hstack**: To stack along horizontal axis
- **np.column_stack**: To stack 1-D array as columns into 2-D arrays.
- **np.concatenate**: To stack arrays along a specified axis (the axis is passed as a parameter)


In [252]:
a = np.array([[1,2],
             [3,4]])

In [254]:
a

array([[1, 2],
       [3, 4]])

In [256]:
b = np.array([[5,6],
             [7,8]])

In [257]:
b

array([[5, 6],
       [7, 8]])

In [262]:
#vertical stacking
np.vstack((a,b))

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [264]:
np.hstack((a,b))

array([[1, 2, 5, 6],
       [3, 4, 7, 8]])

In [266]:
c = [5,6]

In [268]:
#stacking columns
np.column_stack((a,c))

array([[1, 2, 5],
       [3, 4, 6]])

In [273]:
#concatenation method
np.concatenate((a,b), 1) # axis=1: b is appended to the right of a, column-wise (same result as np.hstack)

array([[1, 2, 5, 6],
       [3, 4, 7, 8]])

## For splitting we have these functions: Split arrays into more arrays
- **np.hsplit**: Split array along horizontal axis
- **np.vsplit**: Split array along vertical axis
- **np.array_split**: Split array along specified axis

In [279]:
a = np.array([[1, 3, 5 ,7 ,9 ,11],
             [2, 4, 6, 8, 10, 12]])


In [280]:
a

array([[ 1,  3,  5,  7,  9, 11],
       [ 2,  4,  6,  8, 10, 12]])

In [283]:
np.hsplit(a, 2)

[array([[1, 3, 5],
        [2, 4, 6]]),
 array([[ 7,  9, 11],
        [ 8, 10, 12]])]

In [285]:
np.vsplit(a, 2)

[array([[ 1,  3,  5,  7,  9, 11]]), array([[ 2,  4,  6,  8, 10, 12]])]

# Statistical Functions
- **min , max**
- **mean**
- **median**
- **average**
- **variance**
- **standard deviation**

In [289]:
a = np.array([[1,2,3,4], [7,6,2,0]])

In [291]:
a

array([[1, 2, 3, 4],
       [7, 6, 2, 0]])

In [293]:
np.min(a)

0

In [295]:
np.min(a, axis = 0)

array([1, 2, 2, 0])

In [297]:
np.min(a, axis = 1)

array([1, 0])

In [299]:
c = np.array([1,5,4,2,0])

In [301]:
c

array([1, 5, 4, 2, 0])

In [303]:
np.median(c) #median is the mid point of the array after sorting it so sorted array will be : 0 , 1, 2, 4, 5

2.0

In [305]:
#if there are two middle values, add them and divide by 2
b = np.array([1,2,3,4,5,6])
np.median(b) ## (3+4)/2

3.5

In [307]:
np.mean(c) ##arithmetic mean: sum / count = 12/5

2.4

In [313]:
#weights
w = np.array([1,1,1,1,1])
np.average(c, weights=w) #weighted mean = (n1*w1 + n2*w2 + ...) / (w1 + w2 + ...); equal weights give the plain mean

2.4

In [319]:
# Standard Deviation
# Standard deviation is a number that describes how spread out the values are.
# A low standard deviation means that most of the numbers are close to the mean (average) value.
# A high standard deviation means that the values are spread out over a wider range.
# np.std computes the population standard deviation by default (ddof=0).
mystd = np.std(c)
print(mystd)

1.854723699099141


In [321]:
# Variance
# Variance is another number that indicates how spread out the values are.
# In fact, if you take the square root of the variance, you get the standard deviation!
# Or the other way around, if you multiply the standard deviation by itself, you get the variance!
# The two printed values can differ in the last digits because of floating-point rounding.
print(np.var(c))
print(mystd**2)

3.4400000000000004
3.440000000000001


# Numpy Random Module
- **rand**: Random values in a given shape.
- **randn**: Return a sample (or samples) from the "standard normal" distribution.
- **randint**: Return random integers from low (inclusive) to high (exclusive).
- **random**: Return random floats in the half-open interval [0.0, 1.0).
- **choice**: Generates a random sample from a given 1-D array.
- **shuffle**: Shuffles the contents of a sequence in place.

In [344]:
# ten consecutive integers, 5 through 14
a = np.arange(5, 15)

In [346]:
a

array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [348]:
np.random.shuffle(a)

In [350]:
a

array([14, 10,  8,  5, 13,  9,  7,  6, 11, 12])

In [352]:
np.random.randn(2,3)

array([[ 0.19664341, -1.54521415, -0.08810409],
       [ 0.85223932,  0.67723401, -0.10687843]])

In [354]:
np.random.randint(5,10,3)

array([6, 6, 6])

In [360]:
np.random.choice([1,4,3,2,11,27])

11

In [366]:
np.random.rand(2,3,4,5)

array([[[[0.82914691, 0.82960336, 0.27304997, 0.0592432 , 0.67052804],
         [0.59306552, 0.6716541 , 0.41178788, 0.1975509 , 0.28962964],
         [0.14212014, 0.78331447, 0.41253884, 0.03417131, 0.62402999],
         [0.66063573, 0.29849529, 0.44613451, 0.22212455, 0.07336417]],

        [[0.46923853, 0.09617226, 0.90337017, 0.11949047, 0.52479938],
         [0.083623  , 0.91686133, 0.91044838, 0.29893011, 0.58438912],
         [0.56591203, 0.61393832, 0.95653566, 0.26097898, 0.23101542],
         [0.53344849, 0.94993814, 0.49305959, 0.54060051, 0.7654851 ]],

        [[0.04534573, 0.13996612, 0.79240359, 0.02980136, 0.88312548],
         [0.54078819, 0.44798018, 0.89213587, 0.37758434, 0.53842469],
         [0.65229888, 0.36126102, 0.57100856, 0.63783648, 0.12631489],
         [0.69020459, 0.6477494 , 0.35393909, 0.76323305, 0.35653172]]],


       [[[0.75278835, 0.88134183, 0.01166919, 0.49810907, 0.07379201],
         [0.78695147, 0.06406733, 0.35531036, 0.94183695, 0.37980329]