# Numpy

![Numpyimage](images/Numpy_image.jpg)

An array is a collection of elements that all share the same data type (homogeneous).

NumPy is not part of core Python; it is a Python library written in optimized C, which makes numerical calculations faster and more efficient.

# np.array()

In [8]:
import numpy as np

In [9]:
array_n1 = np.array([1,2,3])

In [10]:
print(array_n1)
print(type(array_n1))
print(type(array_n1[0])) # the default integer dtype is platform-dependent; it is int64 in the output below
# available dtypes include int8/16/32/64, float16/32/64, complex64/128, np.bool_, np.str_, np.object_

[1 2 3]
<class 'numpy.ndarray'>
<class 'numpy.int64'>


In [11]:
array_n2 = np.array([1,2,3], dtype=np.int8) # we have int8, 16, 32, 64 / float16, 32, 64 / complex64, 128 / np.bool_ / np.str_ / np.object_
print(type(array_n2))
print(type(array_n2[0]))

<class 'numpy.ndarray'>
<class 'numpy.int8'>


In [12]:
array_n2_ = np.array([1,2,"hello"]) # a single string element makes NumPy convert every element to a string
print(type(array_n2_))
print(type(array_n2_[0])) # the final array is ['1', '2', 'hello'], so element 0 is a numpy.str_


<class 'numpy.ndarray'>
<class 'numpy.str_'>


In [14]:
array_n2_1 = np.array([1,2,"hello", [10,20,30], {'nikunj', 'dhaka'}], dtype = np.object_)
print(type(array_n2_1))
print(type(array_n2_1[0])) 
print(type(array_n2_1[3])) 
# Setting dtype=np.object_ lets the array hold heterogeneous (mixed-type) Python objects, much like a list.
# Each element keeps its original Python type (see the int and list printed below).
# Internally it behaves more like a Python list and is not optimized for performance.
# dtype=object arrays are slower than standard NumPy arrays with a homogeneous data type:
#  -- Loss of vectorization: NumPy's speed comes from vectorized operations using low-level C code.
#     With object dtype, NumPy can't apply fast math operations directly; it has to treat each element as a generic Python object.
#  -- More memory overhead: each item is a pointer to a Python object.
#  -- Many NumPy functions may not work: reductions like .mean() or .sum() can fail or behave differently on mixed objects.
#     (Indexing and slicing still work as usual.)



<class 'numpy.ndarray'>
<class 'int'>
<class 'list'>


In [49]:
# array_n2 = np.array([1,2,"hello"], dtype=np.int8) # this would raise an error because "hello" cannot be converted to an integer
array_n2 = np.array([1,2,"3"], dtype=np.int8) # here the string "3" is converted to the requested integer dtype
print(array_n2)
print(type(array_n2))
print(type(array_n2[0]))

[1 2 3]
<class 'numpy.ndarray'>
<class 'numpy.int8'>


In [50]:
array_n2 = np.array([1,2,"3"], dtype=np.float16)
print(array_n2)
print(type(array_n2))
print(type(array_n2[2]))

[1. 2. 3.]
<class 'numpy.ndarray'>
<class 'numpy.float16'>


# np.arange(start, stop, step)

In [51]:
print(np.arange(5))

print(np.arange(1, 10, 2))

np_arange = np.arange(1, 10, 2)
print(type(np_arange))

[0 1 2 3 4]
[1 3 5 7 9]
<class 'numpy.ndarray'>


# np.linspace(start, stop, num=x)

Returns x evenly spaced numbers from start to stop (both endpoints are included by default).

In [52]:
np_linspace = np.linspace(1,100,4) # 1 and 100 are both included; the default dtype is float64
print(np_linspace)
print(type(np_linspace))
print(type(np_linspace[0]))

[  1.  34.  67. 100.]
<class 'numpy.ndarray'>
<class 'numpy.float64'>


In [53]:
np_linspace_1 = np.linspace(1,10,10, dtype = np.int16)
print(np_linspace_1)
print(type(np_linspace_1))
print(type(np_linspace_1[0]))

[ 1  2  3  4  5  6  7  8  9 10]
<class 'numpy.ndarray'>
<class 'numpy.int16'>


In [54]:
for i in np.linspace(1,10,10):
    print(i, end = "|")

1.0|2.0|3.0|4.0|5.0|6.0|7.0|8.0|9.0|10.0|

# np.zeros(shape)

In [55]:
npz_1 = np.zeros(5)
print(npz_1.ndim)
print(npz_1)

1
[0. 0. 0. 0. 0.]


In [56]:
npz_2 = np.zeros((5,4))
print(npz_2.ndim)
print(npz_2)
print(type(npz_2[0][0])) # by default each zero has the np.float64 data type

2
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<class 'numpy.float64'>


In [57]:
npz_3 = np.zeros((5,4,5))
print(npz_3.ndim)
print(npz_3.shape)
print(npz_3)

3
(5, 4, 5)
[[[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]]


In [58]:
z = np.zeros((7,10), dtype = np.int16) # changing the data type to np.int16
print(z)
print(z.ndim)

[[0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
2


# np.ones()
Create an Array Filled with 1s

np.ones(shape, dtype=None)

Shape of the array — can be a tuple or integer

(Optional) Data type of the array. Default is float64

In [None]:
npo = np.ones((5,4), dtype=np.int8)
print(npo)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


# np.full()
Create an Array Filled with a Constant

np.full(shape, fill_value, dtype=None)

In [None]:
npf = np.full((5,5), "A")
print(npf)

[['A' 'A' 'A' 'A' 'A']
 ['A' 'A' 'A' 'A' 'A']
 ['A' 'A' 'A' 'A' 'A']
 ['A' 'A' 'A' 'A' 'A']
 ['A' 'A' 'A' 'A' 'A']]


In [66]:
# npf = np.full((5,5), "A", dtype=np.int16) #this will throw an error as dtype = np.int16 and fill value is str
npf = np.full((5,5), 1)
print(type(npf[0][0]))
print(npf)

<class 'numpy.int64'>
[[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]]


In [None]:
npf = np.full((5,5), "Nikunj", dtype=np.str_)
# NOTE(review): the recorded output below shows the full string stored with dtype <U6, not just the first character.
# If a NumPy version truncates the value here, pass an explicit width such as dtype="<U6" -- TODO confirm
# np.str_ is NumPy's Unicode string type
print(npf.dtype) 
print(type(npf[0][0]))
print(npf[0][0])
print(npf)


<U6
<class 'numpy.str_'>
Nikunj
[['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']]


In [85]:
npf = np.full((5,5), "Nikunj")
# npf = np.full((5,5), "Nikunj", dtype = "<U6") # this can also be done
print(npf.dtype) 
print(type(npf[0][0]))
print(npf[0][0])
print(npf)


<U6
<class 'numpy.str_'>
Nikunj
[['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']
 ['Nikunj' 'Nikunj' 'Nikunj' 'Nikunj' 'Nikunj']]


# np.eye(n) - create an identity matrix

np.eye(N, M=None, k=0, dtype=float)

N - Number of rows | M - (Optional) Number of columns (default = N → square matrix)

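k - index of the diagonal that is filled with ones: 0 = main diagonal, positive = that many steps above it (upper), negative = that many steps below it (lower)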

dtype => default = float

create an identity matrix


In [None]:
npe = np.eye(6)
print(npe)

[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]


In [92]:
npe = np.eye(3,6,4) # k=4 shifts the diagonal 4 columns to the right; a negative k would shift it downwards
print(npe)

[[0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0.]]


In [98]:
npe = np.eye(6,6,1, dtype = np.int16) # k=1 shifts the diagonal one column to the right; a negative k would shift it downwards
print(npe)
print(type(npe[0][0]))
print(type(npe))

[[0 1 0 0 0 0]
 [0 0 1 0 0 0]
 [0 0 0 1 0 0]
 [0 0 0 0 1 0]
 [0 0 0 0 0 1]
 [0 0 0 0 0 0]]
<class 'numpy.int16'>
<class 'numpy.ndarray'>


# np.empty(shape)
It returns an array without initializing its entries, so it contains whatever garbage values were already present in memory.

content will be whatever random bytes already exist at that memory location

Faster than np.zeros() or np.ones() — but use it only if you intend to overwrite the values.

In [99]:
npem = np.empty((5,5))
print(npem)

[[5.e-324 5.e-324 5.e-324 5.e-324 5.e-324]
 [5.e-324 5.e-324 5.e-324 5.e-324 5.e-324]
 [5.e-324 5.e-324 5.e-324 5.e-324 5.e-324]
 [5.e-324 5.e-324 5.e-324 5.e-324 5.e-324]
 [5.e-324 5.e-324 5.e-324 5.e-324 5.e-324]]


In [103]:
np.empty((5,5), dtype=np.int8)

array([[   0,    0,    0,    0,    0],
       [   0,    0,    0,    0, -128],
       [   8,  -14,   30,    2,    0],
       [   0, -112,  119,    8,  -14],
       [  30,    2,    0,    0,  -48]], dtype=int8)

In [104]:
np.empty((5,5), dtype=int)
# dtype=int selects NumPy's default integer type, whose size is platform-dependent (usually 64-bit).
# The contents are uninitialized, so the values shown are whatever was already in memory.

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [105]:
a = np.empty((5,5))
a.fill(5)    # fills every element with 5
# a[:] = 5 # same as above - called broadcasting technique
a

array([[5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.]])

In [115]:
a = np.empty((5,7), dtype="<U7")
a[2][4] = 4 # this int will get auto converted to U7 string
print(type(a[2][4]))
a

<class 'numpy.str_'>


array([['', '', '', '', '', '', ''],
       ['', '', '', '', '', '', ''],
       ['', '', '', '', '4', '', ''],
       ['', '', '', '', '', '', ''],
       ['', '', '', '', '', '', '']], dtype='<U7')

In [122]:
import time

start_e = time.perf_counter() # perf_counter() returns a high-resolution timer value
np.empty((5, 5)) # np.empty only allocates the space and leaves whatever garbage values were already there (no initialisation, unlike np.ones)
end_e = time.perf_counter()
total_e = end_e - start_e # elapsed time for this single call
# NOTE: one sample like this is noisy; repeat the measurement (e.g. with timeit) before comparing against np.zeros / np.ones
print(total_e)

0.00018590000036056153


# np.random.rand(shape)

Generates random floats in the half-open range [0.0, 1.0): 0.0 can occur, 1.0 cannot.



In [123]:
npr = np.random.rand(5,5)
print(npr)

[[0.73620101 0.30802282 0.45831424 0.54432379 0.21562734]
 [0.874847   0.48981848 0.25864557 0.77081944 0.80524904]
 [0.67073597 0.8749174  0.80394983 0.31997878 0.34035723]
 [0.25837845 0.03257743 0.20278682 0.18086744 0.81127637]
 [0.77259465 0.86309035 0.80642464 0.01998636 0.58332864]]


In [124]:
a = np.random.rand(10)*10 # scales the values from the range [0, 1) to the range [0, 10)
a = a.tolist() # convert a NumPy array into a native Python list
print(a)
a = [int(i) for i in a] # int() drops the fractional part of each value
print(a)

[5.508245458213205, 6.840438312986549, 2.612451803263929, 1.1612453414070756, 3.1623832276661834, 4.298040334882696, 5.534183910587343, 8.382926989872198, 2.399567935708279, 4.54038032403011]
[5, 6, 2, 1, 3, 4, 5, 8, 2, 4]


# np.random.randint(start, end, number of values)
gives random integer values

end is exclusive

In [127]:
np.random.randint(1, 11, 10)

array([5, 1, 1, 2, 8, 5, 9, 6, 2, 4], dtype=int32)

# numpy attributes

In [160]:
arr = np.array([[1, 2, 3], [4, 5, 6]], dtype = np.int16) # we have changed the data type, by default it is int64
print(arr)
print("shape of the matrix / array : ",arr.shape)
print("dimension of the matrix / array : ",arr.ndim)
print("data type of elements enclosed here : ",arr.dtype)


[[1 2 3]
 [4 5 6]]
shape of the matrix / array :  (2, 3)
dimension of the matrix / array :  2
data type of elements enclosed here :  int16


# ndarray.itemsize

In [165]:
print(arr.itemsize) # Size in bytes of a single element
print(arr.nbytes) # Total memory used by the array

# dtype = np.int16, which is a 16-bit (2-byte) integer.
# Each element is 2 bytes → 6 × 2 = 12 bytes


2
12


# Transpose -> arr.T

In [39]:
print(arr)
print("Shape of array", arr.shape)

print("\n")

print(arr.T)
print("Shape of Transposed array", arr.T.shape)

[[1 2 3]
 [4 5 6]]
Shape of array (2, 3)


[[1 4]
 [2 5]
 [3 6]]
Shape of Transposed array (3, 2)


# indexing and slicing

In [167]:
arr = np.array([10,20,30,40,50,60])
print(arr[0])
print(arr[-1])
print(arr[1:4])
print(arr[:3])
print(arr[0:3])
print(arr[::2]) # (start with 0 and step size 2)
print(arr[::-1]) # reverse

10
60
[20 30 40]
[10 20 30]
[10 20 30]
[10 30 50]
[60 50 40 30 20 10]


In [168]:
mat = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

print(mat[0,1]) # element at row 0, column 1
print(mat[1]) # the whole row at index 1
print(mat[:,2]) # all rows, column index 2
print(mat[:,-1]) # all rows, last column

2
[4 5 6]
[3 6 9]
[3 6 9]


In [42]:
print(mat[0:2,1:])

[[2 3]
 [5 6]]


In [43]:
arr_temp = np.array([1,2,3,4,5,6,7,8,9])
arr_temp[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1])

In [169]:
print(mat[::-1, ::-1]) # reverse of a matrix

[[9 8 7]
 [6 5 4]
 [3 2 1]]


# Boolean indexing

In [170]:
arr = np.array([5, 10, 15, 20, 25])
print(arr>10) # element-wise comparison gives a boolean mask
print(arr[arr>10])  # Boolean indexing: keeps only the elements where the mask is True

# arr[arr > 10] = 99 # we can also modify the elements conditionally

indx = [0,3,4]
print(arr[indx]) # indexing with a list of integer positions picks those elements

arr[0] = 50 # assign to a single position in place
print(arr)

[False False  True  True  True]
[15 20 25]
[ 5 20 25]
[50 10 15 20 25]


# indexing with np.where()

In [171]:
arr = np.array([10, 15, 20, 25, 30])

idx = np.where(arr>20) # this will return the indexes based on condition
print(idx)
print(arr[idx])

(array([3, 4]),)
[25 30]


# array reshaping and resizing

### reshape - raises an error if the number of elements does not match the new shape
### resize - truncates extra elements or pads missing ones with zeros

In [173]:
arr = np.array([1, 2, 3, 4, 5, 6])

reshaped = arr.reshape(2,3)
print(reshaped)
print("\n")
arr = np.array([1, 2, 3, 4, 5, 6,7,8,9,10,11,12])
reshaped = arr.reshape(3,-1) # -1 to auto-calculate dimensions
print(reshaped)


[[1 2 3]
 [4 5 6]]


[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [57]:
a = np.array([[1, 2], [3, 4]])
b = a.flatten()
print(b)

[1 2 3 4]


In [80]:
a = np.array([1, 2, 3, 4])
a.resize((2,3))
print(a)
print("\n")
a.resize((2,1))
print(a)


[[1 2 3]
 [4 0 0]]


[[1]
 [2]]


In [60]:
# 12 elements, target shape (4,5) needs 20 elements --> reshape will raise an error
# 12 elements, target shape (4,5) needs 20 elements --> resize will pad the extra elements with 0

In [None]:
a = np.array([[[1],[2],[3]], [[1],[2],[3]]])
print("shape : ", a.shape)
print(a)
print("\n")
# np.squeeze(), which is used to remove axes with length 1 from a NumPy array
# essentially simplifying the shape without losing data.
b = np.squeeze(a)
print("shape : ", b.shape)
print(b)

shape :  (2, 3, 1)
[[[1]
  [2]
  [3]]

 [[1]
  [2]
  [3]]]


shape :  (2, 3)
[[1 2 3]
 [1 2 3]]


In [None]:
a = np.array([[[1],[2],[3]]])
print(a.shape)
b = np.squeeze(a, axis=0) # removes axis 0, which is allowed only because its size is 1
print(b.shape)
print(b)
print("=" * 10) # just a separator
c = np.squeeze(a, axis=2) # removes axis 2, which is allowed only because its size is 1
print(c.shape)
print(c)
print("=" * 10) # just a separator
d = np.squeeze(a) # removes every axis whose size is 1
print(d.shape)
print(d)
print("=" * 10) # just a separator


(1, 3, 1)
(3, 1)
[[1]
 [2]
 [3]]
(1, 3)
[[1 2 3]]
(3,)
[1 2 3]


In [90]:
b = np.array([b])
print(b)

[[[1]
  [2]
  [3]]]


In [185]:
a = np.array([
                [1,2,3],
                [1,2,3],
                [1,2,3]
            ])

print(np.sum(a, axis=0)) # column-wise Sum
print(np.sum(a, axis=1)) # row-wise Sum

[3 6 9]
[6 6 6]


# numpy math operations

In [94]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

print(a + b)
print(a-b)
print(a*b)
print(a/b)

[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]


In [95]:
# comparison

print(a > b)
print(a < b)
print(a == b)

[False False False]
[ True  True  True]
[False False False]


In [96]:
ans = a > b
print(type(ans))
print(type(ans[0]))

<class 'numpy.ndarray'>
<class 'numpy.bool'>


In [None]:
# logical operators -> and , or , not

x = np.array([True, False, True])
y = np.array([False, False, True])

print(np.logical_and(x, y))
print(np.logical_or(x, y))
print(np.logical_not(x))

[False False  True]
[ True False  True]
[False  True False]


In [98]:
# bitwise operators (applied element by element on the integer bit patterns)

a = np.array([1, 2, 3])
b = np.array([0, 1, 1])

print(np.bitwise_and(a,b))
print(np.bitwise_or(a,b))

[0 0 1]
[1 3 3]


In [99]:
# modulus and power

a = np.array([10, 20, 30])
b = np.array([3, 7, 4])

print(np.mod(a, b))
print(np.power(a, b))

[1 6 2]
[      1000 1280000000     810000]


# scalar operations

In [101]:
celcius = np.array([0, 10, 20, 30])
# Broadcasting: each scalar (9, 5, 32) behaves as if it were stretched to the shape of the array,
# so the "* 9 / 5 + 32" formula is applied to each and every element of the array
print(celcius * 9 / 5 + 32)

[32. 50. 68. 86.]


# Common maths functions

In [102]:
a = np.array([1, 2, 3, 4])

print(np.sqrt(a))
print(np.exp(a)) # e^1, e^2, where e = 2.718 - > we will use this in ML (activation function: sigmoid - sigmoid = 1 / (1 + np.exp(-x)))
print(np.log(a))
print(np.sin(a))

[1.         1.41421356 1.73205081 2.        ]
[ 2.71828183  7.3890561  20.08553692 54.59815003]
[0.         0.69314718 1.09861229 1.38629436]
[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]


# Aggregation functions

In [195]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])

In [196]:
print(np.sum(a)) # sum of all the values present in the array
print(np.sum(a, axis=0)) # column wise sum

21
[5 7 9]


In [107]:
print(np.sum(a))
print(np.mean(a)) # average
print(np.std(a))
print(np.min(a))
print(np.max(a))

21
3.5
1.707825127659933
1
6



# element-wise multiplication

In [109]:
units = np.array([100, 200, 150])
profit = np.array([10, 20, 30])

units * profit

array([1000, 4000, 4500])

# matrix operation

# Dot product

In [197]:
# columns: sqft, bedrooms, location flag (1 = G, 0 = B)
house = np.array([[1200, 3, 1],
                  [1200, 3, 0],
                  [1000, 3, 0],
                  [1000, 1, 1]])

# one weight per column of house, in the same order
w = np.array([200, 10000, 50000])

In [198]:
print(house.shape)
print(w.shape)

# w is your weight vector:
# Represents how important each feature is.

# Each sqft is worth ₹200
# Each bedroom adds ₹10,000
# Location G adds ₹50,000

(4, 3)
(3,)


In [199]:
print(np.dot(house, w))

[320000 270000 230000 260000]


In [116]:
print(np.sum(house * w, axis=1)) # multiplication + sum

[320000 270000 230000 260000]


In [200]:
A = np.array([[1, 2, 3, 4], [1, 2, 3, 4]])
B = np.array([[1, 2, 3, 4], [1, 2, 3, 4]])

print(A.shape)
print(B.shape)
print()

# print(np.dot(A, B)) # raises an error: the columns of A (4) must equal the rows of B (2) for a dot product
print(np.dot(A, B.T)) # B.T has shape (4, 2), so (2, 4) dot (4, 2) gives a (2, 2) result
# print(A @ B.T) # the @ operator can also be used for this product

(2, 4)
(2, 4)

[[30 30]
 [30 30]]


# broadcasting

Two dimensions are compatible when:

They are equal, or

One of them is 1

In [122]:
a = np.array([1, 2, 3]) # (3,)
b = np.array([[10], [20]]) # (2,1)

print(a.shape)
print(b.shape)
print()

a + b

# a --> (2,3) [[1, 2, 3], [1, 2, 3]]
# b --> [[10, 10, 10], [20, 20, 20]]

(3,)
(2, 1)



array([[11, 12, 13],
       [21, 22, 23]])

# creating arrays from existing data

In [123]:
a = [1,2,3,4]
b = ([[1,2], [2,3]])
c = (10,20,30)

In [124]:
a_npy = np.array(a)
print(a_npy, a_npy.dtype, type(a))
b_npy = np.array(b)
print(b_npy, b_npy.dtype, type(b))
c_npy = np.array(c)
print(c_npy, c_npy.dtype, type(c))

[1 2 3 4] int64 <class 'list'>
[[1 2]
 [2 3]] int64 <class 'list'>
[10 20 30] int64 <class 'tuple'>


# vstack and hstack

In [125]:
# vstack stacks the arrays vertically (as extra rows); hstack stacks them horizontally (as extra columns)
a = np.array([[1,2]])
b = np.array([[3,4]])
print(np.vstack((a,b)))
print()
print(np.hstack((a,b)))

[[1 2]
 [3 4]]

[[1 2 3 4]]


# np.char.<function_name>( )

In [201]:
# np.char.<function_name>()
# 1. np.char.add()

a = np.array(['hello', 'world'])
b = np.array(['_user', '_123'])

print(np.char.add(a, b))

['hello_user' 'world_123']


In [127]:
# np.char.multiply()
print(np.char.multiply(a, 2))

['hellohello' 'worldworld']


In [128]:
# np.char.upper() / lower() / capitalize()
a = np.array(['hello', 'WORLD'])

print(np.char.upper(a))
print(np.char.lower(a))
print(np.char.capitalize(a))

['HELLO' 'WORLD']
['hello' 'world']
['Hello' 'World']


In [129]:
# np.char.center()
print(np.char.center(a, 11, fillchar='*'))

['***hello***' '***WORLD***']


In [211]:
a = np.array(['data_science', 'machine_learning'])

# np.char.replace()
print(np.char.replace(a, '_', ' '))

# np.char.split()
print(np.char.split(a, sep='_'))

['data science' 'machine learning']
[list(['data', 'science']) list(['machine', 'learning'])]


In [212]:
# np.char.find() / np.char.count()

print(np.char.find(a, 'learn'))
print(np.char.count(a, 'a'))
print(np.char.count(a, 'data'))

[-1  8]
[2 2]
[1 0]


# statistical functions

In [132]:
data = np.array([1, 2, 3, 4, 5, 6])

print("Mean:", np.mean(data))
print("Median:", np.median(data))
print("Standard Deviation:", np.std(data))
print("Variance:", np.var(data))
print("Min:", np.min(data))
print("Max:", np.max(data))

Mean: 3.5
Median: 3.5
Standard Deviation: 1.707825127659933
Variance: 2.9166666666666665
Min: 1
Max: 6


In [136]:
print("Argmin:", np.argmin(data)) # index of min value
print("Argmax:", np.argmax(data))

Argmin: 0
Argmax: 5


In [134]:
matrix = np.array([[1, 2, 3],
                   [4, 5, 6]])

print(np.mean(matrix, axis=0))
print(np.mean(matrix, axis=1))

[2.5 3.5 4.5]
[2. 5.]


In [213]:
data = np.array([1, 3, 5, 7, 9])
print(np.percentile(data, 50)) # q is given on a 0 - 100 scale
print(np.quantile(data, 1)) # q is given on a 0 - 1 scale

5.0
9


# sort, search, counting functions

In [219]:
arr = np.array([3, 6, 1, 1, 1, 1, 8, 2, 5, 6])

print("Sorted:", np.sort(arr))
print("Sorted indices:", np.argsort(arr)) # tells us the index of sorted array
print("Insert 4 position:", np.searchsorted(np.sort(arr), 4)) # Finds the index where 4 should be inserted to keep the array sorted.
print("Unique elements:", np.unique(arr)) # Finds all unique values (removes duplicates) and returns sorted unique elements.
print("Count of each element:", np.bincount(arr)) # Counts the number of occurrences of each non-negative integer, where the index represents the value

# min - 1
# max - 8
# creates an array from 0 to max element (8)
# 0, 1, 2, 3, 4, 5, 6, 7, 8 --> len(9)

Sorted: [1 1 1 1 2 3 5 6 6 8]
Sorted indices: [3 2 5 4 7 0 8 1 9 6]
Insert 4 position: 6
Unique elements: [1 2 3 5 6 8]
Count of each element: [0 4 1 1 0 1 2 0 1]


In [138]:
arr = np.array([1, 2, 3, 4]) # simple 1D array iteration
for x in arr:
    print(x)

1
2
3
4


In [139]:
arr_2d = np.array([[1, 2], [3, 4]])
for x in arr_2d:
    print(x)

[1 2]
[3 4]


In [140]:
arr = np.arange(8).reshape(2,2,2) # using np.nditer() - iterate over every element of a multi-dimensional (here 3D) array

for x in np.nditer(arr):
    print(x)

0
1
2
3
4
5
6
7


# copy

In [143]:
a = np.array([1, 2, 3])
b = a # plain assignment only creates another reference: a and b are the same object
print(id(a))
print(id(b))
b[0] = 100 # modifying through b also changes a
print(a)

136856420250128
136856420250128
[100   2   3]


In [144]:
a = np.array([1, 2, 3])
b = a.copy()
print(id(a))
print(id(b))
b[0] = 100
print(a)

136857454811472
136856127415248
[1 2 3]


In [145]:
a = np.array([1, 2, 3])

b = a.view() # a view shares a's data, so no data is copied

c = a.copy() # an independent copy with its own data

In [146]:
b[0] = 10
print(a)

[10  2  3]


In [147]:
c[0] = 20
print(a)

[10  2  3]


In [148]:
a = np.arange(5)
b = a[1:4] # view
print(np.shares_memory(a, b))

True


In [149]:
a = np.arange(5)
b = a[1:4].copy()
print(np.shares_memory(a, b))

False


In [220]:
a = np.array([[1, 2], [3, 4]])
print(a.ravel()) # returns a flattened view when possible (no copy, so faster); changes to it are reflected in the original
print(a.flatten()) # always returns a flattened copy, so changes to it do not affect the original

[1 2 3 4]
[1 2 3 4]
