## Import NumPy
Once NumPy is installed, import it into your application with the `import` keyword: `import numpy`. By convention it is imported under the alias `np`, as in the cell below.

In [1]:
import numpy as np

## NumPy Creating Arrays

In [5]:
# Build a 1-D ndarray from a Python list, then show the array itself,
# its Python type and its element dtype (the default integer dtype is
# platform-dependent, e.g. int32 or int64).
arr = np.array([1, 2, 3, 4, 5])
print(arr, type(arr), arr.dtype, sep="\n")

[1 2 3 4 5]
<class 'numpy.ndarray'>
int32


## Checking NumPy Version


In [3]:
# Show the version string of the NumPy package that was imported as `np`.
print(np.__version__)

1.24.3


## Dimensions in Arrays

In [7]:
# 0-D array: a single scalar wrapped in an ndarray, so it has no axes (ndim 0).
arr = np.array(34)
print(arr, arr.ndim, sep="\n")

34
0


In [14]:
# 1-D array: a flat sequence of values gives exactly one axis.
arr1 = np.array((1, 2, 3, 4))
print(np.ndim(arr1))

1


In [11]:
# 2-D array: a list of equal-length rows gives two axes (rows, columns).
arr2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr2, arr2.ndim, sep="\n")

[[1 2 3]
 [4 5 6]]
2


In [12]:
# 3-D array: a list of equally shaped 2-D blocks gives three axes.
arr3 = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [11, 12, 13]],
])
print(arr3, arr3.ndim, sep="\n")

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [11 12 13]]]
3


## NumPy Array Shape

In [15]:
# Print the shape tuple (number of elements along each axis) of the arrays
# built in the "Dimensions in Arrays" cells above.
# NOTE(review): the last visible assignment to `arr` before this cell is the
# 0-D array np.array(34), whose shape is (); the recorded `(4,)` on the first
# output line suggests `arr` held a different value when this cell was run.
# Confirm with Restart Kernel -> Run All.
print(arr.shape)
print(arr1.shape)
print(arr2.shape)
print(arr3.shape)


(4,)
(4,)
(2, 3)
(2, 2, 3)


## Numpy Reshape

In [24]:
import numpy as np

# Reshape a flat 6-element array into 2 rows x 3 columns; the element count
# of the new shape must match the original (2 * 3 == 6).
arr = np.array([1, 2, 3, 4, 5, 6])
print(arr)
reshaped_Arr = np.reshape(arr, (2, 3))
print(reshaped_Arr)

[1 2 3 4 5 6]
[[1 2 3]
 [4 5 6]]


## NumPy Array Indexing

In [25]:
# Elements of a 1-D array are read with zero-based integer indices and can be
# used in ordinary arithmetic.
arr = np.array([1, 2, 3, 4, 5])
print(arr[0], arr[0] + arr[1], sep="\n")

1
3


In [26]:
# Access 2-D arrays: the first index selects the row, the second the column.
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2[0][1])  # row 0, column 1

2


In [None]:
# 3-D arrays: indices are given as (block, row, column).
arr3 = np.array([[[1, 2, 3], [4, 5, 6]]] * 2)  # the same 2x3 block, twice
print(
    arr3[0, 1, 2],  # block 0, row 1, column 2 -> 6
    arr3[1, 0, 1],  # block 1, row 0, column 1 -> 2
    sep="\n",
)

In [36]:
# Negative indexing and slicing on a 2-D array.
arr = np.arange(1, 11).reshape(2, 5)  # rows hold 1..5 and 6..10

print(
    arr[0, -2],   # row 0, second-to-last column
    arr[:2, :2],  # rows 0-1, columns 0-1
    arr[0, -4:],  # last four entries of row 0
    sep="\n",
)

4
[[1 2]
 [6 7]]
[2 3 4 5]


## NumPy Array Iterating

In [37]:
import numpy as np

In [40]:
# Walk a 2-D array row by row, then value by value. Every value is followed
# by a single space and each row ends with a newline.
arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
for line in arr:
    for value in line:
        print(value, end=' ')
    print()

1 2 3 4 5 
6 7 8 9 10 


In [41]:
# Walk a 3-D array block by block, row by row, value by value. Each row is
# printed on its own line and a blank line separates the blocks.
arr3 = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])

for plane in arr3:
    for line in plane:
        for value in line:
            print(value, end=' ')
        print()
    print()

1 2 3 
4 5 6 

1 2 3 
4 5 6 



# NumPy Array Slicing
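`[:end]`: from index 0 up to, but not including, `end`

`[start:end]`: from `start` up to, but not including, `end`

`[start:]`: from `start` through the last index

`[::step]`: from the first to the last index, taking every `step`-th element

`[start:end:step]`: from `start` up to, but not including, `end`, taking every `step`-th element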

In [45]:
# Base array for the 1-D slicing examples.
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])

s1 = arr[0:6]   # first six elements (index 6 is excluded)
s2 = arr[2:5]   # index 2 up to, but not including, index 5
s3 = arr[3:]    # index 3 through the end
s4 = arr[0::2]  # every second element, starting at index 0
print(s1, s2, s3, s4, sep="\n")

[1 2 3 4 5 6]
[3 4 5]
[4 5 6 7 8 9]
[1 3 5 7 9]


In [50]:
# Build a 3x3 array and pull out one row, one column and a sub-matrix.
arr_2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

row = arr_2d[1, :]    # the second row
col = arr_2d[:, 0]    # the first column
mat = arr_2d[:2, 1:]  # rows 0-1, columns 1-2
print(row, col, mat, sep="\n")


[4 5 6]
[1 4 7]
[[2 3]
 [5 6]]


## Numpy DataTypes
### Numeric Data Types:

* int8, int16, int32, int64: Signed integers of 8, 16, 32, or 64 bits respectively.
* uint8, uint16, uint32, uint64: Unsigned integers of 8, 16, 32, or 64 bits respectively.
* float16, float32, float64: Floating point numbers of 16, 32, or 64 bits respectively.
* complex64, complex128: Complex numbers using 32 or 64 bits for each part.

In [51]:
# NumPy infers the dtype from the values: Python ints get the platform's
# default integer dtype, Python floats become float64.
a = np.array([1, 2, 3])
b = np.array([1.2, 4.5, 2.2])
print(a.dtype, b.dtype, sep="\n")

int32
float64


In [52]:
# Specifying the data type explicitly through the `dtype` argument.
arr = np.array([1, 2, 4], dtype="int32")
arrf = np.array([2.3, 4.2, 2.2], dtype="float32")
print(f"{arr} {arr.dtype}")
print(f"{arrf} {arrf.dtype}")

[1 2 4] int32
[2.3 4.2 2.2] float32


In [56]:
# Converting between data types with astype(), which returns a new array.
arr = np.array([1, 2, 3])
arrf = arr.astype("float32")  # integer -> float
print(arrf)

# float -> integer discards the fractional part (3.99 becomes 3).
arr2 = np.array([2.2, 3.99, 32.3323]).astype("int32")
print(arr2)

[1. 2. 3.]
[ 2  3 32]


## Joining NumPy Arrays

In [57]:
# Join two 1-D arrays end to end with np.concatenate; the inputs may have
# different lengths.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5])
result = np.concatenate([arr1, arr2], axis=0)
print(result)

[1 2 3 4 5]


## Splitting NumPy Arrays

In [63]:
# Create dummy data: np.arange(start, stop, step) yields 1, 3, 5, 7, 9
# (start at 1, step by 2, stop before 10).
arr=np.arange(1,10,2)
print(arr)

[1 3 5 7 9]


In [74]:
# Split the array into 2 equal parts. np.split needs the length (10 here) to
# be evenly divisible by the number of sections, so asking for 3 sections
# would raise a ValueError.
arr = np.arange(1, 11)
parts = np.split(arr, 2)  # list of sub-arrays; `arr` itself stays an ndarray
print(parts)
for part in parts:
    for x in part:
        print(x)

[array([1, 2, 3, 4, 5]), array([ 6,  7,  8,  9, 10])]
1
2
3
4
5
6
7
8
9
10


In [79]:
# Unequal splitting: np.array_split accepts a section count that does not
# divide the length evenly, so the resulting pieces may differ in size.
arr = np.arange(1, 11)
sp2 = np.array_split(arr, indices_or_sections=6)
print(sp2)

[array([1, 2]), array([3, 4]), array([5, 6]), array([7, 8]), array([9]), array([10])]


## NumPy Searching Arrays

In [81]:
# np.where called with only a condition returns a tuple of index arrays
# (one per dimension) marking where the condition is True.
arr = np.arange(1, 11)
# Indices where the value is greater than 3.
index = np.where(np.greater(arr, 3))
print(index)

(array([3, 4, 5, 6, 7, 8, 9], dtype=int64),)


In [86]:
# Indices of the elements whose value is at least 8.
arr = np.array([5, 3, 2, 55, 3, 23, 2])
index = np.where(np.greater_equal(arr, 8))
print(index)

(array([3, 5], dtype=int64),)


## Array Masking

In [87]:
# Create a boolean mask (True where the value exceeds 12) and index with it
# to keep only the matching elements. `arr` is the array from the previous cell.
msk = np.greater(arr, 12)
filtered = arr[msk]
print(filtered)

[55 23]


## NumPy Sorting Arrays

In [89]:
# np.sort returns a sorted copy (ascending) and leaves `arr` untouched.
arr = np.array([233, 3, 12, 34, 33])
sorted_arr = np.sort(arr, axis=-1)  # -1 (the last axis) is the default
print(sorted_arr)

[  3  12  33  34 233]


In [91]:
# Reverse (descending) sort: sort ascending, then flip the order.
# `arr` is the array defined in the previous cell.
rev_sort = np.flip(np.sort(arr))
print(rev_sort)

[233  34  33  12   3]


In [97]:
# Sort a 2-D array along axis 1: every row is sorted independently.
arr2d = np.array([
    [3, 12, 4],
    [5, 4, 2],
])
sortedRows = np.sort(arr2d, axis=-1)  # for a 2-D array the last axis is axis 1
print(sortedRows)

[[ 3  4 12]
 [ 2  4  5]]


In [98]:
# Sort along axis 0: every column is sorted independently.
arr2d = np.array([
    [3, 12, 4],
    [5, 4, 2],
])
sorted_Col = np.sort(arr2d, 0)
print(sorted_Col)


[[ 3  4  2]
 [ 5 12  4]]


## Array Operations

In [99]:
# Element-wise addition of two arrays of the same length.
arr1 = np.array([1, 2, 3])
arr2 = np.array([1, 2, 3])
add = np.add(arr1, arr2)
print(add)

[2 4 6]


In [101]:
# Element-wise multiplication of arr1 and arr2 (both defined in the previous cell).
mul = np.multiply(arr1, arr2)
print(mul)

[1 4 9]


In [105]:
# Element-wise square root of arr1 (defined in an earlier cell); the recorded
# output shows the result is a floating-point array.
sqrt=np.sqrt(arr1)
print(sqrt)

[1.         1.41421356 1.73205081]


In [109]:
# Broadcasting a scalar: the scalar is added to every element of the array.
arr = np.array([[1, 2, 3], [4, 5, 6]])
scalar = 10
result = arr + scalar  # use the named scalar instead of repeating the literal 10
print(result)

[[11 12 13]
 [14 15 16]]


In [110]:
# Broadcasting a row: the 1-D array is added to each row of `arr`
# (the 2x3 array defined in the previous cell).
row_to_add = np.array([10, 20, 30])
res = np.add(arr, row_to_add)
print(res)

[[11 22 33]
 [14 25 36]]


# Descriptive Statistics:
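* Mean: The mean, also known as the average, is the sum of all the numbers in the data set divided by how many there are: $\bar{x} = \frac{1}{n}\sum_{i=1}^{n} x_i$.
* Median: The median is the middle value of the data set once it is sorted; when the number of values is even, it is the average of the two middle values.
* Mode: The mode is the number that occurs most often in the data set. For example, in a data set where 150 occurs twice and every other value occurs once, 150 is the mode.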


* Variance: Variance is a numerical value that describes how far the observations are spread out from their arithmetic mean. It is denoted by sigma-squared ($\sigma^2$) and is the average of the squared deviations from the mean: $\sigma^2 = \frac{1}{n}\sum_{i=1}^{n}(x_i - \bar{x})^2$.
* Standard Deviation: The standard deviation is a measure of the dispersion of the observations in a data set relative to their mean. It is the square root of the variance and is denoted by sigma ($\sigma$). Because it is expressed in the same unit as the values in the data set, it measures how much the observations typically differ from the mean.

* Link: https://medium.com/analytics-vidhya/statistics-mean-median-mode-variance-standard-deviation-47fab926465a

In [112]:
# Descriptive statistics of a small sample. var() and std() are called with
# NumPy's defaults (ddof=0), i.e. they divide by n rather than n - 1.
data = np.array([10, 20, 30, 40, 50, 60])
mean = data.mean()
median = np.median(data)
variance = data.var()
std_dev = data.std()
print('Mean:', mean)
print('Median:', median)
print('Variance:', variance)
print('std_dev', std_dev)

Mean: 35.0
Median: 35.0
Variance: 291.6666666666667
std_dev 17.07825127659933


## Handling Missing Values

In [116]:
# Count the missing entries: np.isnan yields a boolean mask, and summing the
# mask counts its True values.
data = np.array([1, 2, np.nan, 4, 5, np.nan, 7])
nan_count = np.isnan(data).sum()
print(nan_count)

2


In [119]:
# Removing NaN values: keep only the entries of `data` (defined in the
# previous cell) that are not NaN.
# NOTE(review): the recorded output also lists each value on its own line,
# which this cell does not print; the output looks stale, so re-run to refresh.
data_cleaned = data[np.logical_not(np.isnan(data))]
print(data_cleaned)


[1. 2. 4. 5. 7.]
1.0
2.0
4.0
5.0
7.0


In [118]:
# Mean of `data` computed over the non-NaN entries only.
mean = np.nanmean(data)
# Impute the missing values with that mean.
data_filled = np.where(
    np.isnan(data),  # where a value is missing ...
    mean,            # ... use the mean,
    data,            # otherwise keep the original value
)
print(data_filled)

[1.  2.  3.8 4.  5.  3.8 7. ]
