# Why Numpy ?


- Moreover, NumPy stores array elements in a compact, fixed-type block of memory instead of keeping a separate Python object for every value
- That means the cost of storing the data decreases, and operations on whole arrays become faster !!!

In [4]:
# Loading the libraries
import numpy as np
import pandas as pd

a = [1,2,3,4]
b = [2,3,4,5]

# Element-wise product of two plain Python lists, without NumPy.
# zip pairs up the values at matching positions, so no manual indexing is needed.
ab = [left * right for left, right in zip(a, b)]

ab


[2, 6, 12, 20]

In [5]:
# Element-wise multiplication with NumPy (no explicit loop needed)

a = np.array([1,2,3,4])
b = np.array([2,3,4,5])
a * b

array([ 2,  6, 12, 20])

## Creating Numpy Array

In [10]:
a = np.array([4,5,6,7], dtype = int)
b = np.array([8.74,9.76,10.44,11.2], dtype =float)
# dtype will assign the data types of the array

- We can define an array like above. Also, Numpy provides methods to create random arrays or vectors.
- So, Let's look at the examples !!!

In [8]:
# Creating vector (1x10) with 0 values
np.zeros(10, dtype = int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
# Creating vector (1x10) with 1 values 
np.ones(10, dtype = int)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [12]:
# Creating a 3x5 array in which every element is 3
np.full((3,5), 3)

array([[3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3]])

In [13]:
# Creating a vector between 0 and 31 with incrementing the base value with 3
np.arange(0,31, 3)

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27, 30])

In [14]:
# Creating a vector between 0 and 1 with equal division 
np.linspace(0,1,10)

array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])

In [15]:
# Creating 3x4 normal distributed array (mean:10, std:4)
np.random.normal(10, 4, (3,4))

array([[ 6.7538659 , 11.01970858, 10.18426324, 12.65400349],
       [14.31876189,  8.66278753,  8.12742044, 14.51921333],
       [ 3.74827149,  9.4587089 ,  2.45229757,  3.61091842]])

In [17]:
# Creating a 3x3 array of random integers picked from 0 to 9 (the upper bound, 10, is excluded)
np.random.randint(0,10, (3,3))

array([[1, 3, 1],
       [0, 0, 5],
       [2, 6, 2]])

In [18]:
# Additionally, we can check an object's type with the built-in type() function
type(a)

numpy.ndarray

## NumPy Array Properties

* ndim: _dimension of the array_
* shape: _shape of the array_
* size: _size of the array_
* dtype: _data type of the array_

In [19]:
a = np.random.randint(10, size = 10)

In [20]:
a.ndim

1

In [21]:
a.shape

(10,)

In [22]:
a.size

10

In [24]:
a.dtype

dtype('int64')

In [25]:
b = np.random.randint(0,10,(3,5))
b

array([[4, 7, 3, 3, 8],
       [6, 8, 8, 7, 4],
       [2, 3, 8, 1, 6]])

In [26]:
b.ndim

2

In [27]:
b.shape

(3, 5)

In [28]:
b.dtype

dtype('int64')

In [30]:
b.size

15

## Reshaping Numpy Array

- Sometimes, we need to reshape data from multi-dimension to one dimension or vice versa !!!
- So, we are using reshape method for the np array

In [34]:
a = np.arange(1,10)
a

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
np.arange(1,10).reshape((3,3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [36]:
b = a.reshape((1,9))
b

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [37]:
b.ndim

2

## Concatenation of the Numpy Array

In [38]:
x = np.array([1,2,3])
y = np.array([4,5,6])

In [39]:
np.concatenate([x,y])

array([1, 2, 3, 4, 5, 6])

In [40]:
z = np.array([7,8,9])
np.concatenate([x,y,z])

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [43]:
# 2D numpy array
ab = np.array([[1,2,3],
              [4,5,6]])

In [46]:
np.concatenate([ab,ab])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [47]:
# axis=0 joins along the rows (the arrays are stacked vertically, adding rows)
# axis=1 joins along the columns (the arrays are placed side by side, adding columns)
np.concatenate([ab,ab], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [48]:
np.concatenate([ab,ab], axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

## Splitting the Numpy Array

In [63]:
x = np.array([14,51,54,62,632,623,653])
np.split(x, (3,5))

[array([14, 51, 54]), array([ 62, 632]), array([623, 653])]

In [64]:
# Assigning the three split pieces to separate variables
a, b, c = np.split(x, (3,5))

In [65]:
a

array([14, 51, 54])

In [66]:
# 2D np array splitting
m =np.arange(16).reshape(4,4)
m

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [67]:
# vsplit cuts the array along its rows at the given index:
# rows before index 2 go to `above`, the remaining rows go to `below`
above, below = np.vsplit(m, [2])

In [68]:
above

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [69]:
below

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [74]:
# hsplit cuts the array along its columns at the given index:
# columns before index 2 go to `left`, the remaining columns go to `right`
left, right = np.hsplit(m, [2])

In [75]:
left

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [76]:
right

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

## Sorting the Numpy Array

In [77]:
s = np.array([42,532,13,15,123,531])

In [78]:
# sorting the numpy array values 
np.sort(s)

array([ 13,  15,  42, 123, 531, 532])

In [85]:
# Calling the array's own sort() method sorts it in place, so `s` itself is changed.
# To keep the original order, use np.sort(s) instead and store the sorted copy in another variable.
s.sort()

In [86]:
s

array([ 13,  15,  42, 123, 531, 532])

In [87]:
sorted_arr = np.sort(s)
sorted_arr

array([ 13,  15,  42, 123, 531, 532])

In [91]:
# Sorting 2D numpy array
arr_2d = np.random.normal(10,5, (3,5))
arr_2d

array([[ 7.66599364, 11.36292196, 13.70467812, -0.37272475,  4.68577867],
       [11.93302921,  8.89443618, 11.00199739,  9.26904519,  9.0933035 ],
       [18.08587054,  3.55046335,  2.33933257, 13.33925773,  3.53028503]])

In [92]:
np.sort(arr_2d, axis=1)

array([[-0.37272475,  4.68577867,  7.66599364, 11.36292196, 13.70467812],
       [ 8.89443618,  9.0933035 ,  9.26904519, 11.00199739, 11.93302921],
       [ 2.33933257,  3.53028503,  3.55046335, 13.33925773, 18.08587054]])

In [93]:
np.sort(arr_2d, axis =0)

array([[ 7.66599364,  3.55046335,  2.33933257, -0.37272475,  3.53028503],
       [11.93302921,  8.89443618, 11.00199739,  9.26904519,  4.68577867],
       [18.08587054, 11.36292196, 13.70467812, 13.33925773,  9.0933035 ]])

## Find Array Value by Index

In [95]:
f_index = np.random.randint(10, size =10)
f_index

array([5, 9, 0, 1, 4, 8, 8, 9, 4, 9])

In [96]:
f_index[0]

5

In [97]:
f_index[-1]

9

In [98]:
f_index[-2]

4

In [99]:
f_index[-2] = 66

In [100]:
f_index = np.random.randint(10, size =(3,5))
f_index

array([[9, 9, 8, 1, 4],
       [9, 5, 7, 1, 9],
       [9, 0, 9, 5, 7]])

In [101]:
f_index[0,-1]

4

In [105]:
f_index[2,4]

7

## Find Array Value by Slicing

In [107]:
a = np.arange(20,50,3)
a

array([20, 23, 26, 29, 32, 35, 38, 41, 44, 47])

In [108]:
a[0:3]

array([20, 23, 26])

In [109]:
a[3:]

array([29, 32, 35, 38, 41, 44, 47])

In [110]:
a[:3]

array([20, 23, 26])

In [111]:
a[1::2]

array([23, 29, 35, 41, 47])

In [112]:
# 2D Slice
b = np.random.randint(10, size= (5,5))
b

array([[9, 4, 5, 9, 1],
       [2, 2, 1, 7, 2],
       [7, 4, 9, 6, 2],
       [4, 9, 5, 8, 8],
       [7, 5, 9, 3, 4]])

In [114]:
b[:,0]

array([9, 2, 7, 4, 7])

In [115]:
b[:,-1]

array([1, 2, 2, 8, 4])

In [116]:
b[0,:]

array([9, 4, 5, 9, 1])

In [119]:
b[1,:]

array([2, 2, 1, 7, 2])

In [120]:
b[0:2, 0:3]

array([[9, 4, 5],
       [2, 2, 1]])

In [121]:
b[::, :2]

array([[9, 4],
       [2, 2],
       [7, 4],
       [4, 9],
       [7, 5]])

In [122]:
b[2]

array([7, 4, 9, 6, 2])

## Data Manipulation for Subset of the Array

In [123]:
a =np.random.randint(10, size= (5,5))
a

array([[8, 2, 6, 6, 0],
       [9, 5, 4, 4, 8],
       [2, 1, 7, 3, 7],
       [0, 3, 9, 5, 7],
       [2, 8, 6, 5, 3]])

In [124]:
subset_a = a[0:3,0:2]
subset_a

array([[8, 2],
       [9, 5],
       [2, 1]])

In [125]:
subset_a[1,1] = 74482
subset_a

array([[    8,     2],
       [    9, 74482],
       [    2,     1]])

In [127]:
# The raw data also changed
a

array([[    8,     2,     6,     6,     0],
       [    9, 74482,     4,     4,     8],
       [    2,     1,     7,     3,     7],
       [    0,     3,     9,     5,     7],
       [    2,     8,     6,     5,     3]])

In [128]:
# So, if we don't want to change the raw data
# and process some manipulation for the subset of the data
# We can use copy method
m =np.random.randint(10, size= (5,5))
m

array([[4, 9, 7, 3, 8],
       [3, 4, 0, 0, 9],
       [6, 0, 0, 6, 3],
       [8, 9, 7, 7, 8],
       [3, 4, 2, 0, 7]])

In [129]:
subset_m = m[0:3,0:2].copy()
subset_m

array([[4, 9],
       [3, 4],
       [6, 0]])

In [131]:
subset_m[1:2] = 5793
subset_m

array([[   4,    9],
       [5793, 5793],
       [   6,    0]])

In [133]:
# The raw dataset did not change !!!
m

array([[4, 9, 7, 3, 8],
       [3, 4, 0, 0, 9],
       [6, 0, 0, 6, 3],
       [8, 9, 7, 7, 8],
       [3, 4, 2, 0, 7]])

## Get the Element of the Array with Fancy Index

In [135]:
v = np.arange(0,30,3)
v

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27])

In [136]:
[v[1], v[3], v[6]]

[3, 9, 18]

In [138]:
# Fancy index
get_data =[1,3,6] # the positions of the elements that we need
# Indexing with a list of positions returns all of those elements at once.
# This must be the last expression in the cell so that its result is displayed.
v[get_data]

array([ 3,  9, 18])

In [157]:
# 2D array
m=np.arange(25).reshape((5,5))
m

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [155]:
row = np.array([1,1])
column = np.array([1,2])

In [161]:
# Row and column index arrays are paired element by element,
# so this picks m[1, 1] and m[1, 2].
m[row, column]

array([6, 7])

In [162]:
# Basic index usage and fancy index usage together
m[0,[1,2]]

array([1, 2])

In [163]:
# Slice and fancy index together
m[0:, [1,2]]

array([[ 1,  2],
       [ 6,  7],
       [11, 12],
       [16, 17],
       [21, 22]])

## Conditional Element Manipulation

In [164]:
v = np.array([1,2,3,4,5])
v > 3

array([False, False, False,  True,  True])

In [165]:
v < 3

array([ True,  True, False, False, False])

In [166]:
v[v<3] # boolean mask: keeps only the elements where the condition is True

array([1, 2])

In [167]:
v[v==3]

array([3])

In [168]:
v*2

array([ 2,  4,  6,  8, 10])

In [169]:
v/5

array([0.2, 0.4, 0.6, 0.8, 1. ])

In [170]:
v*5/10+2

array([2.5, 3. , 3.5, 4. , 4.5])

In [171]:
v**2

array([ 1,  4,  9, 16, 25])

## Mathematical Processes

In [173]:
v-1

array([0, 1, 2, 3, 4])

In [174]:
v*10

array([10, 20, 30, 40, 50])

In [175]:
# numpy is using ufunc for the mathematical processes
np.subtract(v,1)

array([0, 1, 2, 3, 4])

In [176]:
np.power(v,3)

array([  1,   8,  27,  64, 125])

In [177]:
v%2

array([1, 0, 1, 0, 1])

In [178]:
np.mod(v,3)

array([1, 2, 0, 1, 2])

In [179]:
np.absolute(np.array(([-3])))

array([3])

In [180]:
# np.sin expects radians, so convert the angle from degrees first.
# The result is a tiny floating-point value that is effectively 0.
np.sin(np.deg2rad(360))

-2.4492935982947064e-16

In [181]:
# np.cos expects radians, so convert the angle from degrees first.
np.cos(np.deg2rad(45))

0.7071067811865476

In [182]:
# np.tan expects radians, so convert the angle from degrees first.
# tan(45 degrees) is 1; the last digit differs because of floating-point rounding.
np.tan(np.deg2rad(45))

0.9999999999999999

In [183]:
np.log(v)

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791])

In [185]:
# Show NumPy's built-in documentation.
# The leading `?` is IPython/Jupyter help syntax; it does not work in a plain Python script.
?np

## Statistical Calculations

* np.mean(arr,axis=0) | Returns mean along specific axis

* arr.sum() | Returns sum of arr

* arr.min() | Returns minimum value of arr

* arr.max(axis=0) | Returns maximum value of specific axis

* np.var(arr) | Returns the variance of array

* np.std(arr,axis=1) | Returns the standard deviation of specific axis

* np.corrcoef(arr) | Returns the correlation coefficient matrix of the array (`corrcoef` is a NumPy function, not an array method)

In [187]:
v =np.arange(30)
v

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [189]:
np.mean(v, axis=0)

14.5

In [190]:
v.sum()

435

In [191]:
v.min()

0

In [192]:
v.max()

29

In [193]:
np.var(v)

74.91666666666667

In [194]:
np.std(v)

8.65544144839919

## Equation with 2 unknowns

**5 * x0 + x1 = 12  
x0 + 3 * x1 = 10**

In [198]:
# We need to establish the coefficient of the equations
a = np.array([[5,1],[1,3]])
b = np.array([12,10])


In [199]:
x = np.linalg.solve(a,b)

In [200]:
x

array([1.85714286, 2.71428571])