# NumPy (Numerical Python)

##### Lists in python are very slow so NumPy provides an **array** **object** that is up to 50x faster than lists. The array object in NumPy is called ndarray

### Why is NumPy Faster Than Lists?
##### NumPy arrays are stored in one contiguous block of memory, unlike lists, so processes can access and manipulate them very efficiently.

##### Open-source package
##### Multidimensional array object
##### All elements of same datatype
##### Zero-based indexing
##### Consists of contiguous 1-D segment of computer memory


In [None]:
import numpy as np
import pandas as pd

In [None]:
np.__version__

'1.19.5'

# type(), .ndim, .dtype

In [None]:
a = np.array([1,2,3,4])
print(a)
print(type(a))
print(a.dtype)
print(a.ndim)

# type() and .ndim also work on pandas Series and DataFrames.
# .dtype works on a Series; a DataFrame exposes its per-column types through .dtypes instead.

[1 2 3 4]
<class 'numpy.ndarray'>
int64
1


In [None]:
np.array([[1,2],[3,4],[5,6]]).ndim

2

In [None]:
b = np.array([(1,2,3,4), (5,6,7,8)])
b

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [None]:
# minimum dimensions 

a = np.array([1, 2, 3, 4, 5], ndmin = 2) 
a

array([[1, 2, 3, 4, 5]])

In [None]:
# dtype parameter: force the element type instead of letting NumPy infer it

import numpy as np
# np.complex_ was removed in NumPy 2.0; np.complex128 is the same type and exists in every release
a = np.array([1, 2, 3], dtype=np.complex128)
a

array([1.+0.j, 2.+0.j, 3.+0.j])

## Datatype
##### NumPy provides a variety of datatypes, e.g. np.bool_, np.int_, np.float64, np.complex128 (the older aliases np.float_ and np.complex_ were removed in NumPy 2.0)

## Array Attributes

# ndarray.shape 

In [None]:
a2 = np.array([[1, 2, 3],[4, 5, 6]])
print(a2.shape)

(2, 3)


In [None]:
a3 = np.array([[1, 2, 3], [4, 5, 6]])
print(a3)
a3.shape = (3, 2) # inplace 
print(a3)


[[1 2 3]
 [4 5 6]]
[[1 2]
 [3 4]
 [5 6]]


# Copy vs View
##### Any changes made to the copy will not affect original array, and any changes made to the original array will not affect the copy.
##### Any changes made to the view will affect the original array, and any changes made to the original array will affect the view.

# ndarray.reshape(a, b) --> not inplace

In [None]:
a4 = np.array([[1, 2, 3], [4, 5, 6]])
print(a4)
a5 = a4.reshape(3, 2) # View
print(a5)

[[1 2 3]
 [4 5 6]]
[[1 2]
 [3 4]
 [5 6]]


# np.arange(n)
##### np.arange([start,] stop[, step], dtype=None) -- start defaults to 0 and step to 1; when dtype is None it is inferred from the other arguments
##### Create an array in the range [start, stop)

In [None]:
a7 = np.arange(2, 20, 2, 'float')
print(a7)

[ 2.  4.  6.  8. 10. 12. 14. 16. 18.]


In [None]:
a7 = np.arange(24)
print(a7)
a8 = a7.reshape(2, 4, 3)
print(a8)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]
  [ 9 10 11]]

 [[12 13 14]
  [15 16 17]
  [18 19 20]
  [21 22 23]]]


# np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
##### Returns num evenly spaced samples, calculated over the interval [start, stop].
##### If endpoint = True, the stop value is included, otherwise it is excluded (default True)
##### If retstep = True, also returns the step size b/w 2 consecutive samples, i.e. (samples, step) (default False)

In [None]:
# Exclude the stop value and also return the spacing between samples -> (array, step)
x = np.linspace(10, 20, num=5, endpoint=False, retstep=True)
print(x)
# Defaults: the stop value is included and only the array is returned
y = np.linspace(10, 20, num=5)
print(y)

(array([10., 12., 14., 16., 18.]), 2.0)
[10.  12.5 15.  17.5 20. ]


# ndarray.itemsize

In [None]:
# This array attribute returns the length of a single element of array in bytes. 

a9 = np.array([1,2,3,4,5], dtype = np.float32)
print(a9.itemsize)
a10 = np.array([1,2,3], dtype = np.int8) # 8 bits = 1 bytes
print(a10.itemsize)

4
1


# ndarray creation

# numpy.empty(shape, dtype=float)

In [None]:
# np.empty creates an array of the specified shape and dtype without initializing its entries,
# so the printed values are whatever happened to be in memory (they look random).
# order: 'C' (row major) or 'F' (column major). C is the default.
# C order means that operating row-wise on the array will be slightly quicker
# F order means that column-wise operations will be faster.

a = np.empty((3,4), dtype = int, order = 'F')
print(a, '\n')
b = np.empty((3,4), order = 'C')
print(b)

[[       72918976        48477856 139963789727680               2]
 [              0 139963789727680 139963789727680     12884901888]
 [           1605        48477864               0              11]] 

[[3.60233460e-316 0.00000000e+000 7.92975362e-321 2.39512037e-316]
 [6.91513002e-310 2.39512077e-316 6.91513002e-310 6.91513002e-310]
 [0.00000000e+000 4.94065646e-324 3.04578529e-314 5.43472210e-323]]


# np.zeros(shape, dtype=float)

In [None]:
# np.complex_ was removed in NumPy 2.0; np.complex128 is the same type under its canonical name
a = np.zeros((4, 5), dtype=np.complex128)
print(a)

[[0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j]
 [0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j]
 [0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j]
 [0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j]]


# numpy.ones(shape, dtype=float)

In [None]:
# Returns a new array of specified size and type, filled with ones.
# default is float

a = np.ones((2,3), dtype = np.int8)
print (a)

[[1 1 1]
 [1 1 1]]


# np.full(shape, fill_value, dtype=None) 
##### create an array with fixed value
##### dtype = None means the dtype is inferred from fill_value, i.e. np.array(fill_value).dtype

In [None]:
# Explicit dtype: every entry is the complex value 8+0j.
# (np.complex_ was removed in NumPy 2.0; np.complex128 is the same type.)
a = np.full((3, 4), 8, dtype=np.complex128)
print(a)
# dtype omitted: the dtype is inferred from fill_value, here an integer
a = np.full((3, 4), 8)
print(a)

[[8.+0.j 8.+0.j 8.+0.j 8.+0.j]
 [8.+0.j 8.+0.j 8.+0.j 8.+0.j]
 [8.+0.j 8.+0.j 8.+0.j 8.+0.j]]
[[8 8 8 8]
 [8 8 8 8]
 [8 8 8 8]]


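# np.eye(N, M=None, k=0, dtype=float)
##### makes an N x M array with ones on the diagonal and zeros elsewhere (an identity matrix when M is omitted, since M then defaults to N)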

In [None]:
a = np.eye(5) # makes square identity matrix
b = np.eye(5,3)
print(a, '\n\n', b)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]] 

 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 0.]
 [0. 0. 0.]]


# numpy.random.rand(d0, d1, ..., dn)
##### Create an array of the given shape and populate it with random samples from a uniform distribution over **[0, 1)**.

In [None]:
print(np.random.rand(1), '\n')
print(np.random.rand(2,2,3))

[0.75016924] 

[[[0.90780425 0.38214128 0.31985898]
  [0.90828731 0.84509195 0.58102184]]

 [[0.6319711  0.29270652 0.61194028]
  [0.11069872 0.2716201  0.05437108]]]


# np.random.randint(low, high=None, size=None, dtype=int)
##### Return random integers from low (inclusive) to high (exclusive).
##### If high is None (the default), then results are from [0, low).

In [None]:
print(np.random.randint(4)) # random value from the range [0, 4)
print(np.random.randint(2, 5)) # random value from the range [2, 5)
print(np.random.randint(2, 5))
print(np.random.randint(2, 5, (2,3)))

3
3
3
[[2 2 4]
 [2 3 2]]


# np.random.random(size=None)
##### Return random floats in the interval [0.0, 1.0)

In [None]:
a = np.random.random((2,3)) # size in tuple
print(a)

[[0.01001884 0.23198277 0.57550696]
 [0.03270792 0.97254675 0.2473522 ]]


## create array from existing data

In [None]:
# np.asarray(a, dtype, order)
# a = Input data in any form such as list, list of tuples, tuples, tuple of tuples or tuple of lists

x = [1,2,4]
b = np.asarray(x, dtype = float) # list to array
print(b)
y = (1,2,3,4)
c = np.asarray(y) # tuple to array
print(c)

[1. 2. 4.]
[1 2 3 4]


In [None]:
x = [(1,2,3), (4,5,6)] # list of tuple to 2-D array
a = np.asarray(x)
print(a)

[[1 2 3]
 [4 5 6]]


**Later**

1.   np.frombuffer   
2.   np.fromiter





# Indexing

In [None]:
# 1D array
a = np.arange(10) 
b = a[5] 
print(b)
print(a[2:])
print(a[2:5])


5
[2 3 4 5 6 7 8 9]
[2 3 4]


In [None]:
# 2D array => a[row, col]

a = np.array([[1,2,3],[3,4,5],[4,5,6]]) 
# print 'Now we will slice the array from the index a[1:]' 
print(a[1:])
print('\n') 

print( 'The items in the second column are:')  
print( a[...,1] )
print ('\n')  

# Now we will slice all items from the second row 
print( 'The items in the second row are:') 
print (a[1,...] )
print('\n') 

# Now we will slice all items from column 1 onwards 
print ('The items column 1 onwards are:' )
print( a[...,1:])

[[3 4 5]
 [4 5 6]]


The items in the second column are:
[2 4 5]


The items in the second row are:
[3 4 5]


The items column 1 onwards are:
[[2 3]
 [4 5]
 [5 6]]


In [None]:
a = np.array([[1,2,3],[3,4,5],[4,5,6]]) 
print(a, '\n')
print(a[:, :], '\n') # all rows all columns
print(a[:, 0], '\n') # all rows 0th column
print(a[0, :], '\n') # 0th row all column

[[1 2 3]
 [3 4 5]
 [4 5 6]] 

[[1 2 3]
 [3 4 5]
 [4 5 6]] 

[1 3 4] 

[1 2 3] 



# Mathematics

In [None]:
a = np.array([1,2,3,4])
b = np.array((5,6,7,8))
print(a+b)
print(a-b)
print(a*b)
print(a/b)


[ 6  8 10 12]
[-4 -4 -4 -4]
[ 5 12 21 32]
[0.2        0.33333333 0.42857143 0.5       ]


# Linear Algebra

In [None]:
one = np.ones((3,3))
zero = np.zeros((3,3))
print(one@zero) # dot product / matrix multiplication
# one.dot(zero)
# np.matmul(one, zero)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


# Statistics

In [None]:
a = np.array(((1,2,3),(4,5,6),(7,8,9)))
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
print(np.max(a))
print(np.min(a))
print(np.sum(a))
print(np.mean(a))
print(np.sum(a, axis = 0)) # column-wise

9
1
45
5.0
[12 15 18]


# Transpose

In [None]:
a = np.array([[1,2,3,4],[5,6,7,8]])
a.T

array([[1, 5],
       [2, 6],
       [3, 7],
       [4, 8]])

# Miscellaneous

In [None]:
a = np.arange(1, 17).reshape(4, 4)
b = np.arange(17, 33).reshape(4,  4)
print(a)
print(b)
print(np.hstack((a, b))) # col-wise
print(np.vstack((a, b))) # row-wise


[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]
[[17 18 19 20]
 [21 22 23 24]
 [25 26 27 28]
 [29 30 31 32]]
[[ 1  2  3  4 17 18 19 20]
 [ 5  6  7  8 21 22 23 24]
 [ 9 10 11 12 25 26 27 28]
 [13 14 15 16 29 30 31 32]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]
 [21 22 23 24]
 [25 26 27 28]
 [29 30 31 32]]


In [None]:
# a > 10 compares every element with 10 and yields a boolean mask of the same shape as a;
# indexing with that mask keeps only the elements where it is True (as a 1-D array).
new_a = a[a > 10]
print(new_a)
print(a > 10) # element-wise comparison -> boolean mask (no explicit loop over a)

[11 12 13 14 15 16]
[[False False False False]
 [False False False False]
 [False False  True  True]
 [ True  True  True  True]]


# np.where()
#####  np.where(condition, if TRUE then this, if FALSE then this value)


In [None]:
a = np.arange(1, 10)
print(a)
b = np.where(a>5, a, 0)
print(b)

[1 2 3 4 5 6 7 8 9]
[0 0 0 0 0 6 7 8 9]


In [None]:
a = [[True, False],[True, True]]
b = np.where(a, [[1,2],[3,4]],[[5,6],[7,8]])
print(b)

[[1 6]
 [3 4]]


# np.argmax() / np.argmin()
##### returns the **index** of the max / min element along a given axis (with axis = None, the index into the flattened array)
##### np.argmin(a, axis = None)

In [None]:
a = np.arange(6).reshape(2,3) + 10
print(a)

print(np.argmax(a))

print(np.argmax(a, axis=0)) # vertical
print(np.argmin(a, axis=0)) # vertical

print(np.argmax(a, axis=1)) # horizontal
print(np.argmin(a, axis=1)) # horizontal


[[10 11 12]
 [13 14 15]]
5
[1 1 1]
[0 0 0]
[2 2]
[0 0]


# dataframe.idxmin(axis=0)
##### Return index of first occurrence of minimum over requested axis.

In [None]:
import pandas as pd
df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], 
                   'co2_emissions': [37.2, 19.66, 1712]},
                  index=['Pork', 'Wheat Products', 'Beef'])

print(df, '\n')

print(df.idxmin(), '\n')

print(df.idxmin(axis="columns"), '\n')

                consumption  co2_emissions
Pork                  10.51          37.20
Wheat Products       103.11          19.66
Beef                  55.48        1712.00 

consumption                Pork
co2_emissions    Wheat Products
dtype: object 

Pork                consumption
Wheat Products    co2_emissions
Beef                consumption
dtype: object 



# Difference b/w df.idxmin() and np.argmin()


In [None]:
a = np.array([[1,8,3],[6,3,2]])
print(a, '\n')
print(np.argmin(a, axis=0), '\n') # row position of the minimum in each column (axis defaults to None)
print(np.argmin(a, axis=1), '\n') # column position of the minimum in each row (axis defaults to None)

# ####
# In numpy we have np.argmin(a, axis=None), which works with numpy arrays or lists and returns integer positions
# In pandas we have dataframe.idxmin(axis = 0), which works only with dataframes and series and returns index labels
# ####

b = pd.DataFrame([[1,8,3],[6,3,2]], index=['a', 'b'], columns=['A', 'B', 'C'])
print(b, '\n')
print(b.idxmin(), '\n') # axis defaults to 0: row label of the minimum in each column
# out of (a:1, b:6) a is min, out of(a:8, b:3) b is min, out of (a:3, b:2) b is min ---> therefore, ans = [a, b, b]
print(b.idxmin(axis=1))
# out of (A:1, B:8, C:3) A is min, out of(A:6, B:3, C:2) C is min ---> therefore, ans = [A, C]


[[1 8 3]
 [6 3 2]] 

[0 1 1] 

[0 2] 

   A  B  C
a  1  8  3
b  6  3  2 

A    a
B    b
C    b
dtype: object 

a    A
b    C
dtype: object


# np.sort()
##### Return a sorted **copy** of an array.
##### numpy.sort(a, axis=-1, kind=None, order=None)
##### default axis -1 means sort according to last axis i.e. axis=1 in 2D array
##### axis = None means flatten and sort the array

In [None]:
arr = [[4,1],[2,6]]
a=np.array(arr)

default_axis = np.sort(a) # default axis = -1

axis0 = np.sort(a, axis=0)

none_axis = np.sort(a, axis = None)

print(default_axis)
print(axis0)
print(none_axis)

[[1 4]
 [2 6]]
[[2 1]
 [4 6]]
[1 2 4 6]


# np.argsort()
##### (a, axis=-1, kind=None, order=None)
##### Returns the indices that would sort an array.

In [None]:
x = np.array([3, 1, 2])
np.argsort(x)

array([1, 2, 0])

In [None]:
x = np.array([[0, 3], [2, 2]])
print(x)
ind = np.argsort(x, axis=0)  # sorts along first axis (down)
print(ind)
ind2 = np.argsort(x, axis=1)  # sorts along last axis (across)
print(ind2)

[[0 3]
 [2 2]]
[[0 1]
 [1 0]]
[[0 1]
 [0 1]]


# np.sum()
##### np.sum(a, axis=None, dtype=None)
##### dtype: The type of the returned array and of the accumulator in which the elements are summed

In [None]:
print(np.sum([0.5, 1.5])) # axis = None i.e. flatten and sum

print(np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)) # convert all elements to int and then add -> [0,0,0,1] = 1

print(np.sum([[0, 1], [0, 5]])) # axis = None i.e. flatten and sum

print(np.sum([[0, 1], [0, 5]], axis=0)) # vertical

print(np.sum([[0, 1], [0, 5]], axis=1)) # horizontal

print(np.sum([[0, 4, 2], [3, 6, 8]], where=[False, True, True], axis=1)) # (4+2) (6+8)

print(np.sum([[0, 4], [3, 6]], where=[True, False], axis=1))

print(np.sum([[0, 4, 8], [3, 6, 1], [2, 5, 7]], where=[True, False, False], axis=1)) # axis=1 means from every row take sum of only those values which are at True position 

print(np.sum([[0, 4], [3, 6]], where=[True, False], axis=0)) # for axis=0,  False means that ans at this position should be zero

print(np.sum([[0, 4, 2], [3, 6, 8]], where=[False, True, True], axis=0)) 

print(np.sum([[0, 4, 2], [3, 6, 8]], where=[True, False, False], axis=0)) 


2.0
1
6
[0 6]
[1 5]
[ 6 14]
[0 3]
[0 3 2]
[3 0]
[ 0 10 10]
[3 0 0]


# DataFrame.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
##### Return DataFrame with duplicate rows removed.

##### subset: Only consider certain columns for identifying duplicates, by default use all of the columns.

##### keep{‘first’, ‘last’, False}: Determines which duplicates (if any) to keep. - first : Drop duplicates except for the first occurrence. - last : Drop duplicates except for the last occurrence. - False : Drop all duplicates.

##### inplace: Whether to drop duplicates in place or to return a copy.

##### ignore_index: If True, the resulting axis will be labeled 0,1,…,n-1.

In [None]:
df = pd.DataFrame({
    'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
    'style': ['cup', 'cup', 'cup', 'pack', 'pack'],
    'rating': [4, 4, 3.5, 15, 5]
})
df

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [None]:
t = df.drop_duplicates() # not inplace
t

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [None]:
t = df.drop_duplicates(subset=['brand']) # remove duplicates according to brand
t

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
2,Indomie,cup,3.5


In [None]:
print(df, '\n')
t = df.drop_duplicates(keep=False) # drop all
print(t, '\n')
t2 = df.drop_duplicates(keep='last', ignore_index=True) # 0,1,2,3....
print(t2, '\n')

     brand style  rating
0  Yum Yum   cup     4.0
1  Yum Yum   cup     4.0
2  Indomie   cup     3.5
3  Indomie  pack    15.0
4  Indomie  pack     5.0 

     brand style  rating
2  Indomie   cup     3.5
3  Indomie  pack    15.0
4  Indomie  pack     5.0 

     brand style  rating
0  Yum Yum   cup     4.0
1  Indomie   cup     3.5
2  Indomie  pack    15.0
3  Indomie  pack     5.0 



In [None]:
# df.drop_duplicates(inplace=True) # inplace
# df

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


# dataframe.sub(other, axis='columns', level=None, fill_value=None)
##### other: scalar, sequence, Series, or DataFrame

In [None]:
df = pd.DataFrame([[1,2],[3,4]], index=['a','b'], columns=['A','B'])
df

Unnamed: 0,A,B
a,1,2
b,3,4


In [None]:
df.sub(4)

Unnamed: 0,A,B
a,-3,-2
b,-1,0


In [None]:
df2 = pd.DataFrame([[1,1],[2,2]], index=['a','b'], columns=['A','B'])
t = df.sub(df2)
t

Unnamed: 0,A,B
a,0,1
b,1,2


In [None]:
dff = pd.Series([3,4])
s = pd.Series([1,1])
t = dff.sub(s)
t

0    2
1    3
dtype: int64

# .tolist()
##### Converts a pandas Series or numpy array to a list (a DataFrame has no .tolist(); convert it with .to_numpy() first)

In [None]:
# convert pandas series to list
lis = [10,20,30,40]
ser = pd.Series(lis)
ser.tolist()

[10, 20, 30, 40]

In [None]:
# convert pandas dataframe to list
# a DataFrame has no .tolist(), so first convert it to a numpy ndarray with .to_numpy() and then to a list
# (the input is a list of rows; naming it `dict` would shadow the built-in dict type)
rows = [[10, 20], [30, 40], [50, 60]]
df = pd.DataFrame(rows)
df.to_numpy().tolist()

[[10, 20], [30, 40], [50, 60]]

In [None]:
# convert numpy array to list
arr = [10,20,30]
narr = np.array(arr)
narr.tolist()

[10, 20, 30]

# dataframe.values   **!! Use dataframe.to_numpy() instead**
##### converts dataframe to numpy array
##### Only the values in the DataFrame will be returned, the axes labels will be removed.

In [None]:
df = pd.DataFrame({'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3]})
df.values

array([['cat', 2.5],
       ['cat', 3.0],
       ['snake', 0.5],
       ['dog', nan],
       ['dog', 5.0],
       ['cat', 2.0],
       ['snake', 4.5],
       ['cat', nan],
       ['dog', 7.0],
       ['dog', 3.0]], dtype=object)

# dataframe.to_numpy()
##### converts pandas series or df to numpy array

In [None]:
pd.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy()

array([[1, 3],
       [2, 4]])

In [None]:
pd.Series([10,20,30]).to_numpy()

array([10, 20, 30])

# .index
##### returns index of a Series or dataframe

In [None]:
# index of series
s=pd.Series([10,20,30], index = ['a','b','c'])

print(s.index)
print(s.index.to_numpy()) # convert to numpy 
print(s.index.to_numpy().tolist()) # convert to list

Index(['a', 'b', 'c'], dtype='object')
['a' 'b' 'c']
['a', 'b', 'c']


In [None]:
# index of df

# Column-oriented records for the demo frame (np.nan marks a missing age)
data = {
    'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
    'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
    'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no'],
}

# One row label per record
labels = list('abcdefghij')


def generate():
    """Print the row index of the demo DataFrame in three forms.

    Builds a DataFrame from the module-level ``data`` and ``labels`` and prints
    its index as a pandas Index, as a NumPy array and as a plain list.
    Returns None.
    """
    frame = pd.DataFrame(data, index=labels)
    row_index = frame.index
    for rendering in (row_index, row_index.to_numpy(), row_index.tolist()):
        print(rendering)


generate()

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')
['a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j']
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']


# pd.Index(1D data)
##### **Immutable** sequence used for indexing and alignment.
##### data array-like (1-dimensional)

In [None]:
lis = ['a','b','c','d'] 
labels = pd.Index(lis) # use this as index or labels as they are immutable
s = pd.Series([1,2,3,4], index=labels)
s

a    1
b    2
c    3
d    4
dtype: int64

### data.Age.isna() returns a boolean Series (True where the value is NaN)
### data['Age'] = data['Age'].fillna(data['Age'].median()) replaces NaN values with the median (calling fillna(..., inplace=True) on a selected column is unreliable because it may act on a copy)
### data.drop(['a', 'b'], axis=1) deletes columns 'a' and 'b' (the default axis=0 drops rows)
### data.apply(fn) applies this fn along the given axis
### data.Age.map({'a':'b'}) substitutes values with other values
### pd.cut(array, bins) -> age to age ranges
### data.Age.value_counts() returns the count of each unique value