This notebook is inspired by:
[Jake VanderPlas - Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/index.html)

# Numpy

In [2]:
## importing numpy
import numpy as np

np.__version__

'1.18.1'

## Creating Numpy Arrays

__From lists__

In [16]:
my_array1 = np.array([[[4, 1, 7, 13, 2.76]], [[4, 1, 7, 13, 2.76]]])

In [23]:
# 1e56 is far outside the range of a 32-bit integer, so NumPy refuses the
# conversion. Catch the error and show it, so the rest of the notebook
# still runs under "Restart & Run All".
try:
    my_1d_array = np.array([13, 27, 33, 1e56], dtype = 'int32')
except OverflowError as err:
    print('OverflowError: {}'.format(err))

OverflowError: Python int too large to convert to C long

In [35]:
array1.shape

(4,)

In [30]:
array1

array([1, 2, 4, 4], dtype=int32)

In [17]:
type(my_array1)

numpy.ndarray

In [18]:
my_array1

array([[[ 4.  ,  1.  ,  7.  , 13.  ,  2.76]],

       [[ 4.  ,  1.  ,  7.  , 13.  ,  2.76]]])

In [19]:
my_array1.shape

(2, 1, 5)

In [36]:
# if we want we can also specify the data types

array1 = np.array([1,2,3,4], dtype = 'int32')

In [39]:
# But be aware that each dtype has a limited range and precision:
# assigning 4.5 into this int32 array silently truncates it to 4.
array1[2]  = 4.5

array1

array([1, 2, 4, 4], dtype=int32)

Unlike lists, arrays can be multidimensional

In [40]:
multidim = np.array([[1,2,3,12],
          [4,5,6,11], 
          [7,8,9,10]])

In [42]:
multidim

array([[ 1,  2,  3, 12],
       [ 4,  5,  6, 11],
       [ 7,  8,  9, 10]])

In [41]:
multidim.shape

(3, 4)

__From Scratch__

In [48]:
# we can create a numpy array with zeros of any shape
zeros = np.zeros(7)

In [49]:
zeros

array([0., 0., 0., 0., 0., 0., 0.])

In [53]:
zeros.shape

(7,)

In [66]:
## Again we can pass the dtype

a = np.zeros((2,6), dtype = 'int')

In [60]:
# `a` has an integer dtype, so the fractional part is dropped: 1.2 is stored as 1
a[1,2] = 1.2

In [65]:
b = a.T
b

array([[0, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 0],
       [0, 0]])

In [67]:
a

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [69]:
# `a` has shape (2, 6) and its transpose `b` has shape (6, 2), so the two
# cannot be compared element by element: `a == b` emits a DeprecationWarning
# in older NumPy and raises an error in recent releases.
# np.array_equal checks shape first and simply returns False on a mismatch.
np.array_equal(a, b)

  """Entry point for launching an IPython kernel.


False

In [70]:
np.ones((3, 8))

array([[1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1.]])

In [71]:
# we can create an array of any shape filled with any number:

np.full((2, 7), .23)

array([[0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23],
       [0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23]])

Other useful methods for creating arrays:

- `np.arange`

- `np.linspace`

- `np.random.random`

In [74]:
np.arange(5, 25,3)

array([ 5,  8, 11, 14, 17, 20, 23])

In [79]:
np.linspace(0, 100, 10)

array([  0.        ,  11.11111111,  22.22222222,  33.33333333,
        44.44444444,  55.55555556,  66.66666667,  77.77777778,
        88.88888889, 100.        ])

In [80]:
np.random.random(size = 10)

array([0.74333947, 0.5257552 , 0.74663129, 0.96926715, 0.96534167,
       0.06455327, 0.74866246, 0.8224098 , 0.66322446, 0.12185576])

In [82]:
np.random.randn(10)

array([ 1.73414068,  0.80887545,  1.6287405 ,  2.55331856,  1.1940059 ,
       -0.17661199, -0.08778159, -0.24642623, -0.96192302, -0.64606859])

In [84]:
np.random.normal(loc =69, scale = 2, size =100)

array([65.73029806, 66.85713518, 66.84852065, 66.58655707, 72.111109  ,
       69.71209533, 69.8933566 , 70.07088356, 71.75247485, 68.66924195,
       68.59109707, 71.61570135, 67.0389092 , 69.15200398, 71.60597859,
       68.3532322 , 68.16826517, 64.2474354 , 71.11007425, 65.64938499,
       67.69561284, 69.07077153, 69.73444476, 67.80313938, 71.88369896,
       71.60747585, 72.18864571, 67.74645886, 67.89714072, 70.74189459,
       68.17255667, 74.06123303, 71.2662148 , 68.53998429, 68.62924512,
       68.27415981, 69.9490893 , 67.51929139, 70.72901342, 69.60849947,
       68.31321816, 71.30583641, 69.75584535, 71.47262916, 67.70438224,
       69.92275664, 66.00283778, 68.83374754, 68.38685795, 68.63482474,
       66.99339899, 67.54128956, 70.65086218, 72.2704038 , 69.09899911,
       66.36279985, 66.22243005, 68.01959766, 63.68824348, 70.85493297,
       68.75042331, 65.63802162, 70.85059548, 71.52839382, 69.18017316,
       71.60853255, 69.96283866, 69.80302705, 68.6661572 , 69.26

## Descriptive Statistics with Numpy

In [105]:
## let's draw a sample of size 10 from a normally distributed population

In [130]:
np.random.seed(60120)
sample1 = np.random.normal(loc = 10, scale =1, size = 10)

In [131]:
sample1

array([11.09688495,  9.82876882, 10.89658301,  7.64402997,  9.05907328,
       10.00666684,  9.72806703,  9.95520623, 12.27507952, 10.28825701])

In [112]:
np.random.seed(60121)

sample2 = np.random.normal(loc = 10, scale =1, size = 15)

In [113]:
# display the 15-element sample created in the previous cell
sample2

array([10.95413593, 11.7386018 , 10.09515643,  8.52498662,  9.23815279,
        9.97679125,  8.31590909, 10.73590512, 10.42514155,  9.30034622,
       10.32771434,  8.79743381, 10.52932989,  9.79309832, 10.1672429 ])

In [114]:
sample1.mean()

9.927996405047585

In [None]:
## what is the mean of sample1?

10.095156429177244

In [120]:
np.mean(sample1)

9.927996405047585

In [None]:
## what is the median of sample1?

In [None]:
np.median(sample1)

In [126]:
## sorting sample1
# ndarray.sort() sorts in place and returns None, so sample1 itself is
# reordered; np.sort(sample1) would return a sorted copy instead.
sample1.sort()

In [124]:
np.sort(sample1)

array([ 7.64402997,  9.05907328,  9.72806703,  9.82876882,  9.95520623,
       10.00666684, 10.0846814 , 10.28825701, 10.32313706, 10.51879844,
       10.81935414, 10.89495876, 10.89658301, 11.09688495, 12.27507952])

In [129]:
sample1

array([11.09688495,  9.82876882, 10.89658301,  7.64402997,  9.05907328,
       10.00666684,  9.72806703,  9.95520623, 12.27507952, 10.28825701,
       10.0846814 , 10.81935414, 10.32313706, 10.51879844, 10.89495876])

In [None]:
## what is the 10th percentile (i.e. the 0.1 quantile) of sample1?

In [133]:
sample1

array([11.09688495,  9.82876882, 10.89658301,  7.64402997,  9.05907328,
       10.00666684,  9.72806703,  9.95520623, 12.27507952, 10.28825701])

In [136]:
# np.percentile expects q on a 0-100 scale, so the 10th percentile
# (the 0.1 quantile) is q = 10; q = 0.1 would ask for the 0.1th percentile.
# 'higher' returns the nearest actual data point at or above that position.
np.percentile(sample1, q = 10, interpolation= 'higher')

9.059073278428512

In [None]:
## Where is the max/min in sample1

In [137]:
np.max(sample1)

12.275079515331011

In [138]:
sample1.max()

12.275079515331011

In [141]:
sample1.argmax()

8

In [142]:
sample1.argmin()

3

In [144]:
sample1[sample1.argmax()]

12.275079515331011

In [146]:
multidim

array([[ 1,  2,  3, 12],
       [ 4,  5,  6, 11],
       [ 7,  8,  9, 10]])

In [149]:
multidim.max(axis =0)

array([ 7,  8,  9, 12])

In [148]:
multidim.max(axis =0)

array([ 7,  8,  9, 12])

In [151]:
multidim.argmax(axis =0)

array([2, 2, 2, 0])

In [143]:
## We can use different formattings as we print values
print('Maximum of sample1 is %.2f'%sample1.max())
print('The index of the max in sample1 is {}'.format(sample1.argmax()))

Maximum of sample1 is 12.28
The index of the max in sample1 is 8


## Extra Reading

[Comparison between % and format](https://stackoverflow.com/questions/5082452/string-formatting-vs-format)

[Descriptive Statistics](https://www.hackerearth.com/blog/developers/descriptive-statistics-python-numpy/)