### 5. Working with `Basic Statistics`
------------

In [3]:
import numpy as np

# Even integers from 2 up to and including 20 (the stop value 21 is exclusive).
np_list = np.arange(2, 21, 2)

In [None]:
np_list

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

#### 5.1 mean
- https://www.mathsisfun.com/mean.html
- https://www.mathsisfun.com/numbers/geometric-mean.html
- https://www.mathsisfun.com/numbers/harmonic-mean.html

In [None]:
# Arithmetic mean: sum of the values divided by their count (110 / 10).
np.mean(np_list)

11.0

#### 5.2 median
- https://www.mathsisfun.com/median.html

- Step 1: organize the data in ascending order
    - case 1: if the number of items is odd --> the median is the item at position `(no of items  + 1)/2`
        - 1, 20,**30**,40,60

    - case 2: if the number of items is even --> the two middle items are at positions `no of items/2` and `(no of items/2)+ 1`
        - 1, 20,**30,40**,60,70

            - the average of these two middle items is the required median: (30+40)/2 = 35



In [None]:
# Odd number of items (5): the median is the single middle value.
np.median([1,20,30,40,60])

30.0

In [None]:
# Even number of items (6): the median is the average of the two middle values, (30 + 40) / 2.
np.median([1,20,30,40,60,70])

35.0

In [None]:
# np_list has 10 items, so the median averages the two middle values (10 and 12).
np.median(np_list)

11.0

#### 5.3. Mode
- https://www.mathsisfun.com/mode.html

In [2]:
import numpy as np

In [12]:
# Sample data in which 6 is the most frequent value (it occurs four times).
a_np_array= np.array([6, 3, 9, 6, 6, 3,5, 9, 3,6])

In [13]:
from scipy import stats

In [14]:
# The result carries the most frequent value (`mode`) and how often it occurs (`count`).
stats.mode(a_np_array)

ModeResult(mode=array([6]), count=array([4]))

#### 5.3 Standard Deviation

- https://www.mathsisfun.com/data/standard-deviation.html

    $\sigma = \sqrt{ \frac{1}{N}\sum^N_{i=1}(x_{i}-\mu)^2} $

- It is useful for finding `outliers`
- The square root of the `variance` $(\sigma^2)$ is the `standard deviation` $(\sigma)$

In [4]:
np_list

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [5]:
# Standard deviation of np_list: square root of the mean squared deviation from the mean
# (the 1/N form shown in the formula), i.e. sqrt(33) for this data.
np.std(np_list)

5.744562646538029

In [6]:
# Standard deviation of the five sample values; reused below to build the outlier bounds.
std = np.std([600 ,470 ,170 ,430 ,300])

In [7]:
std

147.32277488562318

In [8]:
# Mean of the same five sample values; the centre of the outlier band.
mean = np.mean([600 ,470 ,170 ,430 ,300])

In [9]:
mean

394.0

In [10]:
# Upper bound: one standard deviation above the mean.
upper = mean + std

In [11]:
# Lower bound: one standard deviation below the mean.
lower = mean -std

In [12]:
upper

541.3227748856232

In [13]:
lower

246.67722511437682

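##### 5.3.1: Outliers

Here a value is treated as an outlier when it lies more than one standard deviation away from the mean, i.e. above `upper` or below `lower`.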

In [14]:
# The same five sample values as an ndarray, so they can be filtered with a boolean mask.
np_1d_array = np.array([600 ,470 ,170 ,430 ,300])

In [15]:
# Boolean-mask selection: keep the values above `upper` or below `lower`.
np_1d_array[(np_1d_array > upper) | (np_1d_array < lower)]

array([600, 170])

`Home Work: `

Refer to these mean, median and mode calculations: https://acadgild.com/blog/python-mean-median-mode

Read: https://www.statisticshowto.datasciencecentral.com/probability-distribution/

### 5.4.sort

In [None]:
import numpy as np

In [None]:
# Fix the seed so the same 10 samples are drawn every time this cell runs.
np.random.seed(1)
x = np.random.randn(10)

In [None]:
x

array([ 1.62434536, -0.61175641, -0.52817175, -1.07296862,  0.86540763,
       -2.3015387 ,  1.74481176, -0.7612069 ,  0.3190391 , -0.24937038])

In [None]:
# ndarray.sort() sorts in place (ascending) and produces no output; `x` itself is reordered.
x.sort()

In [None]:
x

array([-2.3015387 , -1.07296862, -0.7612069 , -0.61175641, -0.52817175,
       -0.24937038,  0.3190391 ,  0.86540763,  1.62434536,  1.74481176])

### 5.5. unique

In [2]:
import numpy as np

array = np.array([10, 20, 10, 40, 20, 10, 20, 40, 20])

# With return_counts=True, np.unique yields the sorted distinct values
# together with the number of times each one occurs.
distinct_values, occurrence_counts = np.unique(array, return_counts=True)
print((distinct_values, occurrence_counts))

(array([10, 20, 40]), array([3, 4, 2], dtype=int64))


### 5.6 Set Operations

In [None]:
# Two small string arrays that share 'chair' and 'bulb'; used by the set operations below.
s1 = np.array(['desk','chair','bulb'])
s2 = np.array(['lamp','bulb','chair'])
print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [None]:
# Elements present in both s1 and s2 (returned sorted).
print( np.intersect1d(s1, s2) )

['bulb' 'chair']


In [None]:
# Elements present in s1 or s2, without duplicates (returned sorted).
print( np.union1d(s1, s2) )

['bulb' 'chair' 'desk' 'lamp']


In [None]:
print( np.setdiff1d(s1, s2) )  # elements of s1 that are not in s2 (argument order matters)

['desk']


### 5.7 Broadcasting
- The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations
- For more details, see the [NumPy broadcasting guide](https://docs.scipy.org/doc/numpy-1.10.1/user/basics.broadcasting.html)

In [1]:
import numpy as np

# 4 rows x 3 columns of zeros: the base array that the broadcasting examples add to.
start = np.zeros(shape=(4, 3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [2]:
start.shape

(4, 3)

In [3]:
# Create a rank-1 ndarray with 3 values: one value per column of `start`.
add_rows = np.array([1, 0, 2])
print(add_rows)

[1 0 2]


In [4]:
add_rows.shape

(3,)

In [None]:
# Shapes (4, 3) + (3,): `add_rows` is repeated for each of the 4 rows.
y = start + add_rows  # add to each row of 'start' using broadcasting
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [None]:
# Build a 1 x 4 row and transpose it into the 4 x 1 column that is
# broadcast across the columns of `start`.
add_cols = np.array([[0, 1, 2, 3]]).T
print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [None]:
# Add to each column of 'start' using broadcasting:
# shapes (4, 3) + (4, 1), so row i receives the value add_cols[i].
y = start + add_cols
print(y)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [None]:
# A one-element array is broadcast along both dimensions of `start`,
# so the same value (1) is added to every element.
add_scalar = np.array([1])
print(start+add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


### 6. Working with random numbers without a seed (only a fixed seed gives the same numbers again)

In [None]:
# generate 5 random integers between 10(Inclusive) and 50(Exclusive)
import numpy as np

In [None]:
# 5 random integers from 10 (inclusive) to 50 (exclusive); no seed is set here.
print(np.random.randint(10,50,5))

[43 13 40 41 46]


In [None]:
# Same call again, still without a seed: the numbers differ from the previous cell.
print(np.random.randint(10,50,5))

[29 30 37 30 35]


### 7. Working with random numbers with seed
- seed is useful for reproducibility
- Seed must be between 0 and 2**32 - 1

In [None]:
# IPython help syntax: a trailing `?` shows the docstring of np.random.randint.
np.random.randint?

In [None]:
# Seed the global generator, then draw 5 integers from 10 (inclusive) to 50 (exclusive).
np.random.seed(42)
print(np.random.randint(10,50,5))

[48 38 24 17 30]


In [None]:
# Re-seeding with 42 restarts the sequence, so the same five integers are produced.
np.random.seed(42)
print(np.random.randint(10,50,5))

[48 38 24 17 30]


In [None]:
# Third run with seed 42: identical output again.
np.random.seed(42)
print(np.random.randint(10,50,5))

[48 38 24 17 30]


In [None]:
# A different seed (4) produces a different sequence from seed 42.
np.random.seed(4)
print(np.random.randint(10,50,5))

[15 11 33 18 19]


In [None]:
# The sequence for seed 4 is itself reproducible.
np.random.seed(4)
print(np.random.randint(10,50,5))

[15 11 33 18 19]


In [None]:
# IPython help syntax: a trailing `?` shows the docstring of np.random.random.
np.random.random?

In [None]:
# Draw 20 random floats. No seed is set in this cell, so the values depend on
# the generator's current state.
np.random.random(20)

> **For a given seed we will always get the same random numbers**

In [None]:
# Seed 8: first of three identical cells demonstrating reproducibility.
np.random.seed(8)
print(np.random.randint(10,50,5))

[13 30 15 36 18]


In [None]:
# Seed 8 again: the same five integers as the previous cell.
np.random.seed(8)
print(np.random.randint(10,50,5))

[13 30 15 36 18]


In [None]:
# Seed 8 a third time: still the same output.
np.random.seed(8)
print(np.random.randint(10,50,5))

[13 30 15 36 18]


### 8.Working with linear algebra

In [3]:
# Create a 3-row matrix from the integers 0 (inclusive) to 12 (exclusive)
a = np.arange(12).reshape(3,-1) # -1 lets NumPy infer the column count (12 / 3 = 4)
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [4]:
# Create a 3-row matrix from the 12 even numbers 2, 4, ..., 24 (these are not random values)
b = np.arange(2,26,2).reshape(3,-1) # -1 lets NumPy infer the column count (12 / 3 = 4)
print(b)

[[ 2  4  6  8]
 [10 12 14 16]
 [18 20 22 24]]


In [5]:
# Add these two matrices (element-wise; both are 3 x 4)
print(a+b)

[[ 2  5  8 11]
 [14 17 20 23]
 [26 29 32 35]]


In [6]:
# Subtract these two matrices (element-wise: each element of `a` is taken from the matching element of `b`)
print(b-a)

[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]


In [12]:
# Matrix product (not element-wise multiplication) of two 2 x 2 matrices.
# `a` is the identity matrix, so the product equals `b`.
a = np.array([[1, 0],
              [0, 1]])
b = np.array([[4, 1],
              [2, 2]])
a @ b

array([[4, 1],
       [2, 2]])

In [13]:
# Element-wise division of `b` by `a` (this is NOT matrix division / a linear solve).
# `a` contains zeros, so those positions evaluate to inf. The divide-by-zero
# RuntimeWarning is silenced locally because the inf results are the point of the demo.
with np.errstate(divide='ignore'):
    quotient = b / a
quotient

  


array([[ 4., inf],
       [inf,  2.]])

### 9. Speedtest: ndarrays vs lists

In [None]:
from numpy import arange
from timeit import Timer
size    = 1000000  # number of elements in the ndarray and in the list being summed
timeits = 1000     # number of repetitions per measurement; the total time is divided by this

In [None]:
# create the ndarray with values 0,1,2...,size-1
nd_array = arange(size)
# confirm that the object is a numpy.ndarray
print( type(nd_array) )

<class 'numpy.ndarray'>


In [None]:
# Sanity check: smallest value, largest value and number of elements.
# The ndarray's own vectorised min()/max() are used instead of the Python builtins,
# which would loop over all one million elements one at a time.
nd_array.min(), nd_array.max(), len(nd_array)

(0, 999999, 1000000)

In [None]:
# Timer takes the statement to measure as a string (here nd_array.sum());
# the setup string makes nd_array visible inside the timing namespace.
timer_numpy = Timer(stmt="nd_array.sum()", setup="from __main__ import nd_array")

# Average over `timeits` runs to get the time of a single call.
numpy_seconds_per_call = timer_numpy.timeit(number=timeits) / timeits
print("Time taken by numpy ndarray: %f seconds" % numpy_seconds_per_call)

Time taken by numpy ndarray: 0.000698 seconds


In [None]:
# create the list with values 0,1,2...,size-1 (same contents as nd_array, as a plain Python list)
a_list = list(range(size))
print (type(a_list) )

<class 'list'>


In [None]:
# Sanity check: smallest value, largest value and length of the list.
min(a_list),max(a_list),len(a_list)

(0, 999999, 1000000)

In [None]:
# Timer takes the statement to measure as a string (here sum(a_list));
# the setup string makes a_list visible inside the timing namespace.
timer_list = Timer(stmt="sum(a_list)", setup="from __main__ import a_list")

# Average over `timeits` runs to get the time of a single call.
list_seconds_per_call = timer_list.timeit(number=timeits) / timeits
print("Time taken by list:  %f seconds" % list_seconds_per_call)

Time taken by list:  0.039344 seconds


> A NumPy array is faster than a list

### 10.Read or Write to Disk

#### 10.1 Binary Format:

In [None]:
# Small float array that is written to disk and read back in the cells below.
x = np.array([ 23.23, 24.24] )

In [None]:
x

In [None]:
import os
# Show the current working directory; the files below are written using relative names.
os.getcwd()

In [None]:
np.save('an_array', x) # binary format; look for an_array.npy in the current working directory

In [None]:
# Read the array back from the binary .npy file written above.
np.load('an_array.npy')

#### 10.2 Text Format:

In [None]:
np.savetxt('array.txt', X=x, delimiter=',') # Writing to disk as plain text

In [None]:
np.loadtxt('array.txt', delimiter=',') # Reading the text file back from disk

# home Work

https://docs.scipy.org/doc/numpy/user/quickstart.html