# AGENDA
* Numpy Revision 
* Descriptive Statistics With Numpy 

## Importing Numpy 

In [1]:
import numpy as np 

In [2]:
# Wildcard import: copies NumPy's public names into the notebook namespace.
# Every cell in this notebook uses the explicit `np.` prefix instead.
# NOTE(review): a star import can shadow Python built-ins (e.g. `sum`, `any`,
# `all`) with NumPy versions — confirm it is really wanted here.
from numpy import *

### Why Numpy
So far the `numpy.ndarray` looks awfully similar to a Python list (or nested list). Why not simply use Python lists for computations instead of creating a new array type? 

There are several reasons:

* Python lists are very general. They can contain any kind of object. They are **dynamically typed**. They do not support mathematical functions such as matrix and dot multiplications, etc. Implementing such functions for Python lists would not be very efficient because of the dynamic typing.
* Numpy arrays are **statically typed**. The type of the elements is determined when the array is created.
* Numpy arrays are memory efficient.
* Because of the static typing, mathematical functions such as multiplication and addition of `numpy` arrays can be implemented efficiently in a compiled language (C is used).

### Creating numpy array
* from a list or tuple (optionally controlling the data type and the number of dimensions)
* using NumPy functions such as `arange`, `random`, `linspace`, `diag`, `ones`, and `zeros`

In [3]:
lst = [1,2,3]

In [4]:
lst

[1, 2, 3]

In [5]:
type(lst)

list

In [6]:
arr = np.array(lst)

In [7]:
arr

array([1, 2, 3])

In [8]:
type(arr)

numpy.ndarray

In [9]:
tpl = (4,5,6)

In [10]:
type(tpl)

tuple

In [11]:
tpl_arr = np.array(tpl)

In [12]:
tpl_arr

array([4, 5, 6])

In [13]:
(tpl_arr[0])

4

In [14]:
type(tpl_arr[0])

numpy.int32

In [15]:
np.array([1,2,3] , dtype = float) + 0.4

array([1.4, 2.4, 3.4])

In [16]:
np.array([2,3,4] , dtype = str)

array(['2', '3', '4'], dtype='<U1')

In [17]:
lst = [1,2,3 , 'a']

In [18]:
lst

[1, 2, 3, 'a']

In [19]:
# An array holds a single dtype: with ints and a str in the same list,
# every element is converted to a string (see the '<U11' dtype in the output).
np.array(lst)

array(['1', '2', '3', 'a'], dtype='<U11')

In [20]:
lst = [1,2,3,4.4]

In [21]:
np.array(lst)

array([1. , 2. , 3. , 4.4])

In [22]:
lst = [1.2 , 4.3 , 'a']

In [23]:
np.array(lst)

array(['1.2', '4.3', 'a'], dtype='<U32')

In [24]:
# Nested Python list: three rows of three integers, i.e. a 3x3 "matrix".
matt = [[1, 3, 23], [3, 4, 5], [5, 6, 7]]

In [25]:
matt

[[1, 3, 23], [3, 4, 5], [5, 6, 7]]

In [26]:
mat_arr = np.array(matt)

In [27]:
mat_arr

array([[ 1,  3, 23],
       [ 3,  4,  5],
       [ 5,  6,  7]])

In [28]:
len(matt)

3

In [29]:
mat_arr.ndim

2

In [30]:
arr.ndim

1

#### Check type , dtype , shape , size

In [31]:
mat_arr.shape

(3, 3)

In [32]:
mat_arr.size

9

In [33]:
len(mat_arr)

3

In [34]:
type(mat_arr)

numpy.ndarray

In [35]:
mat_arr.dtype

dtype('int32')

###### arange

In [36]:
list(range(0 , 10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [37]:
np.arange(0 , 10 , 1 , dtype = int)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
np.arange(0 , 10 , 1 , dtype = int).shape

(10,)

In [39]:
np.arange(0 , 10 , 1 , dtype = int).reshape(2 , 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [40]:
np.arange(0 , 10 , 1 , dtype = int).reshape(10 , 1 )

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [41]:
np.arange(0 , 10 , 1 , dtype = int).reshape(10 , 1 ).ndim

2

In [42]:
np.arange(0 , 10 , 1 , dtype = int).reshape(1 , 10 )

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [43]:
np.arange(0 , 10 , 1 , dtype = int).reshape(1 , 10 ).ndim

2

###### linspace 

In [44]:
# No sample count is given, so linspace falls back to its default of 50
# evenly spaced values from 1 to 10, both endpoints included.
np.linspace(1 , 10)

array([ 1.        ,  1.18367347,  1.36734694,  1.55102041,  1.73469388,
        1.91836735,  2.10204082,  2.28571429,  2.46938776,  2.65306122,
        2.83673469,  3.02040816,  3.20408163,  3.3877551 ,  3.57142857,
        3.75510204,  3.93877551,  4.12244898,  4.30612245,  4.48979592,
        4.67346939,  4.85714286,  5.04081633,  5.2244898 ,  5.40816327,
        5.59183673,  5.7755102 ,  5.95918367,  6.14285714,  6.32653061,
        6.51020408,  6.69387755,  6.87755102,  7.06122449,  7.24489796,
        7.42857143,  7.6122449 ,  7.79591837,  7.97959184,  8.16326531,
        8.34693878,  8.53061224,  8.71428571,  8.89795918,  9.08163265,
        9.26530612,  9.44897959,  9.63265306,  9.81632653, 10.        ])

In [45]:
np.linspace(1 , 10 , 4)   # vector 

array([ 1.,  4.,  7., 10.])

In [46]:
np.linspace(1 , 10 , 4).shape

(4,)

In [47]:
np.linspace(1 , 10 , 4).reshape(1 , 4)  # matrix

array([[ 1.,  4.,  7., 10.]])

In [48]:
np.linspace(1 , 10 , 4).reshape(1 , 4).shape

(1, 4)

In [49]:
np.linspace(1 , 10 , 4).reshape(2 ,2 )

array([[ 1.,  4.],
       [ 7., 10.]])

###### random

In [50]:
np.random.random(10)

array([0.13313111, 0.94701889, 0.27609171, 0.51412441, 0.23055398,
       0.31730988, 0.35010922, 0.41858876, 0.52043219, 0.57498392])

In [51]:
np.random.random(size = (10 , 10))

array([[0.78660355, 0.35332477, 0.21591756, 0.39935684, 0.80638563,
        0.08623735, 0.32221199, 0.22738716, 0.9478296 , 0.46422578],
       [0.25610739, 0.76538287, 0.4936387 , 0.22100876, 0.56825412,
        0.93945255, 0.56491244, 0.93742939, 0.16755428, 0.14819622],
       [0.83256108, 0.40097287, 0.91249709, 0.02244738, 0.01565141,
        0.24340115, 0.99132763, 0.91958883, 0.49116152, 0.67480683],
       [0.33958864, 0.19024077, 0.3086357 , 0.00105595, 0.66829087,
        0.99870364, 0.23930893, 0.51176555, 0.29244173, 0.39542181],
       [0.23940573, 0.13738084, 0.61059963, 0.16598463, 0.81579785,
        0.6420588 , 0.47179504, 0.42624053, 0.44527956, 0.45861912],
       [0.54401103, 0.79096922, 0.10432784, 0.66321295, 0.79465999,
        0.36006877, 0.26240539, 0.1852011 , 0.86862822, 0.75000418],
       [0.3449614 , 0.22835055, 0.00220223, 0.01771625, 0.77296327,
        0.99319057, 0.39127559, 0.73916748, 0.61500842, 0.8021483 ],
       [0.32432797, 0.01305439, 0.2073651

In [52]:
np.random.rand(10)

array([0.84197223, 0.501961  , 0.70302516, 0.82600905, 0.44022921,
       0.1383266 , 0.75184334, 0.35970228, 0.47358636, 0.82490849])

In [53]:
np.random.randn(10)  # 10 samples from the standard normal distribution (mean 0, std 1)

array([ 0.78094532,  0.63601947, -1.25819534, -0.67671076,  1.98301861,
        2.19601776,  0.46688691, -0.44360761, -0.69781529, -0.93512131])

In [54]:
np.random.randint(10 , 20)

18

In [55]:
np.random.randint(10 , 20 , size = (2,5))

array([[17, 10, 10, 10, 10],
       [18, 19, 16, 18, 18]])

###### diagonal

In [56]:
np.diag([1,2,3])

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [57]:
mat_arr

array([[ 1,  3, 23],
       [ 3,  4,  5],
       [ 5,  6,  7]])

In [58]:
np.diag(mat_arr)

array([1, 4, 7])

In [59]:
lst = [1,2,3]
lst

[1, 2, 3]

In [60]:
# Intentional error: on a plain list `+` means concatenation, so adding an
# int raises TypeError — lists have no element-wise arithmetic.
lst + 3 

TypeError: can only concatenate list (not "int") to list

###### zeros & ones 

In [61]:
np.zeros(shape = (2,2))

array([[0., 0.],
       [0., 0.]])

In [62]:
np.zeros(shape = (2,2)) + 3 

array([[3., 3.],
       [3., 3.]])

In [63]:
np.ones(shape = (3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [64]:
np.ones(shape = (3,3)) * 3 

array([[3., 3., 3.],
       [3., 3., 3.],
       [3., 3., 3.]])

### indexing & slicing 

In [65]:
mtrx = np.arange(1 , 101 ).reshape(10 , 10)
mtrx

array([[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
       [ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
       [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
       [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50],
       [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
       [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
       [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
       [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
       [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

In [66]:
mtrx[ : , -2:]

array([[  9,  10],
       [ 19,  20],
       [ 29,  30],
       [ 39,  40],
       [ 49,  50],
       [ 59,  60],
       [ 69,  70],
       [ 79,  80],
       [ 89,  90],
       [ 99, 100]])

In [67]:
mtrx[ -2 :  ,  -2: ]

array([[ 89,  90],
       [ 99, 100]])

In [68]:
mtrx[  0 : 2  , 0 : 2   ]

array([[ 1,  2],
       [11, 12]])

In [69]:
mtrx[0][0]

1

In [70]:
mtrx[0 , 0]

1

In [71]:
mtrx[ 0 , 3 ]

4

In [72]:
mtrx[ 0 , : ]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [73]:
mtrx[ 0 : 2 , : ]

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])

In [74]:
mtrx[0 , -1]

10

In [75]:
mtrx[0]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [76]:
mtrx[-1]

array([ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

### fancy indexing 

In [77]:
mtrx

array([[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
       [ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
       [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
       [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50],
       [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
       [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
       [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
       [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
       [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

In [78]:
mtrx.shape[0]

10

In [79]:
mtrx[[1,2] , ]

array([[11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]])

In [80]:
mtrx[[-1 , 0] , ]

array([[ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100],
       [  1,   2,   3,   4,   5,   6,   7,   8,   9,  10]])

In [81]:
# Indices of every second row of mtrx (0, 2, 4, ...), kept as a plain list
# so it can be used for fancy indexing.
even_row_range = range(0, mtrx.shape[0], 2)
nrow = list(even_row_range)
nrow

[0, 2, 4, 6, 8]

In [82]:
mtrx[nrow , ]

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
       [41, 42, 43, 44, 45, 46, 47, 48, 49, 50],
       [61, 62, 63, 64, 65, 66, 67, 68, 69, 70],
       [81, 82, 83, 84, 85, 86, 87, 88, 89, 90]])

In [83]:
# Same even-row selection as `mtrx[nrow, ]`, but the index list is built
# inline with a comprehension that keeps only even row numbers.
mtrx[[i for i in range(0 , mtrx.shape[0]) if i % 2 == 0] , : ]

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
       [41, 42, 43, 44, 45, 46, 47, 48, 49, 50],
       [61, 62, 63, 64, 65, 66, 67, 68, 69, 70],
       [81, 82, 83, 84, 85, 86, 87, 88, 89, 90]])

In [84]:
mtrx[ :: 2 ]

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
       [41, 42, 43, 44, 45, 46, 47, 48, 49, 50],
       [61, 62, 63, 64, 65, 66, 67, 68, 69, 70],
       [81, 82, 83, 84, 85, 86, 87, 88, 89, 90]])

In [85]:
mtrx[ 1:: 2 ]

array([[ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
       [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
       [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
       [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
       [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

## Linear Algebra
* We can use the usual arithmetic operators to multiply, add, subtract, and divide arrays with scalar numbers.

In [86]:
v = np.array([1,2,3])

In [87]:
v * 3 

array([3, 6, 9])

In [88]:
v

array([1, 2, 3])

In [89]:
lst = [1,2,3]

In [90]:
# On a list, `*` repeats the whole sequence three times; it does not scale
# each element the way `v * 3` does for the array above.
lst * 3 

[1, 2, 3, 1, 2, 3, 1, 2, 3]

In [91]:
lst

[1, 2, 3]

In [93]:
# Intentional error: a list can only be multiplied by an int (repetition),
# so list * list raises TypeError.
lst * lst 

TypeError: can't multiply sequence by non-int of type 'list'

In [None]:
v * v

In [None]:
v + v

In [None]:
v - v

In [None]:
# 3x3 integer matrix used by the arithmetic examples that follow.
mat = np.array([[1, 2, 3], [2, 4, 4], [1, 2, 3]])

In [None]:
mat * 3

In [None]:
mat + 3 

In [None]:
# `*` on arrays is element-wise: each entry is multiplied by the entry at
# the same position (this is NOT the matrix product).
mat * mat

In [None]:
# Matrix product (rows of the first operand times columns of the second),
# in contrast to the element-wise `mat * mat`.
np.dot(mat , mat)

### reshape , casting (astype) , flatten

In [None]:
mtrx

In [None]:
mtrx.flatten()

In [None]:
mtrx.astype(float)

In [None]:
# astype returns a new array; rebinding mtrx means it now holds strings.
# It is cast back with astype(int) before the statistics section.
mtrx = mtrx.astype(str)

In [None]:
mtrx

In [None]:
mtrx.reshape(1 , 100 )

### max , min , sum , cumsum , cumprod , prod , mean , median , std , var 

In [None]:
# mtrx was rebound to a string array earlier (astype(str)); convert it back
# to integers before computing numeric statistics.
mtrx = mtrx.astype(int)

In [94]:
mtrx.max()

100

In [95]:
mtrx

array([[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
       [ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
       [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
       [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50],
       [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
       [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
       [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
       [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
       [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

In [96]:
mtrx.max(axis = 1 )

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [97]:
mtrx.max(axis = 0)

array([ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

In [98]:
mtrx.sum()

5050

In [99]:
mtrx.sum(axis = 1 )

array([ 55, 155, 255, 355, 455, 555, 655, 755, 855, 955])

In [100]:
mtrx.sum(axis = 0)

array([460, 470, 480, 490, 500, 510, 520, 530, 540, 550])

In [101]:
mtrx

array([[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
       [ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
       [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
       [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50],
       [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
       [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
       [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
       [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
       [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

In [102]:
mtrx.cumsum()

array([   1,    3,    6,   10,   15,   21,   28,   36,   45,   55,   66,
         78,   91,  105,  120,  136,  153,  171,  190,  210,  231,  253,
        276,  300,  325,  351,  378,  406,  435,  465,  496,  528,  561,
        595,  630,  666,  703,  741,  780,  820,  861,  903,  946,  990,
       1035, 1081, 1128, 1176, 1225, 1275, 1326, 1378, 1431, 1485, 1540,
       1596, 1653, 1711, 1770, 1830, 1891, 1953, 2016, 2080, 2145, 2211,
       2278, 2346, 2415, 2485, 2556, 2628, 2701, 2775, 2850, 2926, 3003,
       3081, 3160, 3240, 3321, 3403, 3486, 3570, 3655, 3741, 3828, 3916,
       4005, 4095, 4186, 4278, 4371, 4465, 4560, 4656, 4753, 4851, 4950,
       5050], dtype=int32)

In [103]:
mtrx.cumsum(axis = 1 )

array([[  1,   3,   6,  10,  15,  21,  28,  36,  45,  55],
       [ 11,  23,  36,  50,  65,  81,  98, 116, 135, 155],
       [ 21,  43,  66,  90, 115, 141, 168, 196, 225, 255],
       [ 31,  63,  96, 130, 165, 201, 238, 276, 315, 355],
       [ 41,  83, 126, 170, 215, 261, 308, 356, 405, 455],
       [ 51, 103, 156, 210, 265, 321, 378, 436, 495, 555],
       [ 61, 123, 186, 250, 315, 381, 448, 516, 585, 655],
       [ 71, 143, 216, 290, 365, 441, 518, 596, 675, 755],
       [ 81, 163, 246, 330, 415, 501, 588, 676, 765, 855],
       [ 91, 183, 276, 370, 465, 561, 658, 756, 855, 955]], dtype=int32)

In [104]:
mtrx.mean()

50.5

In [105]:
mtrx.mean(axis = 1 )

array([ 5.5, 15.5, 25.5, 35.5, 45.5, 55.5, 65.5, 75.5, 85.5, 95.5])

## Descriptive Statistics Assignment 
* Measures of central tendency: mean, median, and mode
* Measures of dispersion or variation: range, interquartile range, variance, quartiles, standard deviation, and correlation

#### Read Data using numpy 
* save and load data in the `.npy` format (`np.save` / `np.load`)

In [2]:
# 10x10 array of random integers drawn from [10, 100) — the upper bound is
# exclusive. No seed is set, so the values differ on every run.
matrix = np.random.randint(10 , 100 , (10,10))

In [5]:
matrix

array([[28, 30, 79, 90, 90, 71, 68, 72, 79, 91],
       [33, 15, 46, 87, 66, 16, 86, 63, 56, 40],
       [45, 58, 90, 10, 36, 38, 88, 89, 96, 83],
       [71, 19, 92, 35, 21, 84, 27, 32, 71, 51],
       [23, 58, 58, 83, 82, 93, 45, 64, 79, 77],
       [81, 60, 13, 77, 57, 11, 50, 70, 11, 82],
       [85, 89, 62, 29, 19, 81, 10, 14, 64, 77],
       [94, 16, 55, 11, 71, 45, 43, 30, 98, 39],
       [76, 97, 60, 24, 45, 68, 57, 53, 21, 59],
       [34, 35, 61, 35, 22, 38, 75, 40, 35, 43]])

In [1]:
import numpy as np 

In [6]:
# Write the array to 'matrix.npy' (NumPy's binary .npy format); the relative
# path resolves against the current working directory.
np.save('matrix.npy' , matrix)

In [4]:
# Read the saved array back from disk; the result is only displayed here,
# not assigned to a variable.
np.load('matrix.npy')

array([[28, 30, 79, 90, 90, 71, 68, 72, 79, 91],
       [33, 15, 46, 87, 66, 16, 86, 63, 56, 40],
       [45, 58, 90, 10, 36, 38, 88, 89, 96, 83],
       [71, 19, 92, 35, 21, 84, 27, 32, 71, 51],
       [23, 58, 58, 83, 82, 93, 45, 64, 79, 77],
       [81, 60, 13, 77, 57, 11, 50, 70, 11, 82],
       [85, 89, 62, 29, 19, 81, 10, 14, 64, 77],
       [94, 16, 55, 11, 71, 45, 43, 30, 98, 39],
       [76, 97, 60, 24, 45, 68, 57, 53, 21, 59],
       [34, 35, 61, 35, 22, 38, 75, 40, 35, 43]])

###### mean

###### median

###### Mode

###### range, Interquartile range,variance , Quartile and standard deviation , correlation