# Numpy

## Installation 



In [None]:
!pip install numpy

## Importing numpy convention

In [1]:
# import numpy
import numpy as np

In [2]:
np.__version__

'1.18.5'

## What is Numpy?

Numpy is a Python package created to help with mathematical operations, for example:

- Standard Deviation
- Mean
- Generate random numbers
- Selecting random items
- sine, cosine, $\pi$

In [None]:
np.std([10, 10, 12, 13, 8, 7])

In [None]:
np.mean([11, 10, 12, 13, 8, 7])

In [None]:
np.random.randint(0, 10, size=10)

In [None]:
np.random.random(size=3)

In [None]:
np.random.random()

In [None]:
np.random.choice(['Donato','Diego','Raiana','Rodrigo'])

In [None]:
np.pi

In [None]:
np.sin(np.pi / 4)

## What are numpy arrays?

Numpy arrays are very close to **lists** in python. The main differences for us in data analysis are:

- You can perform mathematical operations directly on numpy arrays
- There are *LOTS* of new mathematical **methods** that you can use on numpy arrays
- Operations are much faster (especially when you are dealing with large amounts of data). Under the hood, a numpy array is stored as a contiguous block of memory.

In [None]:
np.array([1, 2])

In [None]:
type([1, 2])

In [None]:
type(np.array([1, 2]))

## Creating a numpy array from a list

In [None]:
a = [1,3,5,10]

In [None]:
a

In [None]:
type(a)

- What happens if you try to multiply the elements of a list by 2?
- How do you check whether the elements of a list are greater than 3?

In [None]:
a

In [None]:
a * 2

In [None]:
a > 3

In [None]:
b = np.array(a)

In [None]:
b
# check methods of b.<TAB>

In [None]:
b

In [None]:
b * 2

In [None]:
b + 2

In [None]:
b > 3

### 1-D array

A 1-D array is what we usually call a **vector**

In [None]:
# Ten random floats between 0 and 1, stored as a single 1-D vector.
array_1d = np.random.random(10)
array_1d

In [None]:
array_1d.shape

In [None]:
array_1d[-1]

In [None]:
array_1d.max()

### 2-D array: **row, column**: this is the most important 

2-D arrays are what we usually call a **matrix**


- Imagine that rows are the observations of your data set
- Imagine that columns are the characteristics of your data



In [None]:
np.random.random(size=(5, 3))

In [None]:
array_2d = np.random.random(size=(5,3))

In [None]:
array_2d

In [None]:
array_2d.shape

In [None]:
array_2d.size

### N-D array

In [None]:
array_3d = np.random.random((4,2,3))

In [None]:
array_3d

In [None]:
array_3d.shape

# Accessing elements of numpy arrays

## 1-D array: `array_1d[index]` (like lists):

In [None]:
array_1d

In [None]:
array_1d[0]

In [None]:
array_1d[3]

In [None]:
array_1d[-1]

## 2-D arrays:  `array_2d[row_number, column_number]`

In [None]:
array_2d

In [None]:
array_2d[1, 2]

In [None]:
array_2d[1, -1]

In [None]:
array_2d[-1, 0]

In [None]:
array_2d[1, 0:2]

In [None]:
array_2d[0:,2]

In [None]:
array_2d[:,0:2]

In [None]:
array_2d[:, 0] = -1

In [None]:
array_2d

- 3D arrays work the same way: `array_3d[x_index, y_index, z_index]`

In [None]:
array_3d

In [None]:
array_3d.shape

In [None]:
array_3d[3,0,1:3]

# Converting to np.array

Sometimes you have a list, or a list of lists, and you want to convert it to a numpy array to perform math operations or use the numpy methods. You just need to call `np.array(thing)` and it will try to convert it to a numpy array.

In [None]:
my_list = [1,4,7,8]
my_list

In [None]:
np.array(my_list)

In [None]:
my_list

- **NOTE**: don't forget that the code above hasn't exactly `converted` `my_list` into a numpy array. `my_list` remains intact, while `np.array(my_list)` **returns** a new numpy array. If you want to keep it, you have to **assign** it to a variable (another one, or the same one if you really want to replace the list).

In [None]:
another_variable = np.array(my_list)

In [None]:
my_list

In [None]:
another_variable

In [None]:
# now my_list is in fact converted
my_list = np.array(my_list)

In [None]:
my_list

In [None]:
list_of_lists = [[1,2,3],
                 [4,5,6],
                 [7,8,9]]

In [None]:
array_lst_lst = np.array(list_of_lists)
array_lst_lst

In [None]:
array_lst_lst.shape

# Mathematical methods

## sum

In [None]:
# A 3x6 table of random integers from 0 to 9 (the upper bound, 10, is never drawn).
a = np.random.randint(low=0, high=10, size=(3, 6))
a

In [None]:
a.shape

In [None]:
a.sum()

### The `axis` concept - very useful

- The `axis` concept: 
    - axis = 0 refers to: 
    > **sum over the first axis** (first axis == `rows`)
        - So for each column, it will sum over all the rows
        
    - axis = 1 refers to: 
    > **sum over the second axis** (second axis == `columns`)
        - So for each row, it will sum over all the columns

In [None]:
a

In [None]:
a.sum(axis=0)

In [None]:
a.sum(axis=1)

-----

## mean

$$ \bar{x} = \frac{1}{N}\left(\sum_{i=0}^{N-1} x_i\right)  $$

In [None]:
a

In [None]:
a.mean()

In [None]:
# mean for each column?

a.mean(axis=0)

In [None]:
# mean for each row?
a.mean(axis=1)

# Transpose Matrix

In [None]:
a

In [None]:
a.transpose()

In [None]:
a.T

# Operations between np.arrays

In [None]:
a

In [None]:
# A second random integer table with the same shape as `a`, so the two can be combined element by element.
c = np.random.randint(low=0, high=10, size=a.shape)
c

In [None]:
a + c

In [None]:
a - c

In [None]:
a + 1

# Reshaping arrays

In [None]:
# 64 random integers from 0 to 15 in one flat vector (64 = 8 * 8, so it can be viewed as an 8x8 grid).
a = np.random.randint(low=0, high=16, size=64)
a

In [None]:
a.reshape((8,8))

In [None]:
# application: images!

# 64 grey levels kept as one flat vector of floats, written eight per line.
image = np.array(
    [0, 0, 0, 12, 13, 0, 0, 0,
     0, 0, 5, 16, 8, 0, 0, 0,
     0, 0, 13, 16, 3, 0, 0, 0,
     0, 0, 14, 13, 0, 0, 0, 0,
     0, 0, 15, 12, 7, 2, 0, 0,
     0, 0, 13, 16, 13, 16, 3, 0,
     0, 0, 7, 16, 11, 15, 8, 0,
     0, 0, 1, 9, 15, 11, 3, 0],
    dtype=float,
)

In [None]:
image.reshape(8,8)

In [None]:
# sneak peek at plots <3:
import matplotlib.pyplot as plt

# View the flat vector as an 8x8 grid and draw it as a picture.
plt.imshow(image.reshape(8,8), cmap='binary')

In [None]:
# sneak peek at plots <3:
import matplotlib.pyplot as plt

# View the flat vector as an 8x8 grid and draw it as a picture.
plt.imshow(image.reshape(8,8), cmap='binary')

In [None]:
# Rebind `image` to a sample photo shipped with scikit-learn; from here on
# `image` no longer refers to the 64-value vector defined earlier.
# NOTE(review): the reshape a few cells below assumes this photo is
# 427 x 640 pixels with 3 colour channels — confirm with `image.shape`.
from sklearn.datasets import load_sample_image
image = load_sample_image('china.jpg')
image

In [None]:
image.shape

In [None]:
image.reshape(427 * 640, 3).shape

In [None]:
plt.imshow(image)

In [None]:
# 192 grey-level values kept as one flat vector, written here as 16 lines of
# 12 values each (the layout it is later reshaped to).
# NOTE(review): three entries are 256, one above the usual 0-255 range of
# 8-bit pixels — confirm whether 255 was intended.
arr = np.array([ 
    99,  103, 82,  88,  106,  104,  127,  105,  84,   95,   101,  100, 
    101, 74,  93,  182, 181,  194,  223,  239,  146,  46,   76,   102,  
    76,  76,  206, 242, 222,  250,  246,  223,  208,  150,  97,   75,  
    50,  147, 251, 132, 125,  145,  136,  52,   90,   241,  200,  76,   
    62,  188, 119, 5,   19,   17,   17,   28,   29,   180,  185,  55,   
    84,  151, 49,  23,  23,   42,   36,   17,   28,   158,  182,  50,   
    68,  168, 86,  47,  61,   41,   45,   98,   117,  181,  236,  87,   
    67,  159, 91,  172, 246,  88,   122,  245,  225,  194,  234,  108,  
    57,  88,  65,  63,  98,   29,   78,   113,  74,   150,  220,  66,  
    51,  82,  101, 4,   20,   25,   107,  78,   28,   213,  161,  22, 
    66,  40,  140, 107, 20,   69,   170,  106,  177,  218,  38,   15,  
    66,  32,  109, 148, 29,   46,   129,  154,  220,  155,  1,    32,  
    66,  42,  83,  190, 153,  113,  160,  206,  254,  147,  7,    41,
    69,  60,  21,  181, 255,  175,  209,  256,  250,  39,   1,    45,  
    73,  54,  19,  111, 256,  256,  244,  148,  56,   118,  13,   20,  
    60,  50,  133, 49,  79,   135,  133,  56,   81,   243,  160,  38])

In [None]:
import matplotlib.pyplot as plt

# Open a large figure, then view the flat vector as 16 rows by 12 columns
# and draw it as a picture.
plt.figure(figsize=(16,12))
plt.imshow(arr.reshape((16,12)), cmap='binary')

In [None]:
import matplotlib.pyplot as plt

# Reshape the flat vector once and reuse the same 16x12 grid for both the
# picture and the labels.
reshaped_arr = arr.reshape((16, 12))

plt.figure(figsize=(16, 12))
plt.imshow(reshaped_arr, cmap='binary')

# Write each pixel's value on top of it. np.ndenumerate yields
# ((row, column), value) pairs; imshow puts columns on the x-axis and rows on
# the y-axis, hence the (j, i) order. Centring the text keeps every label
# inside its own pixel instead of spilling into the neighbour on the right.
for (i, j), value in np.ndenumerate(reshaped_arr):
    plt.text(j, i, value, ha='center', va='center')