# Introduction to NumPy
Learning NumPy

In [30]:
import numpy as np
np.__version__

'1.18.1'

## Differences between lists and NumPy Arrays
* An array's size is immutable. You cannot append, insert or remove elements, like you can with a list.
* All of an array's elements must be of the same [data type](https://docs.scipy.org/doc/numpy-1.14.0/user/basics.types.html).
* A NumPy array behaves in a Pythonic fashion. You can call `len(my_array)` just as you would expect.
* A two-dimensional array is a matrix (arrays with more dimensions exist as well; they are called n-dimensional arrays, or tensors)
* The rank of an array or matrix refers to how many dimensions it has


In [31]:
gpas_as_list = [4.0, 3.286, 3.5]

In [32]:
# Can have elements appended to it
gpas_as_list.append(4.0)
# Can have multiple datatypes in it
gpas_as_list.insert(1, "Whatevs")
# Can have items removed
gpas_as_list.pop(1)

'Whatevs'

In [33]:
gpas_as_list

[4.0, 3.286, 3.5, 4.0]

In [34]:
# Build a NumPy array from the list; NumPy stores every element with one common dtype
gpas = np.array(gpas_as_list)

In [77]:
# IPython introspection: a leading `?` shows help/summary information for the object
?gpas

In [36]:
gpas.dtype

dtype('float64')

In [37]:
gpas.size

4

## Multidimensional Arrays

* The data structure is actually called `ndarray`, representing any **n**umber of **d**imensions
* Arrays can have multiple dimensions, you declare them on creation
* Dimensions help define what each element in the array represents.  A two dimensional array is just an array of arrays
* **Rank** defines how many dimensions an array contains 
* **Shape** defines the length of each of the array's dimensions
* Each dimension is also referred to as an **axis**, and they are zero-indexed. Multiples are called **axes**.
* A 2d array is AKA **matrix**.
* axis 0 = rows; axis 1 = columns
* "axis" is just another name for "dimension"

In [55]:
# Five students x four GPA values each; float16 stores each value in 2 bytes.
students_gpas = np.array(
    [[4.0, 3.2, 3.5, 4.0] for _ in range(5)],
    dtype=np.float16,
)
students_gpas

array([[4. , 3.2, 3.5, 4. ],
       [4. , 3.2, 3.5, 4. ],
       [4. , 3.2, 3.5, 4. ],
       [4. , 3.2, 3.5, 4. ],
       [4. , 3.2, 3.5, 4. ]], dtype=float16)

In [51]:
students_gpas.ndim

2

In [52]:
students_gpas.shape

(5, 4)

In [53]:
# total number of elements in the array (all dimensions combined)
students_gpas.size

20

In [56]:
# number of "rows"
len(students_gpas)

5

In [63]:
# size in bytes of a single element
print(students_gpas.itemsize)

# total number of bytes needed for all elements
students_gpas.itemsize * students_gpas.size

2


40

In [64]:
%whos ndarray

Variable        Type       Data/Info
------------------------------------
gpas            ndarray    4: 4 elems, type `float64`, 32 bytes
students_gpas   ndarray    5x4: 20 elems, type `float16`, 40 bytes
study_minutes   ndarray    100: 100 elems, type `uint16`, 200 bytes


In [67]:
np.info(students_gpas)

class:  ndarray
shape:  (5, 4)
strides:  (8, 2)
itemsize:  2
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x7fda30e683c0
byteorder:  little
byteswap:  False
type: float16


In [68]:
# Row 2, column 3 (both zero-based), selected with a single tuple index
students_gpas[2, 3]

4.0

## About data types
* By choosing the proper [data type](https://docs.scipy.org/doc/numpy-1.14.0/user/basics.types.html) you can greatly reduce the size required to store objects
* Data types are maintained by wrapping values in a [scalar representation](https://docs.scipy.org/doc/numpy-1.14.0/reference/arrays.scalars.html)
* `np.zeros` is a handy way to create a new array of a given size pre-filled with zeros.

In [38]:
# 100 unsigned 16-bit counters, all starting at zero.
study_minutes = np.zeros(shape=100, dtype=np.uint16)
study_minutes

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint16)

In [39]:
# additional information on variables within jupyter notebook
%whos

Variable        Type       Data/Info
------------------------------------
gpas            ndarray    4: 4 elems, type `float64`, 32 bytes
gpas_as_list    list       n=4
np              module     <module 'numpy' from '/Us<...>kages/numpy/__init__.py'>
study_minutes   ndarray    100: 100 elems, type `uint16`, 200 bytes
sys             module     <module 'sys' (built-in)>


In [40]:
study_minutes[0] = 150
first_day_minutes = study_minutes[0]

In [41]:
first_day_minutes

150

In [42]:
type(first_day_minutes)

numpy.uint16

In [43]:
study_minutes[1] = 60

In [44]:
second_day_minutes = study_minutes[1]

In [45]:
second_day_minutes

60

In [47]:
# Slice assignment: write four values into positions 2 through 5 in one statement
study_minutes[2:6] = [80, 60, 30, 90]
study_minutes

array([150,  60,  80,  60,  30,  90,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0], dtype=uint16)

In [69]:
# Pair the existing 1-D log with a fresh row of 100 zeros to build a 2-D array.
# NOTE: `study_minutes` is rebuilt from itself, so re-running this cell would feed
# the already 2-D array back in and no longer produce a (2, 100) array.
study_minutes = np.array([
    study_minutes,
    np.zeros(100, np.uint16)
])

In [70]:
study_minutes.shape

(2, 100)

In [74]:
# Set round/row 2 day 1 to 60
study_minutes[1, 0] = 60

In [75]:
study_minutes[1, 0]

60

In [80]:
# Seeded generator so the fake data is the same on every run.
rand = np.random.RandomState(seed=42)
# 100 integers drawn from [30, 180), stored as unsigned 16-bit values.
fake_log = rand.randint(low=30, high=180, size=100, dtype=np.uint16)
fake_log

array([132, 122, 128,  44, 136, 129, 101,  95,  50, 132, 151,  64, 104,
       175, 117, 146, 139, 129, 133, 176,  98, 160, 179,  99,  82, 142,
        31, 106, 117,  56,  98,  67, 121, 159,  81, 170,  31,  50,  49,
        87, 179,  51, 116, 177, 118,  78, 171, 117,  88, 123, 102,  44,
        79,  31, 108,  80,  59, 137,  84,  93, 155, 160,  67,  80, 166,
       164,  70,  50, 102, 113,  47, 131, 161, 118,  82,  89,  81,  43,
        81,  38, 119,  52,  82,  31, 159,  57, 113,  71, 121, 140,  91,
        70,  37, 106,  64, 127, 110,  58,  93,  79], dtype=uint16)

In [81]:
[fake_log[3], fake_log[8]]

[44, 50]

In [82]:
fake_log[[3, 8]]

array([44, 50], dtype=uint16)

In [85]:
# Indexing with a 2-D integer array: the result takes the shape of the index
# array, with each entry replaced by the element of fake_log at that position.
index = np.array([
    [3, 8],
    [0, 1]
])
fake_log[index]

array([[ 44,  50],
       [132, 122]], dtype=uint16)

In [136]:
# Add fake_log as a new row (axis=0). np.append returns a new array rather than
# modifying in place, so the result is assigned back to study_minutes.
# NOTE: not idempotent — every re-run of this cell adds another copy of the row.
study_minutes = np.append(study_minutes, [fake_log], axis=0)

In [137]:
study_minutes[1, 1] = 360

In [138]:
# Boolean-mask indexing: keeps only the entries greater than 50, returned as a
# flat 1-D array. Left commented out; the output below is from an earlier run.
# study_minutes[study_minutes>50]

array([150,  60,  80,  60,  90,  60, 360, 132, 122, 128, 136, 129, 101,
        95, 132, 151,  64, 104, 175, 117, 146, 139, 129, 133, 176,  98,
       160, 179,  99,  82, 142, 106, 117,  56,  98,  67, 121, 159,  81,
       170,  87, 179,  51, 116, 177, 118,  78, 171, 117,  88, 123, 102,
        79, 108,  80,  59, 137,  84,  93, 155, 160,  67,  80, 166, 164,
        70, 102, 113, 131, 161, 118,  82,  89,  81,  81, 119,  52,  82,
       159,  57, 113,  71, 121, 140,  91,  70, 106,  64, 127, 110,  58,
        93,  79, 132, 122, 128, 136, 129, 101,  95, 132, 151,  64, 104,
       175, 117, 146, 139, 129, 133, 176,  98, 160, 179,  99,  82, 142,
       106, 117,  56,  98,  67, 121, 159,  81, 170,  87, 179,  51, 116,
       177, 118,  78, 171, 117,  88, 123, 102,  79, 108,  80,  59, 137,
        84,  93, 155, 160,  67,  80, 166, 164,  70, 102, 113, 131, 161,
       118,  82,  89,  81,  81, 119,  52,  82, 159,  57, 113,  71, 121,
       140,  91,  70, 106,  64, 127, 110,  58,  93,  79, 132, 12

In [139]:
study_minutes

array([[150,  60,  80,  60,  30,  90,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ 60, 360,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [147]:
# Two rows of four values each, so the array has shape (2, 4).
quarterly_revenue = np.array([
    [22.72, 29.13, 25.36, 35.75],
    [1, 2, 3, 4],
])

# Indexing with a list is "fancy" indexing along axis 0: [[1, 3]] asks for
# rows 1 and 3. Only rows 0 and 1 exist, so NumPy raises IndexError. Catch it
# and print the message so the notebook still runs top to bottom.
try:
    quarterly_revenue[[1, 3]]
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: index 3 is out of bounds for axis 0 with size 2

In [142]:
quarterly_revenue[1][3]
# quarterly_revenue[1, 3] same thing

4.0

In [135]:
quarterly_revenue.shape

(2, 4)