# Introduction to NumPy
Learning NumPy!

In [1]:
import numpy as np
np.__version__

'1.14.5'

## Differences between lists and NumPy Arrays
* An array's size is fixed at creation.  Unlike with a list, you cannot append, insert, or remove elements in place.
* All of an array's elements must be of the same [data type](https://docs.scipy.org/doc/numpy-1.14.0/user/basics.types.html).
* A NumPy array behaves in a Pythonic fashion.  For example, `len(my_array)` works just as you would expect.

In [2]:
gpas_as_list = [4.0, 3.286, 3.5]

In [3]:
# Lists are mutable, so each call below changes gpas_as_list in place.
# NOTE: re-running this cell appends another 4.0 each time.

# A list can grow: append adds an element to the end.
gpas_as_list.append(4.0)
# A list can mix data types: insert a str at index 1 among the floats.
gpas_as_list.insert(1, "Whatevs")
# A list can shrink: pop removes and returns the element at index 1
# (the string just inserted), which is why the cell displays 'Whatevs'.
gpas_as_list.pop(1)

'Whatevs'

In [4]:
gpas_as_list

[4.0, 3.286, 3.5, 4.0]

In [5]:
gpas = np.array(gpas_as_list)

In [6]:
# IPython help syntax (not plain Python): shows type and docstring details for gpas.
?gpas

In [7]:
gpas.dtype

dtype('float64')

In [8]:
gpas.itemsize

8

In [9]:
gpas.size

4

In [10]:
len(gpas)

4

In [11]:
gpas.nbytes

32

## Multidimensional Arrays
* The data structure is actually called `ndarray`, representing any **n**umber of **d**imensions
* Arrays can have multiple dimensions, you declare them on creation
* Dimensions help define what each element in the array represents.  A two-dimensional array can be thought of as an array of arrays.
* **Rank** defines how many dimensions an array contains 
* **Shape** defines the length of each of the array's dimensions
* Each dimension is also referred to as an **axis**, and they are zero-indexed. Multiples are called **axes**.
* A 2-D array is also known as a **matrix**.

In [12]:
# Three rows of four GPAs stored as 16-bit floats (2 bytes per element).
# float16 has limited precision, so 3.286 is displayed as 3.285 in the output.
students_gpas = np.array(
    [
        [4.0, 3.286, 3.5, 4.0],
        [3.2, 3.8, 4.0, 4.0],
        [3.96, 3.92, 4.0, 4.0],
    ],
    dtype=np.float16,
)
students_gpas

array([[4.   , 3.285, 3.5  , 4.   ],
       [3.2  , 3.8  , 4.   , 4.   ],
       [3.96 , 3.92 , 4.   , 4.   ]], dtype=float16)

In [13]:
students_gpas.ndim

2

In [14]:
students_gpas.shape

(3, 4)

In [15]:
students_gpas.size

12

In [16]:
len(students_gpas)

3

In [17]:
students_gpas.itemsize

2

In [18]:
students_gpas.itemsize * students_gpas.size

24

In [19]:
%whos ndarray

Variable        Type       Data/Info
------------------------------------
gpas            ndarray    4: 4 elems, type `float64`, 32 bytes
students_gpas   ndarray    3x4: 12 elems, type `float16`, 24 bytes


In [20]:
np.info(students_gpas)

class:  ndarray
shape:  (3, 4)
strides:  (8, 2)
itemsize:  2
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x7ffeccf7ea00
byteorder:  little
byteswap:  False
type: float16


In [21]:
students_gpas[2]

array([3.96, 3.92, 4.  , 4.  ], dtype=float16)

In [22]:
students_gpas[2][3]

4.0

## About data types
* By choosing the proper [data type](https://docs.scipy.org/doc/numpy-1.14.0/user/basics.types.html) you can greatly reduce the size required to store objects
* Data types are maintained by wrapping values in a [scalar representation](https://docs.scipy.org/doc/numpy-1.14.0/reference/arrays.scalars.html)
* `np.zeros` is a handy way to create a new array with every element initialized to zero.

In [23]:
# 100 unsigned 16-bit counters, all starting at zero; later cells fill in
# the minutes studied per day by index.
study_minutes = np.zeros(shape=100, dtype=np.uint16)
study_minutes

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint16)

In [24]:
%whos

Variable        Type       Data/Info
------------------------------------
gpas            ndarray    4: 4 elems, type `float64`, 32 bytes
gpas_as_list    list       n=4
np              module     <module 'numpy' from '/Us<...>kages/numpy/__init__.py'>
students_gpas   ndarray    3x4: 12 elems, type `float16`, 24 bytes
study_minutes   ndarray    100: 100 elems, type `uint16`, 200 bytes


In [25]:
60 * 24

1440

In [26]:
study_minutes[0] = 150

In [27]:
first_day_minutes = study_minutes[0]

In [28]:
first_day_minutes

150

In [29]:
type(first_day_minutes)

numpy.uint16

In [30]:
# Log 60 minutes for the second day (index 1) of study_minutes.
# Plain assignment is used rather than `+=`: the slot still holds its initial 0,
# so the result is the same and re-running the cell cannot double-count.
study_minutes[1] = 60

In [31]:
# Slice assignment: set the days at indices 2 through 5 in a single statement.
study_minutes[2:6] = [80, 60, 30, 90]

## Creation 
* You can create an array of random values within given bounds using the `np.random` module.
  * `RandomState` lets you seed your randomness so that the results are repeatable.
* You can append a row in a couple of ways
   * You can use the `np.append` function, which returns a new array.  Make sure the new row has the same length as the existing rows.
   * You can build a new array (and reassign the name) by including the existing array in the iterable passed to `np.array`.


## Indexing
* You can use an indexing shortcut by separating dimensions with a comma.  
* You can index using a `list` or `np.array`.  Values will be pulled out at that specific index.  This is known as fancy indexing.
  * The resulting array has the same shape as the index array.  Be careful to distinguish the comma (tuple) shortcut, e.g. `a[1, 0]`, from fancy indexing with a list, e.g. `a[[1, 0]]`.

In [32]:
# Build a 2-row array: the existing log becomes row 0 and a fresh
# all-zero uint16 row of 100 entries becomes row 1.
# NOTE: this rebinds study_minutes from its own previous value, so the cell
# is not idempotent -- run it once per kernel session (Restart & Run All).
study_minutes = np.array([
    study_minutes,
    np.zeros(100, np.uint16)
])

In [33]:
study_minutes.shape

(2, 100)

In [34]:
# Set round 2, day 1 to 60 using chained indexing:
# [1] selects the second row, then [0] its first element.
# The next cell reads the same element back with the comma shortcut [1, 0].
study_minutes[1][0] = 60

In [35]:
study_minutes[1, 0]

60

In [36]:
1, 0

(1, 0)

In [37]:
# Seeded generator: the same seed reproduces the same "random" log on every run.
rand = np.random.RandomState(seed=42)
# 100 integers drawn from [30, 180), i.e. 180 itself is never produced.
fake_log = rand.randint(low=30, high=180, size=100, dtype=np.uint16)
fake_log

array([132, 122, 128,  44, 136, 129, 101,  95,  50, 132, 151,  64, 104,
       175, 117, 146, 139, 129, 133, 176,  98, 160, 179,  99,  82, 142,
        31, 106, 117,  56,  98,  67, 121, 159,  81, 170,  31,  50,  49,
        87, 179,  51, 116, 177, 118,  78, 171, 117,  88, 123, 102,  44,
        79,  31, 108,  80,  59, 137,  84,  93, 155, 160,  67,  80, 166,
       164,  70,  50, 102, 113,  47, 131, 161, 118,  82,  89,  81,  43,
        81,  38, 119,  52,  82,  31, 159,  57, 113,  71, 121, 140,  91,
        70,  37, 106,  64, 127, 110,  58,  93,  79], dtype=uint16)

In [38]:
[fake_log[3], fake_log[8]]

[44, 50]

In [39]:
fake_log[[3, 8]]

array([44, 50], dtype=uint16)

In [40]:
# Fancy indexing with a 2-D index array: the result takes the shape of
# `index` (2x2), with each entry replaced by fake_log at that position.
index = np.array([
    [3, 8],
    [0, 1]
])
fake_log[index]

array([[ 44,  50],
       [132, 122]], dtype=uint16)

In [41]:
# Append fake_log as a new row. It is wrapped in a list so it is 2-D
# (one row) to match study_minutes along axis 0.
# np.append does not modify its input; the returned array is rebound to
# study_minutes, so re-running this cell appends the row again.
study_minutes = np.append(study_minutes, [fake_log], axis=0)

In [42]:
study_minutes[1, 1] = 360

## Boolean Array Indexing
* You can create a boolean array by using comparison operators on an array.
  * You can use boolean arrays for fancy indexing.
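  * Boolean arrays can be combined element-wise using the bitwise operators (`&`, `|`).
      * Do not use the `and` or `or` keywords; they do not operate element-wise.
      * Mind the order of operations: `&` and `|` bind more tightly than comparisons, so wrap each comparison in parentheses.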
* Even though boolean indexing returns a new array (a copy), you can still update the original array by assigning through a boolean index.

In [44]:
fake_log[fake_log < 60]

array([44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52, 31,
       57, 37, 58], dtype=uint16)

In [45]:
# Pure-Python equivalent of fake_log[fake_log < 60]: test each element in turn
# and keep the ones under 60.
results = [value for value in fake_log if value < 60]
np.array(results)

array([44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52, 31,
       57, 37, 58], dtype=uint16)

In [47]:
study_minutes[study_minutes < 60]

array([30,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0, 44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59,
       50, 47, 43, 38, 52, 31, 57, 37, 58], dtype=uint16)

In [50]:
np.array([False, True, True]) & np.array([True, False, True])

array([False, False,  True])

In [52]:
study_minutes[(study_minutes < 60) & (study_minutes > 0)]

array([30, 44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52,
       31, 57, 37, 58], dtype=uint16)

In [53]:
# Assign through a boolean mask: every element below 60, in every row,
# is overwritten with 0 in place.
study_minutes[study_minutes < 60] = 0

In [54]:
study_minutes[2]

array([132, 122, 128,   0, 136, 129, 101,  95,   0, 132, 151,  64, 104,
       175, 117, 146, 139, 129, 133, 176,  98, 160, 179,  99,  82, 142,
         0, 106, 117,   0,  98,  67, 121, 159,  81, 170,   0,   0,   0,
        87, 179,   0, 116, 177, 118,  78, 171, 117,  88, 123, 102,   0,
        79,   0, 108,  80,   0, 137,  84,  93, 155, 160,  67,  80, 166,
       164,  70,   0, 102, 113,   0, 131, 161, 118,  82,  89,  81,   0,
        81,   0, 119,   0,  82,   0, 159,   0, 113,  71, 121, 140,  91,
        70,   0, 106,  64, 127, 110,   0,  93,  79], dtype=uint16)

## Slicing
* Works a lot like normal list slicing.
* You can use commas to separate each dimension slice.
* Basic slicing of an array always returns a view of the data, **not a copy**.
* You can access the base object using the `ndarray.base` property

In [55]:
fruit = ["apple", "banana", "cherry", "durian"]

In [57]:
fruit[1:3]

['banana', 'cherry']

In [58]:
fruit[:3]

['apple', 'banana', 'cherry']

In [59]:
fruit[3:]

['durian']

In [60]:
fruit[:]

['apple', 'banana', 'cherry', 'durian']

In [61]:
copied = fruit[:]

In [62]:
copied[3] = 'cheese'
# Slicing a list returns a copy
fruit, copied

(['apple', 'banana', 'cherry', 'durian'],
 ['apple', 'banana', 'cherry', 'cheese'])

In [63]:
fruit[::2]

['apple', 'cherry']

In [64]:
fruit[::-1]

['durian', 'cherry', 'banana', 'apple']

In [65]:
np.arange(20)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [66]:
# 42 consecutive integers laid out as 7 rows x 6 columns.
# The shape is assigned in place, so `practice` remains the original array
# rather than a reshaped view of another one; the `practice.base is None`
# check further down relies on this.
practice = np.arange(42)
practice.shape = (7, 6)
practice

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35],
       [36, 37, 38, 39, 40, 41]])

In [73]:
practice[2:5, 3::2]

array([[15, 17],
       [21, 23],
       [27, 29]])

In [74]:
# Unlike list slicing, slicing an ndarray returns a view, not a copy.
# Writing through not_copied therefore changes practice too, which is
# why 90210 shows up in both arrays displayed below.
not_copied = practice[:]
not_copied[0, 0] = 90210
practice, not_copied

(array([[90210,     1,     2,     3,     4,     5],
        [    6,     7,     8,     9,    10,    11],
        [   12,    13,    14,    15,    16,    17],
        [   18,    19,    20,    21,    22,    23],
        [   24,    25,    26,    27,    28,    29],
        [   30,    31,    32,    33,    34,    35],
        [   36,    37,    38,    39,    40,    41]]),
 array([[90210,     1,     2,     3,     4,     5],
        [    6,     7,     8,     9,    10,    11],
        [   12,    13,    14,    15,    16,    17],
        [   18,    19,    20,    21,    22,    23],
        [   24,    25,    26,    27,    28,    29],
        [   30,    31,    32,    33,    34,    35],
        [   36,    37,    38,    39,    40,    41]]))

In [75]:
practice.base is None

True

In [76]:
not_copied.base is None

False

In [77]:
not_copied.base is practice

True

In [78]:
practice.flags['OWNDATA'], not_copied.flags['OWNDATA']

(True, False)