### NumPy intro

* arrays are a higher-dimensional data structure
* selecting elements from an array works much like indexing a Python list
* operations are performed on a whole array at once instead of on one scalar at a time

In [6]:
import numpy as np

In [7]:
np.array([1, 4, 5])

array([1, 4, 5])

In [8]:
np.ones(4)

array([1., 1., 1., 1.])

In [12]:
np.zeros((4, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [19]:
np.zeros((2,3, 2))

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [20]:
np.full((3,2,4), 'hello')

array([[['hello', 'hello', 'hello', 'hello'],
        ['hello', 'hello', 'hello', 'hello']],

       [['hello', 'hello', 'hello', 'hello'],
        ['hello', 'hello', 'hello', 'hello']],

       [['hello', 'hello', 'hello', 'hello'],
        ['hello', 'hello', 'hello', 'hello']]], dtype='<U5')

In [30]:
np.linspace(0, 10, 20)

array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

In [38]:
# arange(start, stop, step): the stop value is exclusive, so 11 is needed
# for 10 to be included as the last even number.
evens = np.arange(0, 11, 2)
evens

array([ 0,  2,  4,  6,  8, 10])

In [37]:
odds = np.arange(1, 11, 2)
odds

array([1, 3, 5, 7, 9])

In [43]:
# np.append returns a new array. Because an array holds a single dtype,
# adding the string 'foo' turns every element into a string (the dtype and
# element type are inspected in the following cells).
not_quite_evens = np.append(evens, 'foo')

In [42]:
odds.dtype

dtype('int64')

In [44]:
not_quite_evens.dtype

dtype('<U21')

In [47]:
type(not_quite_evens[2])

numpy.str_

In [49]:
# Plain Python list of 0..4, used to contrast list behaviour with arrays.
nums = list(range(5))

In [50]:
nums.append('foo')

In [51]:
# Display the list: unlike an array, a Python list keeps mixed element types
# (the ints stay ints next to the appended string).
nums

[0, 1, 2, 3, 4, 'foo']

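* Speed — with a single dtype, NumPy can run operations over the whole array in compiled code
* Space — elements are packed into one typed buffer instead of being stored as separate Python objects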

In [52]:
evens = np.arange(0, 9, 2)

In [54]:
evens.dtype

dtype('int64')

In [62]:
# astype returns a converted copy: each integer becomes its string form,
# while evens itself keeps its integer dtype.
strs_as_numbers = evens.astype('<U21')

In [63]:
strs_as_numbers

array(['0', '2', '4', '6', '8'], dtype='<U21')

In [67]:
# Elements are strings now, so + concatenates: '2' + '1' -> '21', not 3.
# Note: this mutates the array in place, so re-running the cell appends
# another '1' each time.
strs_as_numbers[1] = strs_as_numbers[1] + '1'

In [68]:
strs_as_numbers

array(['0', '21', '4', '6', '8'], dtype='<U21')

In [69]:
strs_as_numbers.astype('int')

array([ 0, 21,  4,  6,  8])

### Multidimensional arrays

In [70]:
evens

array([0, 2, 4, 6, 8])

In [71]:
odds

array([1, 3, 5, 7, 9])

In [76]:
# Default axis=0: each input array becomes one row, giving shape (2, 5).
stack_axis_zero = np.stack([evens, odds])
stack_axis_zero

array([[0, 2, 4, 6, 8],
       [1, 3, 5, 7, 9]])

In [77]:
# axis=1: each input array becomes one column, giving shape (5, 2) with
# evens in column 0 and odds in column 1.
stack_axis_one = np.stack([evens, odds], axis = 1)
stack_axis_one

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [87]:
# The integers 1..10 laid out as 2 rows of 5: row 0 is 1..5, row 1 is 6..10.
numbers_through_ten = np.arange(1, 11).reshape(2, 5)

In [88]:
numbers_through_ten + 1

array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11]])

In [90]:
numbers_through_ten[1] - 3

array([3, 4, 5, 6, 7])

In [93]:
# Assigning a scalar to a row broadcasts it: every element of row 1 is
# overwritten with 3, in place (the original 6..10 values are lost).
numbers_through_ten[1] = 3

In [94]:
numbers_through_ten

array([[1, 2, 3, 4, 5],
       [3, 3, 3, 3, 3]])

In [98]:
numbers_through_ten

array([[1, 2, 3, 4, 5],
       [3, 3, 3, 3, 3]])

In [99]:
# Elementwise sum of the two rows, written back into row 0.
# Note: this mutates the array in place, so re-running the cell adds row 1
# to row 0 again and changes the result.
numbers_through_ten[0] = numbers_through_ten[0] + numbers_through_ten[1]

In [100]:
numbers_through_ten

array([[4, 5, 6, 7, 8],
       [3, 3, 3, 3, 3]])

In [101]:
numbers_through_ten.shape

(2, 5)

In [103]:
# ':' keeps every row, 1 picks the column at index 1 -> one value per row.
numbers_through_ten[:, 1]

array([5, 3])

In [108]:
# Stacking two (5, 2) arrays adds a new leading axis: the result is
# (2, 5, 2), with the original at index 0 and the +100 copy at index 1.
stacked_numbers = np.stack([stack_axis_one, stack_axis_one + 100])

In [109]:
stacked_numbers

array([[[  0,   1],
        [  2,   3],
        [  4,   5],
        [  6,   7],
        [  8,   9]],

       [[100, 101],
        [102, 103],
        [104, 105],
        [106, 107],
        [108, 109]]])

In [114]:
# One index per axis: every stacked block (:), first row (0), first
# column (0) -> the top-left element of each block.
stacked_numbers[:, 0, 0]

array([  0, 100])

In [118]:
stack_axis_one == 1

array([[False,  True],
       [False, False],
       [False, False],
       [False, False],
       [False, False]])

In [119]:
stack_axis_one

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [122]:
# Comparing an array to a scalar is elementwise: results is a boolean
# array of the same shape, True where the value is below 3.
results = stack_axis_one < 3
results

array([[ True,  True],
       [ True, False],
       [False, False],
       [False, False],
       [False, False]])

In [123]:
# Indexing with a same-shape boolean mask keeps only the True positions
# and returns them as a flat 1-D array.
stack_axis_one[results]

array([0, 1, 2])

In [125]:
stack_axis_one[stack_axis_one == 3]

array([3])

In [127]:
stack_axis_one

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [129]:
# Filter within the second column only: take column 1, then keep just
# those of its values that are below 5.
stack_axis_one[:, 1][stack_axis_one[:, 1] < 5]

array([1, 3])

In [131]:
stack_axis_one[:, 1] < 5

array([ True,  True, False, False, False])

In [135]:
# A 1-D mask with one entry per row selects whole rows: keep every row
# whose second-column value is below 5.
stack_axis_one[stack_axis_one[:, 1] < 5]

array([[0, 1],
       [2, 3]])

In [136]:
import pandas as pd

In [141]:
# Data source: Wikipedia's list of US metropolitan statistical areas.
# read_html parses the page's tables into a list of DataFrames; [1] takes
# the second one.
# NOTE(review): needs network access, and the table index may shift if the
# page layout changes - confirm [1] is still the MSA table.
msas= pd.read_html("https://en.wikipedia.org/wiki/List_of_metropolitan_statistical_areas")[1]

In [143]:
# Pull the DataFrame's contents out as a plain 2-D NumPy array.
# NOTE(review): newer pandas documentation recommends DataFrame.to_numpy()
# over .values - consider switching if the installed version supports it.
msas_array = msas.values

In [147]:
# Column 1 for every row after the first; the output shows this column
# holds the MSA names.
# NOTE(review): row 0 is skipped - presumably a header row; confirm.
msas_array[1:, 1]

array(['New York-Newark-Jersey City, NY-NJ-PA MSA',
       'Los Angeles-Long Beach-Anaheim, CA MSA',
       'Chicago-Naperville-Elgin, IL-IN-WI MSA',
       'Dallas-Fort Worth-Arlington, TX MSA',
       'Houston-The Woodlands-Sugar Land, TX MSA',
       'Washington-Arlington-Alexandria, DC-VA-MD-WV MSA',
       'Miami-Fort Lauderdale-West Palm Beach, FL MSA',
       'Philadelphia-Camden-Wilmington, PA-NJ-DE-MD MSA',
       'Atlanta-Sandy Springs-Roswell, GA MSA',
       'Boston-Cambridge-Newton, MA-NH MSA',
       'Phoenix-Mesa-Scottsdale, AZ MSA',
       'San Francisco-Oakland-Hayward, CA MSA',
       'Riverside-San Bernardino-Ontario, CA MSA',
       'Detroit-Warren-Dearborn, MI MSA',
       'Seattle-Tacoma-Bellevue, WA MSA',
       'Minneapolis-St. Paul-Bloomington, MN-WI MSA',
       'San Diego-Carlsbad, CA MSA',
       'Tampa-St. Petersburg-Clearwater, FL MSA',
       'Denver-Aurora-Lakewood, CO MSA',
       'Baltimore-Columbia-Towson, MD MSA', 'St. Louis, MO-IL MSA',
       'Char