# 1. NumPy (11 points)

# 1.1 Arrays

In [7]:
import numpy as np
array1 = np.array([2, 3, 4, 5])   # built from an explicit list of values
array2 = np.arange(0, 4)          # the integers 0, 1, 2, 3
(array1, array2)

(array([2, 3, 4, 5]), array([0, 1, 2, 3]))

In [8]:
# Multiplying an array by a scalar multiplies every element by it.
array1 * 2

array([ 4,  6,  8, 10])

In [9]:
# Multiplying two arrays of the same length works element by element:
# result[i] is array1[i] * array2[i].
array1 * array2

array([ 0,  3,  8, 15])

In [10]:
# ** is element-wise as well: result[i] is array1[i] raised to the
# power array2[i].
array1 ** array2

array([  1,   3,  16, 125])

In [11]:
# A trailing ? is IPython syntax (not plain Python) that shows the
# documentation for np.arange.
np.arange?

In [12]:
# Evaluating a bare function name displays the function object,
# including its signature.
np.linspace

<function numpy.core.function_base.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)>

# 1.2 Multidimensional Arrays

A multidimensional array is a primitive version of a table, containing only one kind of data and having no column labels. A 2-dimensional array is useful for working with matrices of numbers.

In [13]:
# np.zeros builds an array of the requested shape filled with 0.0.
# For a 2-dimensional shape the first number is the row count
# (how far the array extends *down*) and the second is the column
# count (how far it extends *right*).
array3 = np.zeros(shape=(4, 5))
array3

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [14]:
# The shape attribute returns the dimensions of the array as a
# tuple; for a 2-dimensional array that is (rows, columns).
array3.shape

(4, 5)

In [15]:
# You can think of array3 as an array containing 4 arrays, each
# containing 5 zeros.  Accordingly, we can set or get the third
# element of the second array in array3 (indices start at 0, so
# that is index 1, then index 2) using standard Python indexing
# syntax twice:
array3[1][2] = 7
array3

array([[0., 0., 0., 0., 0.],
       [0., 0., 7., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [16]:
# This comes up so often that there is special syntax provided
# for it.  The comma syntax addresses the same element as the
# double brackets: array3[1, 2] is row 1, column 2, so this
# overwrites the value set with array3[1][2].
array3[1, 2] = 8
array3

array([[0., 0., 0., 0., 0.],
       [0., 0., 8., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [17]:
# Start from a 3x5 grid of zeros, then use ":" in the row position
# (meaning "every row") to fill all of column 2 with 5.
array4 = np.zeros(shape=(3, 5))
array4[:, 2] = 5
array4

array([[0., 0., 5., 0., 0.],
       [0., 0., 5., 0., 0.],
       [0., 0., 5., 0., 0.]])

In [18]:
# Indexing with two integer arrays pairs them up position by
# position: the i-th entry of rows goes with the i-th entry of cols,
# so the cells (1, 3), (0, 1) and (2, 4) are the ones assigned.
rows = np.array([1, 0, 2])
cols = np.array([3, 1, 4])

array5 = np.zeros(shape=(3, 5))
array5[rows, cols] = 3
array5

array([[0., 3., 0., 0., 0.],
       [0., 0., 0., 3., 0.],
       [0., 0., 0., 0., 3.]])

# 2. Pandas (17 points)

The code below creates the data frames used throughout the rest of this section.

In [19]:
import pandas as pd

# heroes: one row per hero, labelled by hero name in the index.
heroes = pd.DataFrame(
    {
        'color': ['red', 'green', 'black', 'blue', 'black', 'red'],
        'first_seen_on': ['a', 'a', 'f', 'a', 'a', 'f'],
        'first_season': [2, 1, 2, 3, 3, 1],
    },
    index=['flash', 'arrow', 'vibe', 'atom', 'canary', 'firestorm'],
)

# identities: pairs each 'ego' with an 'alter-ego' (a hero name).
# 'firestorm' appears twice because two egos share that alter-ego.
identities = pd.DataFrame({
    'ego': [
        'barry allen', 'oliver queen', 'cisco ramon', 'ray palmer',
        'sara lance', 'martin stein', 'ronnie raymond',
    ],
    'alter-ego': [
        'flash', 'arrow', 'vibe', 'atom',
        'canary', 'firestorm', 'firestorm',
    ],
})

# teams: (team, hero) pairs.  'killer frost' and 'speedy' are not in
# the heroes index, and 'canary' has no row here.
teams = pd.DataFrame({
    'team': [
        'flash', 'arrow', 'flash', 'legends',
        'flash', 'legends', 'arrow',
    ],
    'hero': [
        'flash', 'arrow', 'vibe', 'atom',
        'killer frost', 'firestorm', 'speedy',
    ],
})

In [20]:
# Display heroes; the row labels (the index) are the hero names.
heroes

Unnamed: 0,color,first_seen_on,first_season
flash,red,a,2
arrow,green,a,1
vibe,black,f,2
atom,blue,a,3
canary,black,a,3
firestorm,red,f,1


In [21]:
# Display identities; no index was given, so rows are numbered from 0.
identities

Unnamed: 0,ego,alter-ego
0,barry allen,flash
1,oliver queen,arrow
2,cisco ramon,vibe
3,ray palmer,atom
4,sara lance,canary
5,martin stein,firestorm
6,ronnie raymond,firestorm


In [22]:
# Display teams; no index was given, so rows are numbered from 0.
teams

Unnamed: 0,team,hero
0,flash,flash
1,arrow,arrow
2,flash,vibe
3,legends,atom
4,flash,killer frost
5,legends,firestorm
6,arrow,speedy


# 2.1 Slice and Dice

In [23]:
# ":" selects every row; a single column label returns a Series.
heroes.loc[:, 'color']

flash          red
arrow        green
vibe         black
atom          blue
canary       black
firestorm      red
Name: color, dtype: object

In [24]:
# A list of column labels returns a DataFrame with just those columns.
heroes.loc[:, ['color', 'first_season']]

Unnamed: 0,color,first_season
flash,red,2
arrow,green,1
vibe,black,2
atom,blue,3
canary,black,3
firestorm,red,1


In [25]:
# A list of row labels selects those rows; ":" keeps every column.
heroes.loc[['flash', 'vibe'], :]

Unnamed: 0,color,first_seen_on,first_season
flash,red,a,2
vibe,black,f,2


In [26]:
# Leaving out the column selector gives the same result: all columns
# are returned for the selected rows.
heroes.loc[['flash', 'vibe']]

Unnamed: 0,color,first_seen_on,first_season
flash,red,a,2
vibe,black,f,2


In [27]:
# Label slices in .loc include both endpoints: rows 'flash' through
# 'atom', and columns up to and including 'first_seen_on'.
heroes.loc['flash':'atom', :'first_seen_on']

Unnamed: 0,color,first_seen_on
flash,red,a
arrow,green,a
vibe,black,f
atom,blue,a


In [28]:
# .iloc slices by integer position and excludes the stop value:
# the first 4 rows and the first 2 columns.
heroes.iloc[:4, :2]

Unnamed: 0,color,first_seen_on
flash,red,a
arrow,green,a
vibe,black,f
atom,blue,a


In [29]:
# Each comparison yields a boolean Series; & combines them element-wise
# (the parentheses are required), keeping rows where both are True.
heroes[(heroes['first_season']==3) & (heroes['first_seen_on']=='a')]

Unnamed: 0,color,first_seen_on,first_season
atom,blue,a,3
canary,black,a,3


In [30]:
# isin keeps the rows whose first_season equals any of the listed values.
heroes[heroes['first_season'].isin([1,3])]

Unnamed: 0,color,first_seen_on,first_season
arrow,green,a,1
atom,blue,a,3
canary,black,a,3
firestorm,red,f,1


# 2.2 Counting Rows

In [31]:
# Count how many rows have each color, most frequent first.
heroes['color'].value_counts()

red      2
black    2
blue     1
green    1
Name: color, dtype: int64

In [32]:
# Count the rows in each (color, first_season) group, then turn the
# grouped result back into a flat table with the counts in 'count'.
(
    heroes
    .groupby(['color', 'first_season'])
    .size()
    .reset_index(name='count')
)

Unnamed: 0,color,first_season,count
0,black,2,1
1,black,3,1
2,blue,3,1
3,green,1,1
4,red,1,1
5,red,2,1


# 2.3 Joining Tables

In [33]:
# Copy the hero names from the index into a regular 'hero' column so
# heroes can be joined with teams on that column.
heroes = heroes.assign(hero=heroes.index)
heroes

Unnamed: 0,color,first_seen_on,first_season,hero
flash,red,a,2,flash
arrow,green,a,1,arrow
vibe,black,f,2,vibe
atom,blue,a,3,atom
canary,black,a,3,canary
firestorm,red,f,1,firestorm


In [34]:
# Inner join: keep only heroes that have a matching 'hero' value in
# teams ('canary' has no team row, so it is dropped).
pd.merge(heroes, teams, how='inner', on='hero')

Unnamed: 0,color,first_seen_on,first_season,hero,team
0,red,a,2,flash,flash
1,green,a,1,arrow,arrow
2,black,f,2,vibe,flash
3,blue,a,3,atom,legends
4,red,f,1,firestorm,legends


In [35]:
# Left join: keep every row of heroes; a hero with no match in teams
# ('canary') gets a missing value for team.
pd.merge(heroes, teams, how='left', on='hero')

Unnamed: 0,color,first_seen_on,first_season,hero,team
0,red,a,2,flash,flash
1,green,a,1,arrow,arrow
2,black,f,2,vibe,flash
3,blue,a,3,atom,legends
4,black,a,3,canary,
5,red,f,1,firestorm,legends


In [36]:
# Outer join: keep rows from both tables.  Heroes found only in teams
# ('killer frost', 'speedy') get missing values for the heroes columns,
# which is why first_season is shown as a float.
pd.merge(heroes, teams, how='outer', on='hero')

Unnamed: 0,color,first_seen_on,first_season,hero,team
0,red,a,2.0,flash,flash
1,green,a,1.0,arrow,arrow
2,black,f,2.0,vibe,flash
3,blue,a,3.0,atom,legends
4,black,a,3.0,canary,
5,red,f,1.0,firestorm,legends
6,,,,killer frost,flash
7,,,,speedy,arrow
