# Lecture 21 – Joining and Row Methods

## Data 94, Spring 2021

In [None]:
from datascience import *
import numpy as np

## `join`ing

In [None]:
# Catalog of phone models: one row per model, with its price and screen size.
# Built one column at a time; columns appear in the order they are added.
phones = (
    Table()
    .with_column('Model', np.array(['iPhone 12', 'iPhone 12 Pro Max', 'Samsung Galaxy S21', 'OnePlus 8']))
    .with_column('Price', np.array([799, 1099, 799, 699]))
    .with_column('Screen Size', np.array([6.1, 6.7, 6.2, 6.6]))
)

# Stock on hand: one row per (handset, store) pair.
# 'iPhone 12' appears twice (two stores) and 'Pixel 5' has no entry in phones,
# which makes this table useful for showing how join treats repeated and
# unmatched values.
inventory = (
    Table()
    .with_column('Handset', np.array(['Samsung Galaxy S21', 'iPhone 12', 'iPhone 12', 'OnePlus 8', 'Pixel 5']))
    .with_column('Units', np.array([50, 40, 10, 100, 25]))
    .with_column('Store', np.array(['Berkeley', 'Berkeley', 'San Francisco', 'Oakland', 'Oakland']))
)

In [None]:
# Display the phones table (4 models).
phones

In [None]:
# Display the inventory table (5 rows; 'iPhone 12' is listed for two stores).
inventory

In [None]:
# Pair each row of phones with the rows of inventory whose 'Handset' equals
# the phone's 'Model'. Values found in only one table ('iPhone 12 Pro Max' in
# phones, 'Pixel 5' in inventory) have no partner and do not appear.
phones.join('Model', inventory, 'Handset')

In [None]:
# The same match performed from the other side: inventory is the calling table,
# so its 'Handset' column is the one matched against phones' 'Model'.
inventory.join('Handset', phones, 'Model')

In [None]:
# Keep the joined table so price and stock information can be used together.
store = phones.join('Model', inventory, 'Handset')
store

In [None]:
# Element-wise product: for each row of store, price times units in stock.
store.column('Price') * store.column('Units')

In [None]:
# Total value of all phones in the inventory whose price is known:
# sum of price * units over the rows of store. Handsets with no match in
# phones (e.g. 'Pixel 5') are not in store and so are not counted.
np.sum(store.column('Price') * store.column('Units'))

In [None]:
# Equivalent to the above: the dot product of two 1-D arrays is the sum of
# their element-wise products.
np.dot(store.column('Price'), store.column('Units'))

### Quick Check 1

In [None]:
# Address book: one row per person, with an email and a numeric area code.
# Area codes 212 and 734 each appear twice; 510 appears once.
contacts = (
    Table()
    .with_column('Name', np.array(['Roxanne', 'Sandy', 'Stan', 'Tomas', 'Wilma']))
    .with_column('Email', np.array(['roxanne@berkeley.edu', 'sandy@nyu.edu', 'stan.vg@gmail.com', 'tomastrain@umich.edu', 'wilma@columbia.edu']))
    .with_column('Area Code', np.array([510, 212, 734, 734, 212]))
)

# Lookup table from area code to region; every code is listed exactly once.
# Note that 510 (used in contacts) is absent, while 310 and 519 are unused there.
codes = (
    Table()
    .with_column('Code', np.array([212, 310, 519, 734]))
    .with_column('Region', np.array(['New York City', 'Los Angeles', 'Ontario, Canada', 'Metro Detroit']))
)

In [None]:
# Display the contacts table.
contacts

In [None]:
# Display the area-code lookup table.
codes

In [None]:
# contacts.join(___, ___, ___)

### Followup

In [None]:
# Variant of the codes table in which 212 is listed twice (once as
# 'New York City', once as 'Los Angeles') and 310 is missing, so the join
# column on this side contains a repeated value.
extra_codes = (
    Table()
    .with_column('Code', np.array([212, 212, 519, 734]))
    .with_column('Region', np.array(['New York City', 'Los Angeles', 'Ontario, Canada', 'Metro Detroit']))
)

In [None]:
# Display contacts again for reference (212 and 734 each occur twice).
contacts

In [None]:
# Display extra_codes (212 occurs twice here as well).
extra_codes

In [None]:
# Join on a value that is repeated in BOTH tables: 212 occurs twice in
# contacts and twice in extra_codes.
# NOTE(review): with current datascience versions each 212 contact should be
# paired with each 212 row of extra_codes - confirm against the displayed output.
contacts.join('Area Code', extra_codes, 'Code')

### Disclaimer

In [None]:
# No output: nothing in contacts' 'Name' column (strings) equals anything in
# codes' 'Code' column (integers), so the join finds no matching rows.
# join does not check that the two columns are sensible to compare.
contacts.join('Name', codes, 'Code')

## Example: grades

In [None]:
# Load the class roster and the grade records from CSV files; the paths are
# relative to the notebook's working directory.
roster = Table.read_table('data/roster.csv')
grades = Table.read_table('data/grades.csv')

In [None]:
# Display the roster using show() with no row limit given.
roster.show()

In [None]:
# Display the grades table using show() with no row limit given.
grades.show()

In [None]:
# Number of rows in roster, for comparison with grades and the joined table.
roster.num_rows

In [None]:
# Number of rows in grades.
grades.num_rows

In [None]:
# Combine roster and grades on 'SID'. No third argument is given because the
# join column has the same label in both tables.
grades_merged = roster.join('SID', grades)
grades_merged

In [None]:
# Row count after the join; compare with roster.num_rows and grades.num_rows
# to see whether any SIDs failed to match.
grades_merged.num_rows

Let's see if there are any students in the roster who we don't have grades for:

In [None]:
# Show every roster row whose SID has no entry in grades.
# The set of graded SIDs is built once up front, rather than re-extracting
# grades' 'SID' column and scanning it linearly on every iteration.
graded_sids = set(grades.column('SID'))
for sid in roster.column('SID'):
    if sid not in graded_sids:
        display(roster.where('SID', sid))

And vice versa:

In [None]:
# Show every grades row whose SID does not appear on the roster.
# The set of roster SIDs is built once up front, rather than re-extracting
# roster's 'SID' column and scanning it linearly on every iteration.
roster_sids = set(roster.column('SID'))
for sid in grades.column('SID'):
    if sid not in roster_sids:
        display(grades.where('SID', sid))

Whoever the above students are, they're not in our roster. (This happens often when students drop a class.)

Now we can do some grade calculations:

In [None]:
# Display the joined roster/grades table before adding computed columns.
grades_merged

In [None]:
# Denominator used for each assignment when converting raw scores below.
# Keys must match column labels in grades_merged exactly.
# NOTE(review): presumably the maximum points available per assignment - confirm.
assignment_totals = {
    'Homework 1': 24,
    'Homework 2': 18,
    'Quiz 1': 100,
    'Quiz 2': 70,
    'Final': 90
}

In [None]:
# Divides each assignment column by its denominator in assignment_totals
for assignment in assignment_totals.keys():
    grades_merged = grades_merged.with_columns(
        assignment + ' Percentage', grades_merged.column(assignment) / assignment_totals[assignment]
    )

In [None]:
# Display grades_merged again, now including the '... Percentage' columns.
grades_merged

## Other tools

### `.row`

In [None]:
# Display phones again for reference in the row examples below.
phones

In [None]:
# Rows are indexed from 0, so this is the second row ('iPhone 12 Pro Max').
phones.row(1)

In [None]:
# Check what kind of object .row returns.
type(phones.row(1))

In [None]:
# Entry at position 1 of that row, i.e. the value in its second column ('Price').
phones.row(1).item(1)

In [None]:
# Convert a row into a plain Python list of its values.
# NOTE(review): index -1 is expected to select the last row - confirm.
list(phones.row(-1))

### `.with_row` and `.with_rows`

In [None]:
# Display phones again for reference before adding rows.
phones

In [None]:
# Table with one extra row; values are given in column order
# (Model, Price, Screen Size). The result is not assigned, so the name
# phones still refers to the original table.
phones.with_row(['iPhone 12 Mini', 699, 5.8])

In [None]:
# Table with several extra rows at once: pass a list of rows, each itself a
# list of values in column order (Model, Price, Screen Size).
phones.with_rows([['iPhone 12 Mini', 699, 5.8],
                  ['Moto RAZR', 459, 3.5]])

### Quick Check 2

In [None]:
# Display the codes table used in this quick check.
codes