# Lecture 18 – Row Manipulation

## Data 94, Spring 2021

In [None]:
from datascience import *
import numpy as np
import plotly.express as px

In [None]:
# Load the CSV file into a datascience Table named `sat`.
sat = Table.read_table('data/sat2014-lecture.csv')

In [None]:
sat

In [None]:
# Table dimensions: number of rows first, then number of columns (displayed as a pair).
sat.num_rows, sat.num_columns

It would be nice to have a combined score too.

In [None]:
# Column arrays add element-wise, giving one total per row.
# The result is only displayed here; it is not stored anywhere yet.
sat.column('Critical Reading') + sat.column('Math') + sat.column('Writing')

In [None]:
# with_columns returns a new table, so rebind `sat` to keep the added column.
# 'Combined' is the element-wise sum of the three section-score columns.
sat = sat.with_columns(
    'Combined',
    sat.column('Critical Reading') + sat.column('Math') + sat.column('Writing'),
)

In [None]:
sat

## `sort`

Which states had the highest combined scores? The lowest?

In [None]:
# Orders the rows by 'Combined' from lowest to highest.
# The sorted table is only displayed; `sat` itself is not reassigned.
sat.sort('Combined')

In [None]:
# sort() orders from smallest to largest unless descending=True is passed
# (descending defaults to False). Here the highest combined scores come first.
sat.sort('Combined', descending = True)

We can sort by any column:

In [None]:
sat.sort('Participation Rate')

In [None]:
sat.sort('Math', descending = True)

In [None]:
# Sorting also works on columns of strings, which are put in alphabetical order.
# This table was already ordered by state name, so the output matches the original order.
sat.sort('State')

## `take`

In [None]:
sat

In [None]:
# take(2) returns a one-row table holding the row at index 2 (rows are numbered from 0).
sat.take(2)

In [None]:
# An array of indices selects several rows, returned in the order listed (1, then 4, then 3).
sat.take(np.array([1, 4, 3]))

In [None]:
np.arange(5)

In [None]:
# np.arange(5) is the array 0, 1, 2, 3, 4, so this displays the first five rows.
sat.take(np.arange(5))

Combining `sort` and `take` lets us answer "top N" questions: sort by the column of interest, then take the first N rows.

What are the top 5 states according to math scores?

In [None]:
# Top 5 states by math score: sort on 'Math' from highest to lowest,
# then keep the rows at indices 0 through 4.
sat.sort('Math', descending = True).take(np.arange(5))

What are the top 8 states in terms of participation?

In [None]:
# Sort from highest to lowest participation rate, then keep the first eight rows (indices 0-7).
sat.sort('Participation Rate', descending = True).take(np.arange(8))

Note: `.take` works on arrays too, not just tables!

In [None]:
# Extract the 'State' column as an array first, then take its first five elements.
sat.column('State').take(np.arange(5))

In [None]:
# Take the first five rows of the table first, then extract their 'State' values as an array.
sat.take(np.arange(5)).column('State')

### Quick Check 1

In [None]:
animals = Table.read_table('data/animals.csv')

In [None]:
animals

In [None]:
# animals._____(_____).column(_____).take(_____)

## `where`

In [None]:
sat

In [None]:
# Keep only the rows whose 'Combined' value is strictly greater than 1800.
sat.where('Combined', are.above(1800))

In [None]:
sat.where('State', are.equal_to('California'))

In [None]:
# are.containing matches any row whose 'State' value includes the substring 'Dakota'.
sat.where('State', are.containing('Dakota'))

In [None]:
# are.between(580, 600) keeps Math scores from 580 up to, but not including, 600.
sat.where('Math', are.between(580, 600))

### Shortcut for `are.equal_to`

In [None]:
sat.where('State', are.equal_to('Pennsylvania'))

In [None]:
# Passing a plain value as the second argument is shorthand for are.equal_to(value).
sat.where('State', 'Pennsylvania')

### Multiple conditions

In [None]:
sat

In [None]:
# Each where filters the table returned by the previous one, so only rows meeting
# both conditions remain: participation rate above 20 and combined score above 1500.
sat.where('Participation Rate', are.above(20)).where('Combined', are.above(1500))

In [None]:
# Rows with participation rate below 10 and, among those, combined score above 1600.
sat.where('Participation Rate', are.below(10)).where('Combined', are.above(1600))

In [None]:
# State names used as the "deep south" group in the membership filters that follow.
deep_south = np.array([
    'Alabama',
    'Georgia',
    'Louisiana',
    'Mississippi',
    'South Carolina',
])

In [None]:
# are.contained_in keeps rows whose 'State' value appears in the deep_south array.
sat.where('State', are.contained_in(deep_south))

In [None]:
# Three filters applied in sequence: state is in deep_south, participation rate
# is below 10, and combined score is above 1600.
(
    sat.where('State', are.contained_in(deep_south))
       .where('Participation Rate', are.below(10))
       .where('Combined', are.above(1600))
)

In [None]:
# Scatter plot of the table (converted to a DataFrame for plotly):
# combined score on the x-axis, participation rate on the y-axis,
# with the 'State' value included in each point's hover text.
px.scatter(
    data_frame=sat.to_df(),
    x='Combined',
    y='Participation Rate',
    hover_data={'State': True},
    title='Participation Rate vs. Combined SAT Score for States in 2014',
)

### Quick Check 2

In [None]:
# Load data/wnba-2020.csv and keep only the five columns named in select().
wnba = (
    Table.read_table('data/wnba-2020.csv')
         .select('Player', 'Tm', 'Pos', 'G', 'PTS')
)

In [None]:
wnba

In [None]:
# wnba.where(____, ____) \
#     .where('G',____) \
#     .column(____).mean()