# Class 006 Tables

In [None]:
from datascience import *
import numpy as np
import matplotlib.pyplot as plots
# Use matplotlib's 'fivethirtyeight' style sheet for the plots in this notebook.
plots.style.use('fivethirtyeight')
# Fix for datascience plots
# NOTE(review): no fix code follows the comment above in this cell -- confirm whether it is stale.

# import for plotting
# This is an IPython magic rather than an import: it asks the notebook to render figures inline.
%matplotlib inline

## Create a table object from an array.
As an example we can create a table directly from data inserted into an array. The simplest table has a single column of data.

In [None]:
# Build an array from a sequence of values.
# There are twelve counts here, one for each month.
monthly_counts = (0, 0, 0, 1, 0, 0, 0, 1, 5, 1, 0, 0)
tornados_by_month = make_array(*monthly_counts)
tornados_by_month

In [None]:
# A column is a label paired with an array of values.
# Start from an empty table and attach the 'Tornados' column to it.
empty_table = Table()
T = empty_table.with_columns('Tornados', tornados_by_month)
T

## Add a column to a table 

In [None]:
# Extend the existing table with a second column.
# np.arange(1, 13) produces the array 1, 2, ..., 12 (the stop value is excluded),
# so once more the new column is a label plus an array.
# The resulting table has two columns: 'Tornados' and 'Month'.
month_numbers = np.arange(1, 13)
T = T.with_columns('Month', month_numbers)

In [None]:
# Display the table, which now holds the 'Tornados' and 'Month' columns.
T

## Sort a table

In [None]:
# Order the rows by the first column (index 0), largest values first.
# .sort() returns a new table; T itself is not modified.
sort_column = 0
T.sort(sort_column, descending=True)

## Select a subset of columns from a table to create a new table

In [None]:
# .select() is for selecting one or more columns from a table
# It returns a new table; T itself is not modified
# Here we keep only the 'Tornados' column

T.select("Tornados")

## Select a subset of rows from a table to create a new table

In [None]:
# .take() is for taking rows from a table
# Again, it returns a new table
# Row indices start at 0, so 0, 1, 2 are the first three rows

T.take(0,1,2)

In [None]:
# np.arange is a convenient way to generate a run of row indices.
# np.arange(3) is the array 0, 1, 2, so this also takes the first three rows.
first_three_rows = np.arange(3)
T.take(first_three_rows)

# Census data example

In [None]:
# Load the CSV file found at data_url into a table.
# This cell needs network access to reach the web site.
data_url = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2020/national/asrh/nc-est2020-agesex-res.csv'

# read_table is called on the Table class itself; no empty table is needed first.
full_census_table = Table.read_table(data_url)
full_census_table

## List all of the column labels

In [None]:
# Let's look at all of the table's column labels
# .labels is an attribute, not a method, so there are no parentheses

full_census_table.labels

In [None]:
# .select() builds a new table that contains only the named columns.
# List the labels we want, then pass them all to .select().
columns_of_interest = ('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2020')
partial_census_table = full_census_table.select(*columns_of_interest)
partial_census_table

## Extract the data from a table column as an array

In [None]:
# .select() gives back a table, whereas .column() gives back
# the values of the chosen column themselves, as an array.
# Check the type of what .column() returns.
age_values = partial_census_table.column("AGE")
type(age_values)

In [None]:
# Display the values of the AGE column as an array.
partial_census_table.column("AGE")

## Rename a column

In [None]:
# Rename columns with .relabeled(old_label, new_label).
# Each call returns a new table with one column renamed,
# so three renames can be chained one after another.
us_pop = (
    partial_census_table
    .relabeled('POPESTIMATE2010', '2010')
    .relabeled('POPESTIMATE2020', '2020')
    .relabeled('SEX', 'GENDER')
)
us_pop

## Extract the rows of a table that satisfy a condition -- where()

In [None]:
# The default predicate for where is are.equal_to():
# passing a plain value is shorthand for are.equal_to(value).
# This example returns a table with just the entries of
# folks who are 81 years old.

us_pop.where('AGE', 81)

In [None]:
# Spell the predicate out explicitly: keep only the rows whose AGE equals 81.
# Passing a plain value to .where() is shorthand for are.equal_to(value).
us_pop.where('AGE', are.equal_to(81))

In [None]:
# are.above(70) keeps the rows whose AGE is strictly greater than 70.
# NOTE(review): any large placeholder AGE codes in the data (e.g. a row of totals) would also match -- confirm.
us_pop.where('AGE', are.above(70))

In [None]:
# are.between(5, 10) includes the lower bound and excludes the upper bound,
# so this keeps ages 5, 6, 7, 8 and 9.
us_pop.where('AGE', are.between(5, 10))

## Chaining methods and plotting table data

In [None]:
# Chaining methods: each step works on the table returned by the previous one.
#   1. keep the rows where GENDER is 0
#   2. keep only the AGE, 2010 and 2020 columns
#   3. keep the rows where AGE is below 100
#   4. plot the remaining columns with AGE on the horizontal axis
(
    us_pop
    .where('GENDER', 0)
    .select('AGE', '2010', '2020')
    .where('AGE', are.below(100))
    .plot('AGE')
)

Note that this plot illustrates the aging of the US population between 2010 and 2020.

## Cyril Snodbottom's GPA

### Here is how Temple calculates your GPA:

To Compute Semester Grade Point Average:

* Multiply the value of the grade by the course's number of credit hours to get 'Quality Points'.
* Add up the quality points for all courses to get the total quality points.
* Divide total number of quality points by the total number of GPA hours completed in courses that yield quality points.

### Create the table

In [None]:
# One array per column; position i in every array describes the same course.
courses = make_array(
    "Freshman Seminar",
    "Intermediate Algebra",
    "Elements of Data Science",
    "Intellectual Heritage I",
    "Analytical Reading and Writing",
)
grades = make_array("C+", "B", "A", "B+", "B-")
class_gpa = make_array(2.33, 3, 4, 3.33, 2.67)  # numeric value of each grade
credits = make_array(1, 4, 3, 3, 3)  # credit hours of each course

# Assemble the table from alternating label / array arguments.
transcript = Table().with_columns(
    "Course", courses,
    "Grade", grades,
    "Class GPA", class_gpa,
    "Credits", credits,
)
transcript

### Calculate the "quality points."
Multiply the value of the grade by the course's number of credit hours to get 'Quality Points'.

In [None]:
# Recall that .column() extracts the data array from a column
# Here: the numeric grade value of every course
transcript.column("Class GPA")

In [None]:
# Arrays can be multiplied together: the product is taken element by element.
grade_values = transcript.column("Class GPA")
credit_hours = transcript.column("Credits")
grade_values * credit_hours

In [None]:
# Putting it together:
#   1. extract the two columns as arrays,
#   2. multiply them element by element,
#   3. store the product in the table as a new "Quality Points" column.
grade_values = transcript.column("Class GPA")
credit_hours = transcript.column("Credits")
qp = grade_values * credit_hours
transcript = transcript.with_columns("Quality Points", qp)
transcript

### Compute the GPA
* Add up the quality points for all courses to get the total quality points.
* Divide total number of quality points by the total number of GPA hours completed in courses that yield quality points.

In [None]:
# np.sum() adds up all the values in an array.
# GPA = total quality points / total credit hours
total_quality_points = np.sum(transcript.column("Quality Points"))
total_credits = np.sum(transcript.column("Credits"))
gpa = total_quality_points / total_credits
print("Cyril's GPA is:", gpa)

In [None]:
# Compute the GPA again, then report it to two decimal places.
# round(value, 2) rounds value off to two decimal places.
total_quality_points = np.sum(transcript.column("Quality Points"))
total_credits = np.sum(transcript.column("Credits"))
gpa = total_quality_points / total_credits
rounded_gpa = round(gpa, 2)
print("Cyril's GPA is:", rounded_gpa)