# Census

In [None]:
# Wildcard import: names used below without any other import (Table, are,
# PercentFormatter) presumably come from here.
from datascience import *
import numpy as np

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plots
# Apply the 'fivethirtyeight' style sheet to plots drawn after this call.
plots.style.use('fivethirtyeight')

## Table Review: Welcome Survey

In [None]:
# The survey table holds only respondents who answered all 7 questions.
welcome_csv = 'data/welcome_survey_fa21.csv'
welcome = Table.read_table(welcome_csv)

# Preview the first 5 rows.
welcome.show(5)

In [None]:
# Largest value in the 'Number of textees' column (a daily average per respondent)
textee_counts = welcome.column('Number of textees')
max(textee_counts)

In [None]:
# Rows for respondents whose 'Number of textees' is exactly 100
textees_is_100 = are.equal_to(100)
welcome.where('Number of textees', textees_is_100)

In [None]:
# Count the students who average at least 8 hours of sleep per night.
well_rested = welcome.where('Hours of sleep', are.above_or_equal_to(8))
well_rested.num_rows

In [None]:
# Mean of the 'Hours of sleep' column across all respondents
hours_of_sleep = welcome.column('Hours of sleep')
np.average(hours_of_sleep)

In [None]:
# Average hours of sleep among respondents whose sleep position contains 'side'
side_sleepers = welcome.where('Sleep position', are.containing('side'))
side_sleeper_hours = side_sleepers.column('Hours of sleep')
np.average(side_sleeper_hours)

In [None]:
# Fraction of all respondents whose sleep position contains 'side'
side_count = welcome.where('Sleep position', are.containing('side')).num_rows
side_count / welcome.num_rows

In [None]:
# Build a table holding just the two sleep-related columns,
# then rename them by label to 'Hours' and 'Position'.
two_col = welcome.select('Hours of sleep', 'Sleep position')
sleep = two_col.relabeled('Hours of sleep', 'Hours')
sleep = sleep.relabeled('Sleep position', 'Position')
sleep.show(3)

In [None]:
# Second way: relabel by column index (0 and 1) instead of by current label
two_col_new = welcome.select('Hours of sleep', 'Sleep position')
sleep_new = two_col_new.relabeled(0, 'Hours')
sleep_new = sleep_new.relabeled(1, 'Position')
sleep_new.show(3)

## Discussion Question: NBA Salaries

In [None]:
# Load the salary table, rename the column at index 3 to 'SALARY',
# and leave out the 'TEAM' column.
nba_raw = Table.read_table('data/nba_salaries.csv')
nba = nba_raw.relabeled(3, 'SALARY').drop('TEAM')
nba.show(3)

In [None]:
# Question (a): names of players at position 'PG' whose SALARY is above 15
guards = nba.where('POSITION', 'PG')
well_paid_guards = guards.where('SALARY', are.above(15))
well_paid_guards.column('PLAYER')

In [None]:
# Question (b): what does the second expression evaluate to?
# Note that the result of drop is not assigned to any name here.
nba.drop('POSITION')
nba.num_columns

## Census ##

In [None]:
# Read the population-estimates file and display the resulting table.
census_csv = 'data/nc-est2019-agesex-res.csv'
full = Table.read_table(census_csv)
full

In [None]:
# Keep only the SEX and AGE columns plus the 2014 and 2019 population estimates.
kept_labels = ('SEX', 'AGE', 'POPESTIMATE2014', 'POPESTIMATE2019')
partial = full.select(*kept_labels)
partial.show(5)

In [None]:
# Shorten the two estimate labels (columns 2 and 3) to their years.
with_2014 = partial.relabeled(2, '2014')
us_pop = with_2014.relabeled(3, '2019')
us_pop.show(5)

In [None]:
# Rows whose AGE is 100 or more, ordered by AGE.
# Any row with AGE 999 (the value later cells filter on) also passes this test.
age_100_or_more = us_pop.where('AGE', are.above_or_equal_to(100))
age_100_or_more.sort('AGE')

## 2019 Sex Ratios ##

In [None]:
# Work with the 2019 estimates only: remove the '2014' column.
dropped_label = '2014'
us_pop_2019 = us_pop.drop(dropped_label)
us_pop_2019.show(3)

In [None]:
# Rows where AGE is 999. A plain value passed to where() is matched by
# equality, the same form the 'PG' filter uses in the NBA section.
all_ages = us_pop_2019.where('AGE', 999)
all_ages

In [None]:
# Rows where AGE is 0. A plain value passed to where() is matched by equality.
infants = us_pop_2019.where('AGE', 0)
infants

In [None]:
# SEX code 2 selects the female rows. AGE 999 rows (presumably the all-ages
# totals, cf. `all_ages`) are then excluded.
sex_is_female = are.equal_to(2)
females_all_rows = us_pop_2019.where('SEX', sex_is_female)
females = females_all_rows.where('AGE', are.not_equal_to(999))
females.show(3)

In [None]:
# SEX code 1 selects the male rows. AGE 999 rows (presumably the all-ages
# totals, cf. `all_ages`) are then excluded.
sex_is_male = are.equal_to(1)
males_all_rows = us_pop_2019.where('SEX', sex_is_male)
males = males_all_rows.where('AGE', are.not_equal_to(999))
males.show(3)

In [None]:
# Female-to-male population ratio at each age in 2019.
# Element-wise division pairs row i of `females` with row i of `males`, so
# first confirm the two tables list the same ages in the same order;
# otherwise the ratios would silently mix different ages.
assert np.array_equal(females.column('AGE'), males.column('AGE')), \
    'females and males must have matching AGE rows'

# Use the '2019' label (column index 2 in both tables) so the division does
# not depend on column position.
f_to_m_ratios = females.column('2019') / males.column('2019')

ratios = Table().with_columns(
    'Age', females.column('AGE'),
    'F:M Ratio', f_to_m_ratios
)

ratios

In [None]:
# Same table ordered from the highest age down.
oldest_first = ratios.sort('Age', descending=True)
oldest_first

## Line Plot ##

In [None]:
# Line plot with 'Age' on the horizontal axis and 'F:M Ratio' on the vertical axis.
ratios.plot('Age', 'F:M Ratio')

## Optional: Adding a Title to a Plot ##

In [None]:
# Draw the same line plot, then add a title through pyplot.
# The title call must come after the plot call so it applies to this figure;
# the trailing semicolon suppresses the text repr of the returned object.
ratios.plot('Age', 'F:M Ratio')
plots.title('2019 US Population');

## Optional: set_format and PercentFormatter ##

In [None]:
# Redisplay the AGE == 0 rows of the 2019 table built earlier.
infants

In [None]:
# Take the first entry of the '2019' column as the infant total.
# NOTE(review): this presumably relies on row 0 being the both-sexes row —
# confirm against the `infants` table displayed above.
infant_counts_2019 = infants.column('2019')
total_infants_2019 = infant_counts_2019.item(0)
total_infants_2019

In [None]:
# Express each row's 2019 count as a fraction of total_infants_2019 and
# attach it to the table as a 'Proportion' column.
proportion_of_total = infants.column('2019') / total_infants_2019
infants = infants.with_column('Proportion', proportion_of_total)
infants

In [None]:
# Show the 'Proportion' column as percentages for easier reading.
# set_format changes the display format of `infants` itself.
percent_label = 'Proportion'
infants.set_format(percent_label, PercentFormatter)

In [None]:
# The percent format set on `infants` in the previous cell persists,
# so displaying the table again still shows percentages.
infants