In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
import psycopg2
import pandas as pd

In [4]:
from src.data import sql_utils

In [5]:
DBNAME = "opportunity_youth"

In [6]:
conn = psycopg2.connect(dbname=DBNAME)

In [7]:
oy_by_puma = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, n.puma_name
               FROM pums_2017 p
               JOIN puma_names_2010 n
               ON p.puma = n.puma
               WHERE p.puma BETWEEN '11610' AND '11615'
               AND (p.esr = '3' OR p.esr = '6')
               AND p.sch = '1'
               AND p.agep BETWEEN '16' AND '24'
               GROUP BY n.puma_name
               ;""", conn)
oy_by_puma

Unnamed: 0,oy_count,puma_name
0,1853.0,"King County (Central)--Renton City, Fairwood, ..."
1,1977.0,"King County (Far Southwest)--Federal Way, Des ..."
2,1210.0,"King County (Southeast)--Maple Valley, Covingt..."
3,2006.0,King County (Southwest Central)--Kent City ...
4,1530.0,King County (Southwest)--Auburn City & Lakelan...
5,2038.0,"King County (West Central)--Burien, SeaTac, Tu..."


In [8]:
total_oy = oy_by_puma.sum()
total_oy

oy_count                                                 10614
puma_name    King County (Central)--Renton City, Fairwood, ...
dtype: object

#### From above we can see that over 75% of OY have over a 30 minute travel time to work 8,544 out of 11,115

In [83]:
birth = pd.read_sql("""SELECT pwgtp AS oy_count, jwtr, fer, sex, jwmnp, n.puma_name
               FROM pums_2017 p
               JOIN puma_names_2010 n
               ON p.puma = n.puma
               WHERE p.puma BETWEEN '11610' AND '11615'
               AND (p.esr != '3' AND p.esr != '6')
               AND p.sch = '1'
               AND p.agep BETWEEN '16' AND '24'
               ;""", conn)
gave_birth = birth['fer'].value_counts()
birth['sex'].value_counts()


1    587
2    481
Name: sex, dtype: int64

461    1
20     1
Name: fer, dtype: int64

### How many OY have given birth within the past 12 months of the report?

In [121]:
birth = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr = '3' OR esr = '6')
                          AND sch = '1'
                          AND fer = '1'
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)
birth['oy_count'].sum()

1028.0

In [113]:
birth['oy_count'].sum()


608.0

### Needed to specify females

In [125]:
sex = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr = '3' OR esr = '6')
                          AND sch = '1'
                          AND sex = '2'
                          AND fer = '1'
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)
sex['oy_count'].sum()

1028.0

In [128]:
sex2 = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr = '3' OR esr = '6')
                          AND sch = '1'
                          AND sex = '2'
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)
sex2['oy_count'].sum()

5665.0

### 11,927 people in the same age group as OY have over a 30 min commute to work, could play a factor in number of OY.

In [150]:
trav = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr != '3' AND esr != '6')
                          
                          AND jwmnp > '030'
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)
trav['oy_count'].sum()

11927.0

In [146]:
trav_pop = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr != '3' AND esr != '6')
                        
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)
trav_pop['oy_count'].sum()

44770.0

In [152]:
trav_pop_less30 = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr != '3' AND esr != '6')
                          AND jwmnp < '30'
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)
trav_pop_less30['oy_count'].sum()

23623.0

### how many OY have children? May need to stay with child and not be able to work

In [165]:
 = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr = '3' OR esr = '6')
                          AND sch = '1'
                          AND agep BETWEEN '16' AND '24'
                          GROUP BY puma
                          ;""", conn)


Unnamed: 0,oy_count,puma
0,1853.0,11610
1,2038.0,11611
2,1977.0,11612
3,2006.0,11613
4,1530.0,11614
5,1210.0,11615


### Cognitive difficulty

In [176]:
cog = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr != '3' AND esr != '6')
                          AND sch = '1'
                          AND agep BETWEEN '16' AND '24'
                          AND drem = '1'
                          GROUP BY puma
                          ;""", conn)
cognitive_diff = cog['oy_count'].sum()
cognitive_diff

561.0

#### Vision difficulty

In [177]:
vis = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr != '3' AND esr != '6')
                          AND sch = '1'
                          AND agep BETWEEN '16' AND '24'
                          AND deye = '1'
                          GROUP BY puma
                          ;""", conn)
vis_difficulty = vis['oy_count'].sum()
vis_difficulty


434.0

### hearing difficulty

In [178]:
hear = pd.read_sql("""SELECT SUM(pwgtp) AS oy_count, puma
                          FROM pums_2017 
                          WHERE puma BETWEEN '11610' AND '11615'
                          AND (esr != '3' AND esr != '6')
                          AND sch = '1'
                          AND agep BETWEEN '16' AND '24'
                          AND dear = '1'
                          GROUP BY puma
                          ;""", conn)
hear_difficultuy = hear['oy_count'].sum()
hear_difficultuy

339.0