In [23]:
import pandas as pd, numpy as np
import crime as cr

In [33]:
cr.sources('dist_grad_rate')

Students with Disabilities Graduation Rate by District
https://dev.socrata.com/foundry/data.colorado.gov/aze8-926p 

Graduation data by school district and Instructional program service
type for the 2011-2012 school year. The completion counts and rates
include all students who graduate on-time with a regular diploma plus
students who complete on-time with a GED or non-diploma certificate.
It is important to note that graduates are included in the completer
count and rate, therefore the completion counts and rates for any
school or district will be greater than or equal to the graduation
rate.

Rows:    185
Cols:    38
Period:  2011 to 2012

COLUMNS:
-------
County Name
  Field:  county_name
  Type:   text
  Null:   2
  Count:  183
  ITEMS:
     ADAMS  (20)
     ARAPAHOE  (19)
     BACA  (18)
     BOULDER  (17)
     CONEJOS  (16)
     ELBERT  (15)
     EL PASO  (14)
     FREMONT  (13)
     GARFIELD  (12)
     KIT CARSON  (11)
     LA PLATA  (10)
     LAS ANIMAS  (9)
     LINCOLN  (8)
 

### Topics covered in census data
- Demographic Age and Sex,
- Group Quarters Population,
- Race,
- Relationship,
- Total Population,
- Social,
- Ancestry,
- Citizenship Status,
- Disability Status,
- Educational Attainment,
- Fertility,
- Field of Degree,
- Grandparents as Caregivers,
- Language,
- Marital History,
- Marital Status,
- Place of Birth,
- School Enrollment,
- Residence 1 Year Ago/Migration,
- Veterans,
- Year of Entry,
- Economic,
- Class of Worker,
- Commuting to Work/Journey to Work,
- Employment Status,
- Food Stamps/Supplemental Nutrition,
- Assistance Program (SNAP),
- Health Insurance Coverage,
- Income and Earnings,
- Industry and Occupation,
- Poverty,
- Work Status,
- Housing,
- Computer Ownership & Internet Access,
- House Heating Fuel,
- Kitchen,
- Facilities,
- Occupancy/Vacancy Status,
- Occupants per Room,
- Owner Monthly Costs,
- Plumbing Facilities,
- Rent Statistics,
- Rooms / Bedrooms,
- Telephone Service Available,
- Tenure,
- Units in Structure,
- Value of Home,
- Vehicles Available,
- Year Householder Moved Into Unit,
- Year Structure Built.

#### Load Data

In [26]:
# 2019 Census data
df_2019_raw = cr.load('county_demographics', full=True)

# 2012 Census data
df_2012_raw = pd.read_csv('raw/census_counties_2012.csv')

# Load descriptions
desc_raw = cr.load('census_field_desc', full=True)
desc_raw = desc_raw[desc_raw.type == 'acs_standard'][['apifieldname', 'description']].rename(columns={'apifieldname':'column'})

#### Descriptions of ALL available columns in census data

In [27]:
desc_raw

Unnamed: 0,column,description
0,geoname,Geographic Area common name
1,geonum,"Comma delimited list (no quotes, leading zeros..."
2,geojson,Well Known Text field (WKT) describing the bou...
3,pop,Population Estimate for the given time range
4,hispanic,Estimate for the Hispanic Population
...,...,...
153,emp,Estimated number of people in the civilian lab...
154,unemp,Estimated number of people in the civilian lab...
155,armedfrcs,Estimated number of people in the armed forces
156,not_lf,Estimated number of people not in the labor force


#### 2019 Census Data

In [28]:
df = df_2019_raw.copy()[[
    'geoname',
    'med_age',
    # Compare population to num of households
    'pop', 'households',
    # Employment rate
    'emp', 'unemp',
    # Demographics. Reduce to white, black, hispanic, other
    'hispanic', 'white_nh', 'black_nh', 'ntvam_nh', 'asian_nh', 'hawpi_nh', 'other_nh', 'twoplus_nh',
    # Total housing units vs num vacant.
    'housing_un', 'vac_hu',
    # Adults (25+) who didn't grad high school
    'nohsdipl',
    # Income. Pick household or per-capita
    'med_hh_inc', 'per_cap_in',
    # Citizenship. Look for foreigners, or areas popular with out-of-staters
    'not_citz', 'brn_oth_st',
    # School enrollment. Compare HS enrollments to total enrollments. (HS dropouts -> crime??)
    'pop_3pl', 'enrolled', 'n_enrolled', 'gr_9_12',
    # median house value and age
    'med_hm_val', 'med_yr_blt',
    # Frequency distribution of OWNER OCCUPIED housing by price. own_occ_hu is total num of occupied housing units.
    'own_occ_hu', 'v_l_50k', 'v50k_100k', 'v100k_150k', 'v150k_200k', 'v200k_250k', 'v250k_300k', 'v300k_400k', 'v400k_500k', 'v500k_750k', 'v750k_1m', 'v_1m_plus',
    # total poverty level. (Divide 'ps_below' by 'ps_uni', NOT 'pop')
    'ps_uni', 'ps_below',
    # poverty level 18 and under. (Divide 'pov_l18' by 'tot_l18', NOT 'pop')
    'tot_l18', 'pov_l18',
]]
# Reduce race
df['race_other'] = df.pop('ntvam_nh') + df.pop('asian_nh') + df.pop('hawpi_nh') + df.pop('other_nh') + df.pop('twoplus_nh')
df.insert(9, 'race_other', df.pop('race_other'))

# Reduce housing prices
df['v50k_150k'] = df.pop('v50k_100k') + df.pop('v100k_150k')
df['v150k_250k'] = df.pop('v150k_200k') + df.pop('v200k_250k')
df['v250k_400k'] = df.pop('v250k_300k') + df.pop('v300k_400k')
df['v400k_750k'] = df.pop('v400k_500k') + df.pop('v500k_750k')
df['v750k_plus'] = df.pop('v750k_1m') + df.pop('v_1m_plus')

# Make county column match format of other datasets
df = df.rename(columns={'geoname':'county'})
df.county = df.county.str.upper()

# Save
df.to_csv('output/demographics_2019.csv')
df_2019 = df
df_2019

Unnamed: 0,county,med_age,pop,households,emp,unemp,hispanic,white_nh,black_nh,race_other,...,v_l_50k,ps_uni,ps_below,tot_l18,pov_l18,v50k_150k,v150k_250k,v250k_400k,v400k_750k,v750k_plus
0,PHILLIPS,37.1,4290.0,1689.0,1948.0,28.0,1027.0,3178.0,5.0,80.0,...,123.0,4210.0,256.0,1143.0,42.0,446.0,359.0,204.0,80.0,2.0
1,ARCHULETA,50.9,13253.0,5858.0,6088.0,329.0,2435.0,10116.0,185.0,517.0,...,143.0,13150.0,1153.0,2302.0,225.0,543.0,720.0,1580.0,846.0,398.0
2,DENVER,34.5,705576.0,301501.0,402046.0,15669.0,210937.0,382228.0,62754.0,49657.0,...,2003.0,693555.0,89203.0,138285.0,25172.0,7342.0,23973.0,44602.0,52993.0,19469.0
3,KIOWA,42.5,1489.0,638.0,697.0,20.0,72.0,1365.0,6.0,46.0,...,95.0,1460.0,177.0,391.0,54.0,224.0,87.0,48.0,20.0,0.0
4,HUERFANO,55.0,6679.0,3225.0,2560.0,82.0,2290.0,4083.0,10.0,296.0,...,132.0,6510.0,1030.0,1047.0,240.0,927.0,599.0,413.0,157.0,86.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,CONEJOS,38.8,8128.0,3183.0,3029.0,354.0,4263.0,3622.0,40.0,203.0,...,330.0,8089.0,1756.0,2164.0,636.0,1231.0,676.0,220.0,52.0,19.0
60,ADAMS,33.8,504108.0,166450.0,261893.0,10917.0,201784.0,252170.0,16139.0,34015.0,...,6278.0,499315.0,54159.0,134212.0,19943.0,5851.0,23243.0,49680.0,22451.0,2705.0
61,EAGLE,37.0,54681.0,18171.0,34128.0,647.0,16179.0,36748.0,615.0,1139.0,...,575.0,54401.0,4354.0,11805.0,1184.0,766.0,848.0,1760.0,4862.0,3869.0
62,MOFFAT,36.6,13127.0,5366.0,6163.0,282.0,2044.0,10543.0,130.0,410.0,...,262.0,13003.0,2206.0,3361.0,680.0,960.0,1186.0,824.0,209.0,34.0


#### 2012 Census Data

In [29]:
df = df_2012_raw.copy()[[
    'geoname',
    'med_age',
    # Compare population to num of households
    'pop', 'households',
    # Employment rate
    'emp', 'unemp',
    # Demographics. Reduce to white, black, hispanic, other
    'hispanic', 'white_nh', 'black_nh', 'ntvam_nh', 'asian_nh', 'hawpi_nh', 'other_nh', 'twoplus_nh',
    # Total housing units vs num vacant.
    'housing_un', 'vac_hu',
    # Adults (25+) who didn't grad high school
    'nohsdipl',
    # Income. Pick household or per-capita
    'med_hh_inc', 'per_cap_in',
    # Citizenship. Look for foreigners, or areas popular with out-of-staters
    'not_citz', 'brn_oth_st',
    # School enrollment. Compare HS enrollments to total enrollments. (HS dropouts -> crime??)
    'pop_3pl', 'enrolled', 'n_enrolled', 'gr_9_12',
    # median house value and age
    'med_hm_val', 'med_yr_blt',
    # Frequency distribution of OWNER OCCUPIED housing by price. own_occ_hu is total num of occupied housing units.
    'own_occ_hu', 'v_l_50k', 'v50k_100k', 'v100k_150k', 'v150k_200k', 'v200k_250k', 'v250k_300k', 'v300k_400k', 'v400k_500k', 'v500k_750k', 'v750k_1m', 'v_1m_plus',
    # total poverty level. (Divide 'ps_below' by 'ps_uni', NOT 'pop')
    'ps_uni', 'ps_below',
    # poverty level 18 and under. (Divide 'pov_l18' by 'tot_l18', NOT 'pop')
    'tot_l18', 'pov_l18',
]]
# Reduce race
df['race_other'] = df.pop('ntvam_nh') + df.pop('asian_nh') + df.pop('hawpi_nh') + df.pop('other_nh') + df.pop('twoplus_nh')
df.insert(9, 'race_other', df.pop('race_other'))

# Reduce housing prices
df['v50k_150k'] = df.pop('v50k_100k') + df.pop('v100k_150k')
df['v150k_250k'] = df.pop('v150k_200k') + df.pop('v200k_250k')
df['v250k_400k'] = df.pop('v250k_300k') + df.pop('v300k_400k')
df['v400k_750k'] = df.pop('v400k_500k') + df.pop('v500k_750k')
df['v750k_plus'] = df.pop('v750k_1m') + df.pop('v_1m_plus')

# Make county column match format of other datasets
df = df.rename(columns={'geoname':'county'})
df.county = df.county.str.upper()

# Save
df.to_csv('output/demographics_2012.csv')
df_2012 = df
df_2012

Unnamed: 0,county,med_age,pop,households,emp,unemp,hispanic,white_nh,black_nh,race_other,...,v_l_50k,ps_uni,ps_below,tot_l18,pov_l18,v50k_150k,v150k_250k,v250k_400k,v400k_750k,v750k_plus
0,ARAPAHOE,35.7,574357,223747,292089,25952,105174,364766,55629,48788,...,4207,568999,66945,144576,23054,22174,55935,38213,16339,6290
1,MINERAL,60.3,702,363,370,21,15,671,9,7,...,45,702,47,26,0,31,62,80,56,31
2,MONTROSE,42.6,40994,16732,18110,2014,8037,31799,186,972,...,1311,40368,5565,9788,1927,2276,4496,2734,1370,219
3,PARK,47.0,16168,6997,8796,787,777,14818,5,568,...,246,16049,1355,3049,276,585,2295,2184,764,159
4,MORGAN,36.0,28200,10489,12758,1028,9557,17399,701,543,...,923,27416,4002,7670,1454,2813,2054,643,238,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,ELBERT,43.0,23058,8195,11789,1032,1280,20841,73,864,...,216,22944,1384,5784,645,466,1365,2939,2014,398
60,FREMONT,43.0,47006,17200,14687,1183,5839,36270,2736,2161,...,1207,35583,5376,7257,1692,4236,4322,1836,592,176
61,GARFIELD,34.7,56330,20272,29185,2481,15743,38992,267,1328,...,923,55299,6031,15034,2139,1219,2431,3833,4002,1086
62,WELD,33.2,253552,89553,120443,9374,71792,171475,1969,8316,...,4521,247164,35581,69474,13265,14469,24157,13222,5397,1339


**Field Descriptions**

In [31]:
# Make frame from chosen columns
df = df_2019.copy()
df = df.rename(columns={'county':'geoname'})
cols = df.columns.to_frame().reset_index(drop=True)
cols.columns = ['column']

# Get descriptions
desc = desc_raw.copy()

# Merge and rename any necessary columns
cols = cols.merge(desc)
cols.iat[0,0] = 'county'

# Save
cols.to_csv('output/demographics_field_desc.csv')

cols

Unnamed: 0,column,description
0,county,Geographic Area common name
1,med_age,Median age of the population for the area
2,pop,Population Estimate for the given time range
3,households,Estimate for the total number of household
4,emp,Estimated number of people in the civilian lab...
5,unemp,Estimated number of people in the civilian lab...
6,hispanic,Estimate for the Hispanic Population
7,white_nh,"Estimate for the White, Non-Hispanic Population"
8,black_nh,"Estimate for the Black, Non-Hispanic Population"
9,housing_un,Estimated total number of housing units


In [13]:
cr.sources('county_demographics')

Census Counties in Colorado 2019
https://dev.socrata.com/foundry/data.colorado.gov/8j3i-rjn4 

American Community Survey Census data includes demographics, education
level, commute information, and more subset to Colorado by the
Department of Local Affairs (DOLA).

Rows:    64
Cols:    157
Period:  2019 to 2019

COLUMNS:
-------
The Geom
  Field:  the_geom
  Type:   multipolygon
  Null:   -
  Count:  -

Geoname
  Field:  geoname
  Type:   text
  Null:   -
  Count:  64
  ITEMS:
     Pitkin  (1)
     Prowers  (1)
     Jackson  (1)
     Conejos  (1)
     Broomfield  (1)
     Larimer  (1)
     Kit Carson  (1)
     Hinsdale  (1)
     Boulder  (1)
     Bent  (1)
     Arapahoe  (1)
     Adams  (1)
     Alamosa  (1)
     Delta  (1)
     Pueblo  (1)
     Washington  (1)
     El Paso  (1)
     Logan  (1)
     Mineral  (1)
     Custer  (1)

Geonum
  Field:  geonum
  Type:   number
  Null:   -
  Count:  64
  Min:    108,001
  Max:    108,125
  Avg:    -
  Sum:    -

Pop
  Field:  pop
  Type:   num

In [12]:
df = cr.sources()
df.loc[df.index.isin([
    'crime_16_19',
    'crime_97_15',
    'dist_arrests',
    'dist_student_mobility',
    'dist_grad_rate',
    'county_demographics',
    'county_population',
    ])]

Unnamed: 0,Topic,Location,Rows,Type,From,To,Full Name,Web URL
crime_16_19,Crime,Colorado,1851996,Records,2016,2019,Crimes in Colorado,https://dev.socrata.com/foundry/data.colorado....
crime_97_15,Crime,Colorado,4952282,Records,1997,2015,Crimes in Colorado 1997 to 2015,https://dev.socrata.com/foundry/data.colorado....
dist_arrests,Crime,Colorado,93114,Records,2001,2016,Crime Arrests by Police District 2001-2016 in ...,https://dev.socrata.com/foundry/data.colorado....
dist_student_mobility,Education,Colorado,184,Aggregate,2011,2012,District Student Mobility/Stability Statistics...,https://dev.socrata.com/foundry/data.colorado....
dist_grad_rate,Education,Colorado,185,Aggregate,2011,2012,District Graduation Data by Instructional Prog...,https://dev.socrata.com/foundry/data.colorado....
county_demographics,Other,Colorado,64,Aggregate,2019,2019,Census Counties in Colorado 2019,https://dev.socrata.com/foundry/data.colorado....
county_population,Other,Colorado,381504,Aggregate,1990,2040,Population Colorado,https://dev.socrata.com/foundry/data.colorado....


In [5]:
import crime as cr
cr.sources()

Unnamed: 0,Topic,Location,Rows,Type,From,To,Full Name,Web URL
crime_16_19,Crime,Colorado,1851996,Records,2016,2019,Crimes in Colorado,https://dev.socrata.com/foundry/data.colorado....
crime_97_15,Crime,Colorado,4952282,Records,1997,2015,Crimes in Colorado 1997 to 2015,https://dev.socrata.com/foundry/data.colorado....
dist_arrests,Crime,Colorado,93114,Records,2001,2016,Crime Arrests by Police District 2001-2016 in ...,https://dev.socrata.com/foundry/data.colorado....
dist_crime,Crime,Colorado,78843,Records,2001,2016,Crime Offenses by Police District 2001-2016 in...,https://dev.socrata.com/foundry/data.colorado....
marijuana_arrests,Crime,Colorado,7697,Records,2012,2017,Department Of Public Safety - Colorado Bureau ...,https://dev.socrata.com/foundry/data.colorado....
marijuana_offenses,Crime,Colorado,6790,Records,2012,2017,Department Of Public Safety - Colorado Bureau ...,https://dev.socrata.com/foundry/data.colorado....
crime_rates,Crime,Colorado,33,Aggregate,1980,2012,Crime Rates 1980-2012,https://dev.socrata.com/foundry/data.colorado....
crime_vs_incarceration,Crime,Colorado,31,Aggregate,1982,2012,Total Crime Rate vs Incarceration Rate Chart,https://dev.socrata.com/foundry/data.colorado....
crime_15_22,Crime,Colorado Springs,311425,Records,2015,2022,Crime Level Data,https://dev.socrata.com/foundry/policedata.col...
officer_shootings,Crime,Colorado Springs,112,Records,2010,2022,Officer Involved Shootings,https://dev.socrata.com/foundry/policedata.col...
