# Ex 7.3: Part II (In Class)

### Part 2: What are the Top 15 NC Counties for Percent of the Population 25 and over that have a Graduate or Professional Degree (2020)? 

In [1]:
import pandas as pd
import json
import requests
import pprint

import plotly.express as px

# 1. Build the API Request URL

### 1. How to Start your Data Request  
- Requests always begin with: https://api.census.gov/data  

In [2]:
# Root endpoint of the Census Data API; every other URL piece is appended to this
base_url = "https://api.census.gov/data"

### 2. Add the Dataset Name


In [3]:
# Dataset path segment appended directly after base_url (2020 / acs / acs5 / profile)
dataset_name = "/2020/acs/acs5/profile"

### 3. Start your Variable Request


In [4]:
# Start of the query string; the comma-separated variable list follows "get="
get_start = "?get="

### 4. Add your Variables  
- **DP02_0066PE**: Percent of Population 25 years and over with Graduate or professional degree


In [5]:
# Comma-separated variables to request (no spaces):
#   NAME        -> geography label, e.g. "Anson County, North Carolina"
#   DP02_0066PE -> percent of population 25+ with a graduate or professional degree
get_variables = "NAME,DP02_0066PE"

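### 5. Add your Geography  
- `county:*` requests every county, and `state:37` limits the request to North Carolina (FIPS state code 37)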


In [6]:
# Geography filter: every county (county:*) within state 37 (North Carolina)
geography = "&for=county:*&in=state:37"

###   6. Put it all Together 

In [7]:
# Assemble the full request URL from its pieces, in order:
# endpoint, dataset path, "?get=", variable list, geography filter
request_url = "".join([base_url, dataset_name, get_start, get_variables, geography])
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0066PE&for=county:*&in=state:37


# 2. Use *requests* library to make the API call

In [8]:
# Make API Call
# A timeout keeps this cell from hanging indefinitely if the server never responds.
r = requests.get(request_url, timeout=30)

# Stop here with a clear HTTP error (4xx/5xx) rather than failing later
# with a confusing JSON decode error or a malformed DataFrame.
r.raise_for_status()

# Parse the JSON response body into Python objects
api_results = r.json()

In [9]:
#print(api_results)

In [10]:
# pprint makes it possible to see the structure of the returned data -- but it can be very, very long!
#pprint.pprint(api_results)

In [11]:
# Check which Python type the parsed JSON came back as before building a DataFrame
type(api_results)

list

# 3. Get the data into a Dataframe  
- These Census Data results are in a list and have a specific form:  
  - The first element is a list of column names  
  - The remaining list elements are data  
  
  

In [12]:
# Load the raw API results as-is; at this point the column names are still
# sitting in the first row and the columns are just numbered 0..n-1.
df = pd.DataFrame(api_results)

# Quick sanity check: dimensions plus the first few rows
print(df.shape)
df.head()

(101, 4)


Unnamed: 0,0,1,2,3
0,NAME,DP02_0066PE,state,county
1,"Anson County, North Carolina",3.2,37,007
2,"Beaufort County, North Carolina",8.2,37,013
3,"Brunswick County, North Carolina",10.5,37,019
4,"Cabarrus County, North Carolina",12.3,37,025


# 4. Promote the first row to column names, then drop it from the data  

In [13]:
# The first row of the raw frame holds the column names.
# Take it as a plain list: assigning the row Series directly would leave the
# columns index itself named "0", which shows up as a stray label in later output.
header = df.iloc[0].tolist()

# Keep only the data rows. .copy() makes this an independent frame, so columns
# added or converted in later cells are not written into a slice of the raw frame.
df = df.iloc[1:].copy()
df.columns = header

print(df.shape)
df.head()

(100, 4)


Unnamed: 0,NAME,DP02_0066PE,state,county
1,"Anson County, North Carolina",3.2,37,7
2,"Beaufort County, North Carolina",8.2,37,13
3,"Brunswick County, North Carolina",10.5,37,19
4,"Cabarrus County, North Carolina",12.3,37,25
5,"Carteret County, North Carolina",11.5,37,31


# 5. Cleaning Data

In [14]:
two_new_cols = ['County Name', 'State_Name']

# Split "County, State" into two columns at the first separator only.
# - n is passed by keyword: pandas 2.0 no longer accepts it as a positional argument.
# - The separator is ", " (comma + space) so State_Name does not start with a blank.
df[two_new_cols] = df['NAME'].str.split(', ', n=1, expand=True)

print(df.shape)
df.head()

(100, 6)


Unnamed: 0,NAME,DP02_0066PE,state,county,County Name,State_Name
1,"Anson County, North Carolina",3.2,37,7,Anson County,North Carolina
2,"Beaufort County, North Carolina",8.2,37,13,Beaufort County,North Carolina
3,"Brunswick County, North Carolina",10.5,37,19,Brunswick County,North Carolina
4,"Cabarrus County, North Carolina",12.3,37,25,Cabarrus County,North Carolina
5,"Carteret County, North Carolina",11.5,37,31,Carteret County,North Carolina


In [15]:
# Make the percentage column numeric so it sorts by value in the ranking step
df["DP02_0066PE"] = df["DP02_0066PE"].pipe(pd.to_numeric)

In [16]:
# Inspect the column dtypes (DP02_0066PE should be numeric before it is sorted)
df.dtypes

0
NAME            object
DP02_0066PE    float64
state           object
county          object
County Name     object
State_Name      object
dtype: object

In [17]:
# Rank counties from highest to lowest percentage and keep the first 15 rows
df = df.sort_values(by="DP02_0066PE", ascending=False).head(15)
df

Unnamed: 0,NAME,DP02_0066PE,state,county,County Name,State_Name
74,"Orange County, North Carolina",34.8,37,135,Orange County,North Carolina
10,"Durham County, North Carolina",23.7,37,63,Durham County,North Carolina
95,"Wake County, North Carolina",20.5,37,183,Wake County,North Carolina
6,"Chatham County, North Carolina",20.1,37,37,Chatham County,North Carolina
97,"Watauga County, North Carolina",19.4,37,189,Watauga County,North Carolina
33,"Buncombe County, North Carolina",16.3,37,21,Buncombe County,North Carolina
19,"Mecklenburg County, North Carolina",15.6,37,119,Mecklenburg County,North Carolina
69,"Moore County, North Carolina",14.6,37,125,Moore County,North Carolina
71,"New Hanover County, North Carolina",14.5,37,129,New Hanover County,North Carolina
13,"Guilford County, North Carolina",13.2,37,81,Guilford County,North Carolina


In [18]:
# Map the raw Census column codes to reader-friendly labels
cols_to_rename = {
    'DP02_0066PE': '% 25 or Older with Grad Degree (DP02_0066PE)',
    'state': 'FIPS_State',
    'county': 'FIPS_County',
}
df = df.rename(columns=cols_to_rename)

print(df.shape)
df.head()

(15, 6)


Unnamed: 0,NAME,% 25 or Older with Grad Degree (DP02_0066PE),FIPS_State,FIPS_County,County Name,State_Name
74,"Orange County, North Carolina",34.8,37,135,Orange County,North Carolina
10,"Durham County, North Carolina",23.7,37,63,Durham County,North Carolina
95,"Wake County, North Carolina",20.5,37,183,Wake County,North Carolina
6,"Chatham County, North Carolina",20.1,37,37,Chatham County,North Carolina
97,"Watauga County, North Carolina",19.4,37,189,Watauga County,North Carolina


In [19]:
# Final table: keep only the columns needed to answer the question, in display order
cols_to_keep = [
    'County Name',
    '% 25 or Older with Grad Degree (DP02_0066PE)',
    'FIPS_County',
    'FIPS_State',
]
df = df.loc[:, cols_to_keep]

print("Part 2: What are the Top 15 NC Counties for Percent of the Population 25 and over that have a Graduate or Professional Degree (2020)? ")
print(df.shape)
df

Part 2: What are the Top 15 NC Counties for Percent of the Population 25 and over that have a Graduate or Professional Degree (2020)? 
(15, 4)


Unnamed: 0,County Name,% 25 or Older with Grad Degree (DP02_0066PE),FIPS_County,FIPS_State
74,Orange County,34.8,135,37
10,Durham County,23.7,63,37
95,Wake County,20.5,183,37
6,Chatham County,20.1,37,37
97,Watauga County,19.4,189,37
33,Buncombe County,16.3,21,37
19,Mecklenburg County,15.6,119,37
69,Moore County,14.6,125,37
71,New Hanover County,14.5,129,37
13,Guilford County,13.2,81,37
