# Ex 7.3: Part I (In Class)

### Part 1: What are the Top 5 NC Places for Percent of the Population 25 and over that have a Graduate or Professional Degree (2020)? 

In [1]:
import pandas as pd
import json
import requests
import pprint

import plotly.express as px

# 1. Build the API Request URL

### 1. How to Start your Data Request  
- Requests always begin with: https://api.census.gov/data  

In [2]:
# Root endpoint shared by every Census Data API request; the dataset path
# and query string are appended to it in the following cells.
base_url = "https://api.census.gov/data"

### 2. Add the Dataset Name
- `/2020/acs/acs5/profile`: the 2020 American Community Survey (ACS) 5-Year Data Profiles


In [3]:
# Dataset path appended to the base URL: 2020 vintage, ACS 5-year estimates,
# Data Profile tables.
dataset_name = "/2020/acs/acs5/profile"

### 3. Start your Variable Request


In [4]:
# Opens the query string; the comma-separated variable list follows "get=".
get_start = "?get="

### 4. Add your Variables  
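- **NAME**: Name of the place (for example, "Aberdeen town, North Carolina")
- **DP02_0066PE**: Percent of Population 25 years and over with Graduate or professional degree
- **DP02_0059E**: Estimate of the Population 25 years and over (the base for the percentage above)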


In [5]:
# Fields requested from the API, comma-separated with no spaces:
#   NAME        -> place label, e.g. "Aberdeen town, North Carolina"
#   DP02_0066PE -> percent of population 25+ with a graduate or professional degree
#   DP02_0059E  -> NOTE(review): appears to be the estimate of population 25 years
#                  and over (the base of the percentage), not total population —
#                  confirm against the ACS variable list.
get_variables = "NAME,DP02_0066PE,DP02_0059E"

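###   5. Add your Geography  
- `for=place:*` requests every place; `in=state:37` limits the results to North Carolina (state FIPS code 37)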


In [6]:
# Geography clause: every place ("place:*") inside state code 37.
# The NAME values returned for this query all end in "North Carolina".
geography = "&for=place:*&in=state:37"

###   6. Put it all Together 

In [7]:
# Assemble the full request URL from the pieces defined above, in the order
# the API expects: endpoint, dataset, "?get=", variables, geography.
url_parts = [base_url, dataset_name, get_start, get_variables, geography]
request_url = "".join(url_parts)

# Echo the URL so it can be pasted into a browser to inspect the raw response.
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0066PE,DP02_0059E&for=place:*&in=state:37


# 2. Use *requests* library to make the API call

In [8]:
# Make API Call
# The timeout keeps the cell from hanging indefinitely if the server never responds.
r = requests.get(request_url, timeout=30)

# Stop here with a clear HTTP error if the request was rejected, rather than
# failing later with an opaque JSON decoding error on a non-JSON error body.
r.raise_for_status()

# Parse the JSON body into Python objects (a list of row lists for this API).
api_results = r.json()

In [9]:
#print(api_results)

In [10]:
# pprint makes it possible to see the structure of the returned data -- but it can be very, very long!
#pprint.pprint(api_results)

In [11]:
# Check the container type of the parsed JSON before building a DataFrame from it.
type(api_results)

list

# 3. Get the data into a Dataframe  
- These Census Data results are in a list and have a specific form:  
  - The first element is a list of column names  
  - The remaining list elements are data  
  
  

In [12]:
# Load the raw payload unchanged: each inner list becomes one row, so the
# header record sits in row 0 and the columns get default integer labels.
df = pd.DataFrame(data=api_results)

# Row/column counts (the header record is still counted as a row here).
print(df.shape)
df.head()

(777, 5)


Unnamed: 0,0,1,2,3,4
0,NAME,DP02_0066PE,DP02_0059E,state,place
1,"Aberdeen town, North Carolina",10.8,5463,37,00160
2,"Advance CDP, North Carolina",3.8,758,37,00440
3,"Ahoskie town, North Carolina",5.6,3432,37,00500
4,"Alamance village, North Carolina",15.0,838,37,00640


# 4. Promote the First Row to Column Names, Then Drop It  

In [13]:
# Promote the first row (the header record returned by the API) to column labels.
# .tolist() passes plain labels; assigning the row Series directly would carry
# its name (0) over as a stray name on the columns axis.
df.columns = df.iloc[0].tolist()

# Drop the header row from the data. .copy() makes df an independent frame so
# the column assignments in later cells do not raise SettingWithCopyWarning.
# NOTE: this cell is not idempotent — re-running it without re-running the
# previous cell would drop a real data row.
df = df.iloc[1:].copy()

print(df.shape)
df.head()

(776, 5)


Unnamed: 0,NAME,DP02_0066PE,DP02_0059E,state,place
1,"Aberdeen town, North Carolina",10.8,5463,37,160
2,"Advance CDP, North Carolina",3.8,758,37,440
3,"Ahoskie town, North Carolina",5.6,3432,37,500
4,"Alamance village, North Carolina",15.0,838,37,640
5,"Albemarle city, North Carolina",4.5,11066,37,680


# 5. Cleaning Data

In [14]:
# NAME looks like "<place>, <state>"; split it into two separate columns.
two_new_cols = ['Place', 'State_Name']

# n must be passed by keyword: the positional form is deprecated and is
# rejected by pandas 2.x. n=1 splits on the first comma only.
name_parts = df['NAME'].str.split(',', n=1, expand=True)

# Strip surrounding whitespace so State_Name is "North Carolina" rather than
# " North Carolina" (the space that follows the comma).
df[two_new_cols] = name_parts.apply(lambda part: part.str.strip())

print(df.shape)
df.head()

(776, 7)


Unnamed: 0,NAME,DP02_0066PE,DP02_0059E,state,place,Place,State_Name
1,"Aberdeen town, North Carolina",10.8,5463,37,160,Aberdeen town,North Carolina
2,"Advance CDP, North Carolina",3.8,758,37,440,Advance CDP,North Carolina
3,"Ahoskie town, North Carolina",5.6,3432,37,500,Ahoskie town,North Carolina
4,"Alamance village, North Carolina",15.0,838,37,640,Alamance village,North Carolina
5,"Albemarle city, North Carolina",4.5,11066,37,680,Albemarle city,North Carolina


In [15]:
# The API delivers every value as a string; convert the two Census variables
# to numeric dtypes so they sort and compare as numbers rather than text.
numeric_cols = ["DP02_0066PE", "DP02_0059E"]
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col])

In [16]:
# Inspect column dtypes to confirm the numeric conversion in the previous cell took effect.
df.dtypes

0
NAME            object
DP02_0066PE    float64
DP02_0059E       int64
state           object
place           object
Place           object
State_Name      object
dtype: object

In [17]:
# Rank places by the graduate-degree percentage, highest first, and keep the
# top five rows.
# The sort is non-inplace, and .copy() detaches the result from the full
# frame, so the in-place edits in later cells act on an independent object
# and cannot raise SettingWithCopyWarning.
# NOTE: df is rebound to the 5-row result here; the full table is no longer
# available under this name after this cell.
df = df.sort_values(by="DP02_0066PE", ascending=False).head(5).copy()
df

Unnamed: 0,NAME,DP02_0066PE,DP02_0059E,state,place,Place,State_Name
305,"Forest Hills village, North Carolina",55.8,233,37,24170,Forest Hills village,North Carolina
69,"Blue Clay Farms CDP, North Carolina",53.7,41,37,6510,Blue Clay Farms CDP,North Carolina
222,"Carolina Meadows CDP, North Carolina",53.2,611,37,10532,Carolina Meadows CDP,North Carolina
238,"Chapel Hill town, North Carolina",48.8,31171,37,11800,Chapel Hill town,North Carolina
297,"Fearrington Village CDP, North Carolina",48.3,2451,37,22982,Fearrington Village CDP,North Carolina


In [18]:
# Map the raw Census variable codes to human-readable column labels.
# NOTE(review): DP02_0059E appears to be the population 25 years and over, not
# the total population — confirm against the ACS variable list. The label is
# left unchanged here because the next cell selects the column by this exact name.
cols_to_rename = {
                   'DP02_0066PE':'% 25 or Older with Grad Degree (DP02_0066PE)', 
                   'DP02_0059E' :'Total Population (DP02_0059E)'
                 }

# Reassign rather than rename in place: df is a slice of the full table here,
# and mutating a slice in place can raise SettingWithCopyWarning.
df = df.rename(columns=cols_to_rename)

print(df.shape)
df.head()

(5, 7)


Unnamed: 0,NAME,% 25 or Older with Grad Degree (DP02_0066PE),Total Population (DP02_0059E),state,place,Place,State_Name
305,"Forest Hills village, North Carolina",55.8,233,37,24170,Forest Hills village,North Carolina
69,"Blue Clay Farms CDP, North Carolina",53.7,41,37,6510,Blue Clay Farms CDP,North Carolina
222,"Carolina Meadows CDP, North Carolina",53.2,611,37,10532,Carolina Meadows CDP,North Carolina
238,"Chapel Hill town, North Carolina",48.8,31171,37,11800,Chapel Hill town,North Carolina
297,"Fearrington Village CDP, North Carolina",48.3,2451,37,22982,Fearrington Village CDP,North Carolina


In [19]:
# Columns shown in the final answer table, in display order.
cols_to_keep = [
    'Place',
    '% 25 or Older with Grad Degree (DP02_0066PE)',
    'Total Population (DP02_0059E)',
]

# Keep every row but only the presentation columns.
df = df.loc[:, cols_to_keep]

# Restate the question above the table so the result stands on its own.
print("Part 1: What are the Top 5 NC Places for Percent of the Population 25 and over that have a Graduate or Professional Degree (2020)? ")
print(df.shape)
df.head()

Part 1: What are the Top 5 NC Places for Percent of the Population 25 and over that have a Graduate or Professional Degree (2020)? 
(5, 3)


Unnamed: 0,Place,% 25 or Older with Grad Degree (DP02_0066PE),Total Population (DP02_0059E)
305,Forest Hills village,55.8,233
69,Blue Clay Farms CDP,53.7,41
222,Carolina Meadows CDP,53.2,611
238,Chapel Hill town,48.8,31171
297,Fearrington Village CDP,48.3,2451
