# Ex 7.1: Part II (Outside Class)

In [1]:
import pandas as pd
import json
import requests
import pprint

import plotly.express as px

# 1. Build the API Request URL

### 1. Start your API Data Request  
- Requests always begin with: https://api.census.gov/data  

In [2]:
# Root of every Census Data API request; the dataset path, variables and
# geography defined in the following cells are appended to it.
base_url = "https://api.census.gov/data"

### 2. Add the Dataset Vintage Year and Name  

In [3]:
# Vintage year (2019) followed by the dataset name (pep/population).
dataset_name = "/2019/pep/population"

### 3. Start your Variable Request


In [4]:
# Opens the query string; the comma-separated variable list goes right after "get=".
get_start = "?get="

### 4. Add your Variables  
- Available Variables for Dataset: https://api.census.gov/data/2019/pep/population/variables.html


In [5]:
# Variables to request, comma-separated with no spaces.
get_variables = "NAME,POP"

###   5. Add your Geography  
- Available Geographies for Dataset:  https://api.census.gov/data/2019/pep/population/geography.html  



In [6]:
# Geography clause of the query: "county:*" requests every county, so the
# response has one row per county.
geography = "&for=county:*"

###   6. Put it all Together and Test  
- Click the link printed below to check that the API request URL is formed correctly and that the data coming back is what you want.

In [7]:
# Assemble the pieces in request order: endpoint, dataset, "?get=", variables, geography.
request_url = f"{base_url}{dataset_name}{get_start}{get_variables}{geography}"
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2019/pep/population?get=NAME,POP&for=county:*


# 2. Use *requests* library to make the API call

In [8]:
# Make API Call
# The timeout keeps this cell from hanging indefinitely if the server never responds.
r = requests.get(request_url, timeout=30)

# Stop here with the HTTP status if the API rejected the request (for example a
# misspelled variable or geography) instead of failing later while parsing JSON
# or building the dataframe.
r.raise_for_status()

# Decode the JSON body into Python objects.
api_results = r.json()

In [9]:
#print(api_results)

In [10]:
# pprint makes it possible to see the structure of the returned data -- but it can be very, very long!
# pprint.pprint(api_results)

# 3. Get the data into a Dataframe  
- These Census Data results are in a list and have a specific form:  
  - The first element is a list of column names  
  - The remaining list elements are data  
  
  

In [11]:
# Load the raw response unchanged: each inner list becomes one row, so the list of
# column names lands in row 0 and the columns get default integer labels for now.
df = pd.DataFrame(data=api_results)

print(df.shape)
df.head()

(3221, 4)


Unnamed: 0,0,1,2,3
0,NAME,POP,state,county
1,"Fayette County, Illinois",21336,17,051
2,"Logan County, Illinois",28618,17,107
3,"Saline County, Illinois",23491,17,165
4,"Lake County, Illinois",696535,17,097


# 4. Use the first row as the column names, then drop it

### a. Grab the Column Names out of the First Row of the Dataframe
- Use iloc to point to the first row of the dataframe 

In [12]:
# Row 0 currently holds the header the API returned rather than data.
column_names = df.iloc[0, :]

print(column_names)

0      NAME
1       POP
2     state
3    county
Name: 0, dtype: object


### b. Set the columns property of the Dataframe equal to the column names we grabbed  

In [13]:
# Assign plain labels. Passing the Series itself would also copy its name (the row
# label 0) onto the column axis, which then appears as a stray "0" header in
# outputs such as df.dtypes.
df.columns = list(column_names)

print(df.shape)
df.head()

(3221, 4)


Unnamed: 0,NAME,POP,state,county
0,NAME,POP,state,county
1,"Fayette County, Illinois",21336,17,051
2,"Logan County, Illinois",28618,17,107
3,"Saline County, Illinois",23491,17,165
4,"Lake County, Illinois",696535,17,097


### c. Now Get Rid of the First Row of the Dataframe

In [14]:
# Drop the header row now that its values are the column labels. .copy() makes the
# result an independent frame so later cells can modify it without pandas warning
# about writing to a slice of the raw frame.
df = df.iloc[1:].copy()

# Title matches the request built above: 2019 vintage, one row per county.
print("7.1 Demo:  2019 County Populations")
print(df.shape)
df

7.1 Demo:  2021 State Populations
(3220, 4)


Unnamed: 0,NAME,POP,state,county
1,"Fayette County, Illinois",21336,17,051
2,"Logan County, Illinois",28618,17,107
3,"Saline County, Illinois",23491,17,165
4,"Lake County, Illinois",696535,17,097
5,"Massac County, Illinois",13772,17,127
...,...,...,...,...
3216,"Crockett County, Tennessee",14230,47,033
3217,"Lake County, Tennessee",7016,47,095
3218,"Knox County, Tennessee",470313,47,093
3219,"Benton County, Washington",204390,53,005


In [15]:
# Keep only the rows whose state code is "37" (the North Carolina counties) and
# order them by county code. The codes are strings, hence the quoted value.
# The result is reassigned rather than using inplace=True, so the frame that was
# produced by slicing is never mutated.
df = (
    df
    .query("state == '37'")
    .sort_values(by="county", ascending=True)
)

print(df.shape)
df.head()

(100, 4)


Unnamed: 0,NAME,POP,state,county
2098,"Alamance County, North Carolina",169509,37,1
2047,"Alexander County, North Carolina",37497,37,3
2025,"Alleghany County, North Carolina",11137,37,5
2067,"Anson County, North Carolina",24446,37,7
654,"Ashe County, North Carolina",27203,37,9


## Cleaning data

In [16]:
# Inspect the column dtypes before cleaning.
df.dtypes

0
NAME      object
POP       object
state     object
county    object
dtype: object

In [17]:
two_new_cols = ['County_Name', 'State_Name']

# Split e.g. "Alamance County, North Carolina" into its county and state parts.
# - The separator includes the trailing space so State_Name has no leading blank.
# - n is passed by keyword: pandas 2.x no longer accepts it positionally.
# - n=1 splits at the first separator only; expand=True returns the two parts as columns.
df[two_new_cols] = df['NAME'].str.split(' County, ', n=1, expand=True)

print(df.shape)
df.head()

(100, 6)


Unnamed: 0,NAME,POP,state,county,County_Name,State_Name
2098,"Alamance County, North Carolina",169509,37,1,Alamance,North Carolina
2047,"Alexander County, North Carolina",37497,37,3,Alexander,North Carolina
2025,"Alleghany County, North Carolina",11137,37,5,Alleghany,North Carolina
2067,"Anson County, North Carolina",24446,37,7,Anson,North Carolina
654,"Ashe County, North Carolina",27203,37,9,Ashe,North Carolina


In [18]:
# Map the API's terse column names to descriptive ones; columns not listed
# here keep their current names.
cols_to_rename = dict(
    POP='Population',
    state='FIPS_State',
    county='FIPS_County',
)
df = df.rename(columns=cols_to_rename)

print(df.shape)
df.head()

(100, 6)


Unnamed: 0,NAME,Population,FIPS_State,FIPS_County,County_Name,State_Name
2098,"Alamance County, North Carolina",169509,37,1,Alamance,North Carolina
2047,"Alexander County, North Carolina",37497,37,3,Alexander,North Carolina
2025,"Alleghany County, North Carolina",11137,37,5,Alleghany,North Carolina
2067,"Anson County, North Carolina",24446,37,7,Anson,North Carolina
654,"Ashe County, North Carolina",27203,37,9,Ashe,North Carolina


In [19]:
# Final column selection and order; the combined NAME column is not carried over.
cols_to_keep = ['County_Name', 'Population', 'FIPS_State', 'FIPS_County', 'State_Name']
df = df.loc[:, cols_to_keep]

print("7.1 Part 2: Populations for 2019 NC Counties")
print(df.shape)
df.head()

7.1 Part 2: Populations for 2019 NC Counties
(100, 5)


Unnamed: 0,County_Name,Population,FIPS_State,FIPS_County,State_Name
2098,Alamance,169509,37,1,North Carolina
2047,Alexander,37497,37,3,North Carolina
2025,Alleghany,11137,37,5,North Carolina
2067,Anson,24446,37,7,North Carolina
654,Ashe,27203,37,9,North Carolina


# Optional:  Save the Cleaned Dataframe as a CSV file

In [20]:
# Uncomment the three lines below to write the dataframe to a CSV file under Data/.
# NOTE(review): the filename still says "6.1_Demo" although this notebook is Ex 7.1 — confirm the intended name.
#csv_file_to_create = "6.1_Demo_Output_Data.csv"

#filename_with_path = "Data/" + csv_file_to_create
#df.to_csv(filename_with_path, index=False)