In [1]:
# example api call from https://api.census.gov/data/2020/dec/pl/examples.html

# https://api.census.gov/data/2020/dec/pl?get=NAME&for=county:*&in=state:*&key=YOUR_KEY_GOES_HERE

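# get= : which variables to return; NAME is the name of each geography selected by for=
# for= : which geography level to return data for
# in=  : restricts the for= geographies to a parent geography (state:* = every state)
# county:* = all counties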

# P1_001N is the total population, from https://api.census.gov/data/2020/dec/pl/variables.html

# P1_003N is the White-alone population (same variables page)

# https://api.census.gov/data/2020/dec/pl?get=NAME&P1_001N&for=county:*&in=state:*

# Tennessee = state code 47 (FIPS)

# https://api.census.gov/data/2020/dec/pl?get=NAME&P1_001N&P1_003N&for=county:*&in=state:47

# above gets name, total pop, white pop for all counties in state 47 (Tennessee)

import requests
import json
import pandas as pd
import datetime
import numpy as np

# Fetch the name, total population (P1_001N) and white population (P1_003N)
# of every county in state 47 (Tennessee) from the 2020 Decennial PL endpoint.
#
# The variable list is comma-separated, which is the form the Census API
# documents for `get=`. The earlier `&`-separated list evidently worked too,
# but it is not the documented syntax.
#
# timeout: without one, requests waits indefinitely if the API never answers.
# raise_for_status(): stop here on an HTTP error instead of failing later,
# and less clearly, while decoding the body.
response = requests.get(
    "https://api.census.gov/data/2020/dec/pl?get=NAME,P1_001N,P1_003N&for=county:*&in=state:47",
    timeout=30,
)
response.raise_for_status()


In [2]:
# Sanity check: show the HTTP status code of the census request (200 = OK).
print(response.status_code)

200


In [3]:
# print(response.json())  # uncomment to dump the entire raw JSON payload

In [4]:
# Decode the JSON body of the response into plain Python objects.
api_data = response.json()

In [5]:
# Number of top-level entries in the decoded payload.
len(api_data)

96

In [6]:
# Peek at entries 1 through 4 of the payload (entry 0 is skipped).
print(api_data[1:5])

[['Meigs County, Tennessee', '12758', '11894', '47', '121'], ['Montgomery County, Tennessee', '220069', '137775', '47', '125'], ['Moore County, Tennessee', '6461', '6030', '47', '127'], ['Obion County, Tennessee', '30787', '25009', '47', '131']]


In [7]:
# Turn the list-of-lists payload into a DataFrame, one row per inner list.
# Reference: https://www.geeksforgeeks.org/creating-pandas-dataframe-using-list-of-lists/
# Entry 0 is left out (presumably the API's header row; it is never displayed
# in this notebook, so confirm). With no labels supplied, the columns are
# numbered 0..4.
df = pd.DataFrame(data=api_data[1:])

df.head()

Unnamed: 0,0,1,2,3,4
0,"Meigs County, Tennessee",12758,11894,47,121
1,"Montgomery County, Tennessee",220069,137775,47,125
2,"Moore County, Tennessee",6461,6030,47,127
3,"Obion County, Tennessee",30787,25009,47,131
4,"Overton County, Tennessee",22511,21332,47,133


In [8]:
# Replace the default positional labels (0..4) with descriptive names.
# The order here must match the order of the values in each API row.
df.columns = [
    'name',         # county name, e.g. "Meigs County, Tennessee"
    'population',   # P1_001N, total population
    'white_pop',    # P1_003N, white population
    'state_code',   # state code (47 for every row of this query)
    'county_code',  # county code within the state
]

In [9]:
# Confirm the new column labels are in place.
df.head()

Unnamed: 0,name,population,white_pop,state_code,county_code
0,"Meigs County, Tennessee",12758,11894,47,121
1,"Montgomery County, Tennessee",220069,137775,47,125
2,"Moore County, Tennessee",6461,6030,47,127
3,"Obion County, Tennessee",30787,25009,47,131
4,"Overton County, Tennessee",22511,21332,47,133


In [10]:
# Inspect the dtype and non-null count of every column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   name         95 non-null     object
 1   population   95 non-null     object
 2   white_pop    95 non-null     object
 3   state_code   95 non-null     object
 4   county_code  95 non-null     object
dtypes: object(5)
memory usage: 3.8+ KB


In [11]:
# The two count columns arrive from the API as strings (object dtype), so
# parse them into numbers before doing arithmetic. assign() overwrites the
# existing columns in place, so column order is unchanged.
df = df.assign(
    population=lambda frame: pd.to_numeric(frame['population']),
    white_pop=lambda frame: pd.to_numeric(frame['white_pop']),
)


In [12]:
# Share of each county's population that is white.
# Despite "percent" in the name, the value is a 0-1 fraction (e.g. 0.93);
# it is not multiplied by 100.
df['percent_white'] = df['white_pop'].div(df['population'])
df.head()

Unnamed: 0,name,population,white_pop,state_code,county_code,percent_white
0,"Meigs County, Tennessee",12758,11894,47,121,0.932278
1,"Montgomery County, Tennessee",220069,137775,47,125,0.626054
2,"Moore County, Tennessee",6461,6030,47,127,0.933292
3,"Obion County, Tennessee",30787,25009,47,131,0.812323
4,"Overton County, Tennessee",22511,21332,47,133,0.947626


In [13]:
# Remove the two geography code columns so only the name, the counts and the
# white share remain.
df = df.drop(['state_code', 'county_code'], axis='columns')

In [14]:
# Last look at the table before it is written to disk.
df.head()

Unnamed: 0,name,population,white_pop,percent_white
0,"Meigs County, Tennessee",12758,11894,0.932278
1,"Montgomery County, Tennessee",220069,137775,0.626054
2,"Moore County, Tennessee",6461,6030,0.933292
3,"Obion County, Tennessee",30787,25009,0.812323
4,"Overton County, Tennessee",22511,21332,0.947626


In [15]:
# Write the finished table to a CSV file, leaving out the index column.
# The file name, including its "tenessee" spelling, is kept exactly as it was
# so the output path does not move.
df.to_csv(
    path_or_buf='tenessee_county_pop_data_with_white_pop.csv',
    index=False,
)