# Ex 7.1: Part III (Outside Class)

In [1]:
import pandas as pd
import json
import requests
import pprint

import plotly.express as px

# 1. Build the API Request URL

### 1. Start your API Data Request  
- Requests always begin with: https://api.census.gov/data  

In [2]:
base_url = "https://api.census.gov/data"

### 2. Add the Dataset Vintage Year and Name  

In [3]:
dataset_name = "/2019/pep/population"

### 3. Start your Variable Request


In [4]:
get_start = "?get="

### 4. Add your Variables  
- Available Variables for Dataset: https://api.census.gov/data/2019/pep/population/variables.html


In [5]:
get_variables = "NAME,POP"

###   5. Add your Geography  
- Available Geographies for Dataset:  https://api.census.gov/data/2021/pep/population/geography.html  



In [6]:
geography = "&for=place:*"

###   6. Put it all Together and Test  
- Click the Link below to see if you formed the API correctly and if the data coming back is what you want.

In [7]:
request_url = base_url + dataset_name + get_start + get_variables + geography
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2019/pep/population?get=NAME,POP&for=place:*


# 2. Use *requests* library to make the API call

In [8]:
# Make API Call
r = requests.get(request_url)

api_results = r.json()

In [9]:
#print(api_results)

In [10]:
# pprint makes it possible to see the structure of the returned data -- but it can be very, very long!
# pprint.pprint(api_results)

# 3. Get the data into a Dataframe  
- These Census Data results are in a list and have a specific form:  
  - The first element is a list of column names  
  - The remaining list elements are data  
  
  

In [11]:
df = pd.DataFrame(api_results)

print(df.shape)
df.head()

(19503, 4)


Unnamed: 0,0,1,2,3
0,NAME,POP,state,place
1,"Abbeville city, Alabama",2560,01,00124
2,"Adamsville city, Alabama",4281,01,00460
3,"Addison town, Alabama",718,01,00484
4,"Alexander City city, Alabama",14317,01,01132


# 4. Get the first Row into columns and then get rid of it

### a. Grab the Column Names out of the First Row of the Dataframe
- Use iloc to point to the first row of the dataframe 

In [12]:
column_names = df.iloc[0]

print(column_names)

0     NAME
1      POP
2    state
3    place
Name: 0, dtype: object


### b. Set the columns property of the Dataframe equal to the column names we grabbed  

In [13]:
df.columns = column_names

print(df.shape)
df.head()

(19503, 4)


Unnamed: 0,NAME,POP,state,place
0,NAME,POP,state,place
1,"Abbeville city, Alabama",2560,01,00124
2,"Adamsville city, Alabama",4281,01,00460
3,"Addison town, Alabama",718,01,00484
4,"Alexander City city, Alabama",14317,01,01132


### c. Now Get Rid of the First Row of the Dataframe

In [14]:
df = df.iloc[1:]

print("7.1 Demo:  2021 State Populations")
print(df.shape)
df

7.1 Demo:  2021 State Populations
(19502, 4)


Unnamed: 0,NAME,POP,state,place
1,"Abbeville city, Alabama",2560,01,00124
2,"Adamsville city, Alabama",4281,01,00460
3,"Addison town, Alabama",718,01,00484
4,"Alexander City city, Alabama",14317,01,01132
5,"Akron town, Alabama",328,01,00676
...,...,...,...,...
19498,"Van Tassell town, Wyoming",18,56,79705
19499,"Wamsutter town, Wyoming",467,56,81300
19500,"Wheatland town, Wyoming",3462,56,83040
19501,"Wright town, Wyoming",1753,56,85015


In [15]:
df.query("state == '13'", inplace=True)
df.sort_values(by="place", ascending=True, inplace=True)

print(df.shape)
df.head()

(538, 4)


Unnamed: 0,NAME,POP,state,place
2455,"Abbeville city, Georgia",2684,13,184
2459,"Acworth city, Georgia",22818,13,408
2460,"Adairsville city, Georgia",4963,13,436
2461,"Adel city, Georgia",5336,13,576
2463,"Adrian city, Georgia",668,13,660


## Cleaning data

In [16]:
df.dtypes

0
NAME     object
POP      object
state    object
place    object
dtype: object

In [17]:
two_new_cols = ['Place_Name', 'State_Name']

df[two_new_cols] = df['NAME'].str.split(',',1, expand=True)

print(df.shape)
df.head()

(538, 6)


Unnamed: 0,NAME,POP,state,place,Place_Name,State_Name
2455,"Abbeville city, Georgia",2684,13,184,Abbeville city,Georgia
2459,"Acworth city, Georgia",22818,13,408,Acworth city,Georgia
2460,"Adairsville city, Georgia",4963,13,436,Adairsville city,Georgia
2461,"Adel city, Georgia",5336,13,576,Adel city,Georgia
2463,"Adrian city, Georgia",668,13,660,Adrian city,Georgia


In [18]:
cols_to_rename = {
                   'POP':'Population', 
                   'state' : 'FIPS_State',
                   'place' : 'FIPS_Place'
                 }
df.rename(columns = cols_to_rename, inplace=True)

print(df.shape)
df.head()

(538, 6)


Unnamed: 0,NAME,Population,FIPS_State,FIPS_Place,Place_Name,State_Name
2455,"Abbeville city, Georgia",2684,13,184,Abbeville city,Georgia
2459,"Acworth city, Georgia",22818,13,408,Acworth city,Georgia
2460,"Adairsville city, Georgia",4963,13,436,Adairsville city,Georgia
2461,"Adel city, Georgia",5336,13,576,Adel city,Georgia
2463,"Adrian city, Georgia",668,13,660,Adrian city,Georgia


In [19]:
cols_to_keep = ['Place_Name', 'Population', 'FIPS_State', 'FIPS_Place', 'State_Name']
df = df[cols_to_keep]

print("7.1 Part 3: Populations for 2019 GA Places")
print(df.shape)
df.head()

7.1 Part 3: Populations for 2019 GA Places
(538, 5)


Unnamed: 0,Place_Name,Population,FIPS_State,FIPS_Place,State_Name
2455,Abbeville city,2684,13,184,Georgia
2459,Acworth city,22818,13,408,Georgia
2460,Adairsville city,4963,13,436,Georgia
2461,Adel city,5336,13,576,Georgia
2463,Adrian city,668,13,660,Georgia


## Plot

In [20]:
df["Population"] = pd.to_numeric(df['Population'])
df.sort_values(by="Population", ascending=False, inplace=True)
df_top = df.iloc[ : 5]
df_top.sort_values(by="Population", ascending=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Population"] = pd.to_numeric(df['Population'])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(by="Population", ascending=False, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_top.sort_values(by="Population", ascending=True, inplace=True)


In [21]:
fig = px.bar(df_top,              
             x='Population', 
             y='Place_Name',
             text='Population',
             orientation='h',   
             template='plotly_dark',
             title='Part 3: Top 5 GA Places (2019)')

fig.update_traces(textposition='auto', 
                  texttemplate='%{text:,.0f}'   # This adds commas and formats to zero decimal places
                 )

fig.show()

# Optional:  Save the Joined Dataframe as a CSV file

In [22]:
#csv_file_to_create = "6.1_Demo_Output_Data.csv"

#filename_with_path = "Data/" + csv_file_to_create
#df.to_csv(filename_with_path, index=False)