# Question: Which counties in NC have the highest and lowest percentage of computer and internet access?
* Data source: acs/acs5/profile for counties (https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE&for=county:*&in=state:37)
* Vintage: 2020
* Geography Level: Counties

### Imports

In [71]:
import pandas as pd
import requests

### Build the API request url

In [72]:
base_url = "https://api.census.gov/data"
dataset_name = "/2020/acs/acs5/profile"
get_start = "?get="
get_variables = "NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE"
geography = "&for=county:*&in=state:37"
request_url = base_url + dataset_name + get_start + get_variables + geography
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE&for=county:*&in=state:37


### Use *requests* library to make the API call

In [73]:
r = requests.get(request_url)

api_results = r.json()

### Get the data into a Dataframe

In [74]:
df_county = pd.DataFrame(api_results)

print(df_county.shape)
df_county

(101, 7)


Unnamed: 0,0,1,2,3,4,5,6
0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,007
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,013
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,019
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,025
...,...,...,...,...,...,...,...
96,"Washington County, North Carolina",3812,72.8,3438,65.6,37,187
97,"Watauga County, North Carolina",19564,91.2,18109,84.4,37,189
98,"Wilkes County, North Carolina",24956,86.3,22996,79.6,37,193
99,"Wilson County, North Carolina",26458,82.8,24419,76.4,37,195


### Getting the first row into the columns & removing

In [75]:
column_names = df_county.iloc[0]

print(column_names)

0           NAME
1     DP02_0153E
2    DP02_0153PE
3     DP02_0154E
4    DP02_0154PE
5          state
6         county
Name: 0, dtype: object


In [76]:
df_county.columns = column_names

print(df_county.shape)
df_county.head()

(101, 7)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county
0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,007
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,013
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,019
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,025


In [77]:
df_county = df_county.iloc[1:]

print(df_county.shape)
df_county

(100, 7)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,007
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,013
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,019
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,025
5,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,031
...,...,...,...,...,...,...,...
96,"Washington County, North Carolina",3812,72.8,3438,65.6,37,187
97,"Watauga County, North Carolina",19564,91.2,18109,84.4,37,189
98,"Wilkes County, North Carolina",24956,86.3,22996,79.6,37,193
99,"Wilson County, North Carolina",26458,82.8,24419,76.4,37,195


# Skill: Splitting Columns

In [78]:
two_cols = ["County_Name", "Trash"]
df_county[two_cols] = df_county["NAME"].str.split(',',1, expand=True)

print(df_county.shape)
df_county.head()

(100, 9)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county[two_cols] = df_county["NAME"].str.split(',',1, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county[two_cols] = df_county["NAME"].str.split(',',1, expand=True)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,County_Name,Trash
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,7,Anson County,North Carolina
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,13,Beaufort County,North Carolina
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,19,Brunswick County,North Carolina
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,25,Cabarrus County,North Carolina
5,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,31,Carteret County,North Carolina


# Skill: replace()

In [79]:
df_county["County_Name"] = df_county["County_Name"].str.replace("County", "")

print(df_county.shape)
df_county.head()

(100, 9)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county["County_Name"] = df_county["County_Name"].str.replace("County", "")


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,County_Name,Trash
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,7,Anson,North Carolina
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,13,Beaufort,North Carolina
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,19,Brunswick,North Carolina
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,25,Cabarrus,North Carolina
5,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,31,Carteret,North Carolina


### The County_Name column probably has trailing spaces after the replace(), so I'll go ahead and strip that column.
# Skill: strip()

In [80]:
df_county["County_Name"] = df_county["County_Name"].str.strip()

print(df_county.shape)
df_county.head()

(100, 9)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county["County_Name"] = df_county["County_Name"].str.strip()


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,County_Name,Trash
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,7,Anson,North Carolina
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,13,Beaufort,North Carolina
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,19,Brunswick,North Carolina
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,25,Cabarrus,North Carolina
5,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,31,Carteret,North Carolina


### Renaming the columns

In [81]:
df_county.rename(columns={"DP02_0153E": "Total_Computer", "DP02_0153PE": "Total_Computer_Percent", "DP02_0154E": "Total_Broadband_Internet", "DP02_0154PE": "Total_Broadband_Internet_Percent", "state": "State_FIPS", "county": "County_FIPS"}, inplace=True)

print(df_county.shape)
df_county.head()

(100, 9)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county.rename(columns={"DP02_0153E": "Total_Computer", "DP02_0153PE": "Total_Computer_Percent", "DP02_0154E": "Total_Broadband_Internet", "DP02_0154PE": "Total_Broadband_Internet_Percent", "state": "State_FIPS", "county": "County_FIPS"}, inplace=True)


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,County_FIPS,County_Name,Trash
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,7,Anson,North Carolina
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,13,Beaufort,North Carolina
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,19,Brunswick,North Carolina
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,25,Cabarrus,North Carolina
5,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,31,Carteret,North Carolina


### Convert to ints and floats

In [82]:
df_county.dtypes

0
NAME                                object
Total_Computer                      object
Total_Computer_Percent              object
Total_Broadband_Internet            object
Total_Broadband_Internet_Percent    object
State_FIPS                          object
County_FIPS                         object
County_Name                         object
Trash                               object
dtype: object

In [83]:
df_county["Total_Computer"] = pd.to_numeric(df_county["Total_Computer"]).astype(int)
df_county["Total_Broadband_Internet"] = pd.to_numeric(df_county["Total_Broadband_Internet"]).astype(int)
df_county["Total_Computer_Percent"] = pd.to_numeric(df_county["Total_Computer_Percent"]).astype(float)
df_county["Total_Broadband_Internet_Percent"] = pd.to_numeric(df_county["Total_Broadband_Internet_Percent"]).astype(float)

df_county.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county["Total_Computer"] = pd.to_numeric(df_county["Total_Computer"]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county["Total_Broadband_Internet"] = pd.to_numeric(df_county["Total_Broadband_Internet"]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county["Tota

0
NAME                                 object
Total_Computer                        int64
Total_Computer_Percent              float64
Total_Broadband_Internet              int64
Total_Broadband_Internet_Percent    float64
State_FIPS                           object
County_FIPS                          object
County_Name                          object
Trash                                object
dtype: object

### Make a FIPS_Code column

In [84]:
df_county["FIPS_Code"] = df_county["State_FIPS"] + df_county["County_FIPS"]

print(df_county.shape)
df_county.head()

(100, 10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_county["FIPS_Code"] = df_county["State_FIPS"] + df_county["County_FIPS"]


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,County_FIPS,County_Name,Trash,FIPS_Code
1,"Anson County, North Carolina",8315,84.8,7737,78.9,37,7,Anson,North Carolina,37007
2,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,13,Beaufort,North Carolina,37013
3,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,19,Brunswick,North Carolina,37019
4,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,25,Cabarrus,North Carolina,37025
5,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,31,Carteret,North Carolina,37031


### Bring in the Rural/Urban County data
### This data comes from the dataframe created during Lab 8

In [85]:
df_urb_rur = pd.read_csv("Data/Rural_Urban_Counties.csv")

print(df_urb_rur.shape)
df_urb_rur

(100, 4)


Unnamed: 0,County_Name,Rural_Status,Population,State_Name
0,Alamance,Regional City and Suburban,173877,North Carolina
1,Alexander,Rural,36644,North Carolina
2,Alleghany,Rural,11049,North Carolina
3,Anson,Rural,22060,North Carolina
4,Ashe,Rural,26711,North Carolina
...,...,...,...,...
95,Wayne,Rural,116835,North Carolina
96,Wilkes,Rural,65806,North Carolina
97,Wilson,Rural,78369,North Carolina
98,Yadkin,Rural,37192,North Carolina


# Skill: Left Join w/ Rural Urban County Data

In [86]:
df_joined = pd.merge(df_county,
                       df_urb_rur,
                       left_on="County_Name",
                       right_on="County_Name",
                       how='left'         # Type of Join:  Left!
                      )

print(df_joined.shape)
df_joined.head()

(100, 13)


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,County_FIPS,County_Name,Trash,FIPS_Code,Rural_Status,Population,State_Name
0,"Anson County, North Carolina",8315,84.8,7737,78.9,37,7,Anson,North Carolina,37007,Rural,22060,North Carolina
1,"Beaufort County, North Carolina",17349,85.8,14964,74.0,37,13,Beaufort,North Carolina,37013,Rural,44468,North Carolina
2,"Brunswick County, North Carolina",55782,93.9,52355,88.1,37,19,Brunswick,North Carolina,37019,Rural,144215,North Carolina
3,"Cabarrus County, North Carolina",68821,94.5,64618,88.7,37,25,Cabarrus,North Carolina,37025,Regional City and Suburban,231278,North Carolina
4,"Carteret County, North Carolina",27889,92.8,26256,87.3,37,31,Carteret,North Carolina,37031,Rural,68541,North Carolina


### Reorder and remove columns

In [88]:
cols_to_keep = ["County_Name", "Rural_Status", "State_FIPS", "County_FIPS", "FIPS_Code", "Total_Computer", "Total_Computer_Percent", "Total_Broadband_Internet", "Total_Broadband_Internet_Percent"]
df_joined = df_joined[cols_to_keep]

print(df_joined.shape)
df_joined.head()

(100, 9)


Unnamed: 0,County_Name,Rural_Status,State_FIPS,County_FIPS,FIPS_Code,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent
0,Anson,Rural,37,7,37007,8315,84.8,7737,78.9
1,Beaufort,Rural,37,13,37013,17349,85.8,14964,74.0
2,Brunswick,Rural,37,19,37019,55782,93.9,52355,88.1
3,Cabarrus,Regional City and Suburban,37,25,37025,68821,94.5,64618,88.7
4,Carteret,Rural,37,31,37031,27889,92.8,26256,87.3


### Save to CSV

In [90]:
csv_file_to_create = "County_Computer_Internet.csv"

filename_with_path = "Data/" + csv_file_to_create
df_joined.to_csv(filename_with_path, index=False)