# Question: Which census tracts in NC have the highest and lowest percentage of computer and internet access?
* Data source: acs/acs5/profile for census tracts (https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE&for=tract:*&in=state:37)
* Vintage: 2020
* Geography Level: Census Tracts

### Imports

In [16]:
import pandas as pd
import requests

### Build the API request url

In [17]:
base_url = "https://api.census.gov/data"
dataset_name = "/2020/acs/acs5/profile"
get_start = "?get="
get_variables = "NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE"
geography = "&for=tract:*&in=state:37"
request_url = base_url + dataset_name + get_start + get_variables + geography
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE&for=tract:*&in=state:37


### Use *requests* library to make the API call

In [18]:
r = requests.get(request_url)

api_results = r.json()

### Get the data into a Dataframe

In [19]:
df_tract = pd.DataFrame(api_results)

print(df_tract.shape)
df_tract

(2673, 8)


Unnamed: 0,0,1,2,3,4,5,6,7
0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,tract
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,001,020100
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,001,020200
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,001,020301
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,001,020302
...,...,...,...,...,...,...,...,...
2668,"Census Tract 535.17, Wake County, North Carolina",1571,97.3,1491,92.4,37,183,053517
2669,"Census Tract 535.18, Wake County, North Carolina",1724,97.3,1724,97.3,37,183,053518
2670,"Census Tract 535.19, Wake County, North Carolina",1736,97.3,1643,92.0,37,183,053519
2671,"Census Tract 535.20, Wake County, North Carolina",2778,99.4,2424,86.7,37,183,053520


### Getting the first row into the columns & removing

In [20]:
column_names = df_tract.iloc[0]

print(column_names)

0           NAME
1     DP02_0153E
2    DP02_0153PE
3     DP02_0154E
4    DP02_0154PE
5          state
6         county
7          tract
Name: 0, dtype: object


In [21]:
df_tract.columns = column_names

print(df_tract.shape)
df_tract.head()

(2673, 8)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,tract
0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,tract
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,001,020100
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,001,020200
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,001,020301
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,001,020302


In [22]:
df_tract = df_tract.iloc[1:]

print(df_tract.shape)
df_tract

(2672, 8)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,tract
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,001,020100
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,001,020200
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,001,020301
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,001,020302
5,"Census Tract 204, Alamance County, North Carolina",2102,83.2,1718,68.0,37,001,020400
...,...,...,...,...,...,...,...,...
2668,"Census Tract 535.17, Wake County, North Carolina",1571,97.3,1491,92.4,37,183,053517
2669,"Census Tract 535.18, Wake County, North Carolina",1724,97.3,1724,97.3,37,183,053518
2670,"Census Tract 535.19, Wake County, North Carolina",1736,97.3,1643,92.0,37,183,053519
2671,"Census Tract 535.20, Wake County, North Carolina",2778,99.4,2424,86.7,37,183,053520


### Splitting the columns to get rid of extra information

In [23]:
three_cols = ["Census_Tract_Name", "County_Name", "trash"]
df_tract[three_cols] = df_tract["NAME"].str.split(',',2, expand=True)

print(df_tract.shape)
df_tract.head()

(2672, 11)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract[three_cols] = df_tract["NAME"].str.split(',',2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract[three_cols] = df_tract["NAME"].str.split(',',2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract[three_cols] = df_tract["NAME"].str.split(',',2, expand=T

Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,tract,Census_Tract_Name,County_Name,trash
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,1,20100,Census Tract 201,Alamance County,North Carolina
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,1,20200,Census Tract 202,Alamance County,North Carolina
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,1,20301,Census Tract 203.01,Alamance County,North Carolina
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,1,20302,Census Tract 203.02,Alamance County,North Carolina
5,"Census Tract 204, Alamance County, North Carolina",2102,83.2,1718,68.0,37,1,20400,Census Tract 204,Alamance County,North Carolina


In [24]:
df_tract["County_Name"] = df_tract["County_Name"].str.replace("County", "")

print(df_tract.shape)
df_tract.head()

(2672, 11)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract["County_Name"] = df_tract["County_Name"].str.replace("County", "")


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,county,tract,Census_Tract_Name,County_Name,trash
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,1,20100,Census Tract 201,Alamance,North Carolina
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,1,20200,Census Tract 202,Alamance,North Carolina
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,1,20301,Census Tract 203.01,Alamance,North Carolina
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,1,20302,Census Tract 203.02,Alamance,North Carolina
5,"Census Tract 204, Alamance County, North Carolina",2102,83.2,1718,68.0,37,1,20400,Census Tract 204,Alamance,North Carolina


### Renaming the columns

In [25]:
df_tract.rename(columns={"DP02_0153E": "Total_Computer", "DP02_0153PE": "Total_Computer_Percent", "DP02_0154E": "Total_Broadband_Internet", "DP02_0154PE": "Total_Broadband_Internet_Percent", "state": "State_FIPS", "county": "County_FIPS", "tract": "Census_Tract_FIPS"}, inplace=True)

print(df_tract.shape)
df_tract.head()

(2672, 11)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract.rename(columns={"DP02_0153E": "Total_Computer", "DP02_0153PE": "Total_Computer_Percent", "DP02_0154E": "Total_Broadband_Internet", "DP02_0154PE": "Total_Broadband_Internet_Percent", "state": "State_FIPS", "county": "County_FIPS", "tract": "Census_Tract_FIPS"}, inplace=True)


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,County_FIPS,Census_Tract_FIPS,Census_Tract_Name,County_Name,trash
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,1,20100,Census Tract 201,Alamance,North Carolina
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,1,20200,Census Tract 202,Alamance,North Carolina
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,1,20301,Census Tract 203.01,Alamance,North Carolina
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,1,20302,Census Tract 203.02,Alamance,North Carolina
5,"Census Tract 204, Alamance County, North Carolina",2102,83.2,1718,68.0,37,1,20400,Census Tract 204,Alamance,North Carolina


### Convert to ints and floats

In [26]:
df_tract.dtypes

0
NAME                                object
Total_Computer                      object
Total_Computer_Percent              object
Total_Broadband_Internet            object
Total_Broadband_Internet_Percent    object
State_FIPS                          object
County_FIPS                         object
Census_Tract_FIPS                   object
Census_Tract_Name                   object
County_Name                         object
trash                               object
dtype: object

In [27]:
df_tract["Total_Computer"] = pd.to_numeric(df_tract["Total_Computer"]).astype(int)
df_tract["Total_Broadband_Internet"] = pd.to_numeric(df_tract["Total_Broadband_Internet"]).astype(int)
df_tract["Total_Computer_Percent"] = pd.to_numeric(df_tract["Total_Computer_Percent"]).astype(float)
df_tract["Total_Broadband_Internet_Percent"] = pd.to_numeric(df_tract["Total_Broadband_Internet_Percent"]).astype(float)

df_tract.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract["Total_Computer"] = pd.to_numeric(df_tract["Total_Computer"]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract["Total_Broadband_Internet"] = pd.to_numeric(df_tract["Total_Broadband_Internet"]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract["Total_Com

0
NAME                                 object
Total_Computer                        int64
Total_Computer_Percent              float64
Total_Broadband_Internet              int64
Total_Broadband_Internet_Percent    float64
State_FIPS                           object
County_FIPS                          object
Census_Tract_FIPS                    object
Census_Tract_Name                    object
County_Name                          object
trash                                object
dtype: object

### Make a FIPS_Code column

In [28]:
df_tract["FIPS_Code"] = df_tract["State_FIPS"] + df_tract["County_FIPS"] + df_tract["Census_Tract_FIPS"]

print(df_tract.shape)
df_tract.head()

(2672, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tract["FIPS_Code"] = df_tract["State_FIPS"] + df_tract["County_FIPS"] + df_tract["Census_Tract_FIPS"]


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,County_FIPS,Census_Tract_FIPS,Census_Tract_Name,County_Name,trash,FIPS_Code
1,"Census Tract 201, Alamance County, North Carolina",1671,90.7,1403,76.2,37,1,20100,Census Tract 201,Alamance,North Carolina,37001020100
2,"Census Tract 202, Alamance County, North Carolina",1268,94.0,1117,82.8,37,1,20200,Census Tract 202,Alamance,North Carolina,37001020200
3,"Census Tract 203.01, Alamance County, North Ca...",1221,85.9,1043,73.3,37,1,20301,Census Tract 203.01,Alamance,North Carolina,37001020301
4,"Census Tract 203.02, Alamance County, North Ca...",1135,92.4,1147,93.4,37,1,20302,Census Tract 203.02,Alamance,North Carolina,37001020302
5,"Census Tract 204, Alamance County, North Carolina",2102,83.2,1718,68.0,37,1,20400,Census Tract 204,Alamance,North Carolina,37001020400


### Reorder and remove columns

In [29]:
cols_to_keep = ["Census_Tract_Name", "County_Name", "State_FIPS", "County_FIPS", "Census_Tract_FIPS", "FIPS_Code", "Total_Computer", "Total_Computer_Percent", "Total_Broadband_Internet", "Total_Broadband_Internet_Percent"]
df_tract = df_tract[cols_to_keep]

print(df_tract.shape)
df_tract.head()

(2672, 10)


Unnamed: 0,Census_Tract_Name,County_Name,State_FIPS,County_FIPS,Census_Tract_FIPS,FIPS_Code,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent
1,Census Tract 201,Alamance,37,1,20100,37001020100,1671,90.7,1403,76.2
2,Census Tract 202,Alamance,37,1,20200,37001020200,1268,94.0,1117,82.8
3,Census Tract 203.01,Alamance,37,1,20301,37001020301,1221,85.9,1043,73.3
4,Census Tract 203.02,Alamance,37,1,20302,37001020302,1135,92.4,1147,93.4
5,Census Tract 204,Alamance,37,1,20400,37001020400,2102,83.2,1718,68.0


### Save to CSV

In [30]:
csv_file_to_create = "Census_Tract_Computer_Internet.csv"

filename_with_path = "Data/" + csv_file_to_create
df_tract.to_csv(filename_with_path, index=False)