# Question: Which places in NC have the highest and lowest percentage of computer and internet access?
* Data source: acs/acs5/profile for places (https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE&for=place:*&in=state:37)
* Vintage: 2020
* Geography Level: Places

### Imports

In [15]:
import pandas as pd
import requests

### Build the API request url

In [16]:
base_url = "https://api.census.gov/data"
dataset_name = "/2020/acs/acs5/profile"
get_start = "?get="
get_variables = "NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE"
geography = "&for=place:*&in=state:37"
request_url = base_url + dataset_name + get_start + get_variables + geography
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2020/acs/acs5/profile?get=NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE&for=place:*&in=state:37


### Use *requests* library to make the API call

In [17]:
r = requests.get(request_url)

api_results = r.json()

### Get the data into a Dataframe

In [18]:
df_place = pd.DataFrame(api_results)

print(df_place.shape)
df_place

(777, 7)


Unnamed: 0,0,1,2,3,4,5,6
0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,place
1,"Aberdeen town, North Carolina",2842,90.6,2839,90.5,37,00160
2,"Advance CDP, North Carolina",334,85.2,312,79.6,37,00440
3,"Ahoskie town, North Carolina",1542,80.9,1332,69.9,37,00500
4,"Alamance village, North Carolina",399,95.2,387,92.4,37,00640
...,...,...,...,...,...,...,...
772,"Yadkinville town, North Carolina",992,84.5,921,78.4,37,75960
773,"Yanceyville town, North Carolina",861,71.3,582,48.2,37,76000
774,"Youngsville town, North Carolina",841,92.9,831,91.8,37,76200
775,"Zebulon town, North Carolina",1570,80.2,1467,74.9,37,76220


### Getting the first row into the columns & removing

In [19]:
column_names = df_place.iloc[0]

print(column_names)

0           NAME
1     DP02_0153E
2    DP02_0153PE
3     DP02_0154E
4    DP02_0154PE
5          state
6          place
Name: 0, dtype: object


In [20]:
df_place.columns = column_names

print(df_place.shape)
df_place.head()

(777, 7)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,place
0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,place
1,"Aberdeen town, North Carolina",2842,90.6,2839,90.5,37,00160
2,"Advance CDP, North Carolina",334,85.2,312,79.6,37,00440
3,"Ahoskie town, North Carolina",1542,80.9,1332,69.9,37,00500
4,"Alamance village, North Carolina",399,95.2,387,92.4,37,00640


In [21]:
df_place = df_place.iloc[1:]

print(df_place.shape)
df_place

(776, 7)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,place
1,"Aberdeen town, North Carolina",2842,90.6,2839,90.5,37,00160
2,"Advance CDP, North Carolina",334,85.2,312,79.6,37,00440
3,"Ahoskie town, North Carolina",1542,80.9,1332,69.9,37,00500
4,"Alamance village, North Carolina",399,95.2,387,92.4,37,00640
5,"Albemarle city, North Carolina",5255,84.0,4860,77.7,37,00680
...,...,...,...,...,...,...,...
772,"Yadkinville town, North Carolina",992,84.5,921,78.4,37,75960
773,"Yanceyville town, North Carolina",861,71.3,582,48.2,37,76000
774,"Youngsville town, North Carolina",841,92.9,831,91.8,37,76200
775,"Zebulon town, North Carolina",1570,80.2,1467,74.9,37,76220


### Splitting the columns to get rid of extra information

In [22]:
two_cols = ["Place_Name", "Trash"]
df_place[two_cols] = df_place["NAME"].str.split(',',1, expand=True)

print(df_place.shape)
df_place.head()

(776, 9)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place[two_cols] = df_place["NAME"].str.split(',',1, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place[two_cols] = df_place["NAME"].str.split(',',1, expand=True)


Unnamed: 0,NAME,DP02_0153E,DP02_0153PE,DP02_0154E,DP02_0154PE,state,place,Place_Name,Trash
1,"Aberdeen town, North Carolina",2842,90.6,2839,90.5,37,160,Aberdeen town,North Carolina
2,"Advance CDP, North Carolina",334,85.2,312,79.6,37,440,Advance CDP,North Carolina
3,"Ahoskie town, North Carolina",1542,80.9,1332,69.9,37,500,Ahoskie town,North Carolina
4,"Alamance village, North Carolina",399,95.2,387,92.4,37,640,Alamance village,North Carolina
5,"Albemarle city, North Carolina",5255,84.0,4860,77.7,37,680,Albemarle city,North Carolina


### Renaming the columns

In [23]:
df_place.rename(columns={"DP02_0153E": "Total_Computer", "DP02_0153PE": "Total_Computer_Percent", "DP02_0154E": "Total_Broadband_Internet", "DP02_0154PE": "Total_Broadband_Internet_Percent", "state": "State_FIPS", "place": "Place_FIPS"}, inplace=True)

print(df_place.shape)
df_place.head()

(776, 9)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place.rename(columns={"DP02_0153E": "Total_Computer", "DP02_0153PE": "Total_Computer_Percent", "DP02_0154E": "Total_Broadband_Internet", "DP02_0154PE": "Total_Broadband_Internet_Percent", "state": "State_FIPS", "place": "Place_FIPS"}, inplace=True)


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,Place_FIPS,Place_Name,Trash
1,"Aberdeen town, North Carolina",2842,90.6,2839,90.5,37,160,Aberdeen town,North Carolina
2,"Advance CDP, North Carolina",334,85.2,312,79.6,37,440,Advance CDP,North Carolina
3,"Ahoskie town, North Carolina",1542,80.9,1332,69.9,37,500,Ahoskie town,North Carolina
4,"Alamance village, North Carolina",399,95.2,387,92.4,37,640,Alamance village,North Carolina
5,"Albemarle city, North Carolina",5255,84.0,4860,77.7,37,680,Albemarle city,North Carolina


### Convert to ints and floats

In [24]:
df_place.dtypes

0
NAME                                object
Total_Computer                      object
Total_Computer_Percent              object
Total_Broadband_Internet            object
Total_Broadband_Internet_Percent    object
State_FIPS                          object
Place_FIPS                          object
Place_Name                          object
Trash                               object
dtype: object

In [25]:
df_place["Total_Computer"] = pd.to_numeric(df_place["Total_Computer"]).astype(int)
df_place["Total_Broadband_Internet"] = pd.to_numeric(df_place["Total_Broadband_Internet"]).astype(int)
df_place["Total_Computer_Percent"] = pd.to_numeric(df_place["Total_Computer_Percent"]).astype(float)
df_place["Total_Broadband_Internet_Percent"] = pd.to_numeric(df_place["Total_Broadband_Internet_Percent"]).astype(float)

df_place.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place["Total_Computer"] = pd.to_numeric(df_place["Total_Computer"]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place["Total_Broadband_Internet"] = pd.to_numeric(df_place["Total_Broadband_Internet"]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place["Total_Com

0
NAME                                 object
Total_Computer                        int64
Total_Computer_Percent              float64
Total_Broadband_Internet              int64
Total_Broadband_Internet_Percent    float64
State_FIPS                           object
Place_FIPS                           object
Place_Name                           object
Trash                                object
dtype: object

### Make a FIPS_Code column

In [26]:
df_place["FIPS_Code"] = df_place["State_FIPS"] + df_place["Place_FIPS"]

print(df_place.shape)
df_place.head()

(776, 10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_place["FIPS_Code"] = df_place["State_FIPS"] + df_place["Place_FIPS"]


Unnamed: 0,NAME,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent,State_FIPS,Place_FIPS,Place_Name,Trash,FIPS_Code
1,"Aberdeen town, North Carolina",2842,90.6,2839,90.5,37,160,Aberdeen town,North Carolina,3700160
2,"Advance CDP, North Carolina",334,85.2,312,79.6,37,440,Advance CDP,North Carolina,3700440
3,"Ahoskie town, North Carolina",1542,80.9,1332,69.9,37,500,Ahoskie town,North Carolina,3700500
4,"Alamance village, North Carolina",399,95.2,387,92.4,37,640,Alamance village,North Carolina,3700640
5,"Albemarle city, North Carolina",5255,84.0,4860,77.7,37,680,Albemarle city,North Carolina,3700680


### Reorder and remove columns

In [27]:
cols_to_keep = ["Place_Name", "State_FIPS", "Place_FIPS", "FIPS_Code", "Total_Computer", "Total_Computer_Percent", "Total_Broadband_Internet", "Total_Broadband_Internet_Percent"]
df_place = df_place[cols_to_keep]

print(df_place.shape)
df_place.head()

(776, 8)


Unnamed: 0,Place_Name,State_FIPS,Place_FIPS,FIPS_Code,Total_Computer,Total_Computer_Percent,Total_Broadband_Internet,Total_Broadband_Internet_Percent
1,Aberdeen town,37,160,3700160,2842,90.6,2839,90.5
2,Advance CDP,37,440,3700440,334,85.2,312,79.6
3,Ahoskie town,37,500,3700500,1542,80.9,1332,69.9
4,Alamance village,37,640,3700640,399,95.2,387,92.4
5,Albemarle city,37,680,3700680,5255,84.0,4860,77.7


### Save to CSV

In [28]:
csv_file_to_create = "Place_Computer_Internet.csv"

filename_with_path = "Data/" + csv_file_to_create
df_place.to_csv(filename_with_path, index=False)