# Spatial History
Intro to Digital History, 4/4/2022
Portions adapted from https://melaniewalsh.github.io/Intro-Cultural-Analytics/07-Mapping/01-Mapping.html

In [95]:
#!pip install geopy
#!pip install folium
#!pip install geopandas

You should consider upgrading via the '/Users/Spencer/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
You should consider upgrading via the '/Users/Spencer/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
Collecting pyproj>=2.2.0
  Downloading pyproj-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: pyproj
  Attempting uninstall: pyproj
    Found existing installation: pyproj 1.9.6
    Uninstalling pyproj-1.9.6:
      Successfully uninstalled pyproj-1.9.6
Successfully installed pyproj-3.3.0
You should consider upgrading via the '/Users/Spencer/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [78]:
from geopy.geocoders import Nominatim
import folium

geolocator = Nominatim(user_agent="YOUR NAME's mapping app", timeout=2)

In [79]:
loc = "University of Utah"

location = geolocator.geocode(loc)

location

Location(University of Utah, 201, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States, (40.762813699999995, -111.83687191368261, 0.0))

In [80]:
print(location.address)

University of Utah, 201, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States


In [81]:
print(location.latitude, location.longitude)

40.762813699999995 -111.83687191368261


In [82]:
print(f"Class: {location.raw['class']} \nType: {location.raw['type']}")

Class: amenity 
Type: university


In [83]:
utah_map = folium.Map(location=[location.latitude, location.longitude],
                     zoom_start=14)

folium.Marker(location=[location.latitude, location.longitude],
             popup="University of Utah").add_to(utah_map)

utah_map

In [84]:
utah_map = folium.Map(location=[location.latitude, location.longitude],
                     zoom_start=10)

folium.CircleMarker(location=[location.latitude, location.longitude],
                    radius=10,
                    color = 'black',
                    fill_color = 'red',
                    fill_opacity = 0.6,
                    popup = "University of Utah").add_to(utah_map)

utah_map

In [85]:
possible_locations = geolocator.geocode("University of Utah", exactly_one=False)

for location in possible_locations:
    print(location.address)
    print(location.latitude, location.longitude)
    print(f"Importance: {location.raw['importance']}")

University of Utah, 201, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States
40.762813699999995 -111.83687191368261
Importance: 0.8557694184669417
Department of Linguistics, University of Utah, 255, Central Campus Drive, Salt Lake City, Salt Lake County, Utah, 84112, United States
40.7634795 -111.8408017
Importance: 0.31100000000000005
Outdoor Adventures, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States
40.7655129 -111.838165
Importance: 0.11100000000000002


# Where is Utah History?

Using Named Entity Recognition

In [86]:
#!pip install -U spacy
#!python -m spacy download en_core_web_sm
import spacy
from spacy import displacy
from collections import Counter
import pandas as pd
import en_core_web_sm
nlp = en_core_web_sm.load()

In [11]:
import pandas as pd

#Load UHQ data into a pandas dataframe
df = pd.read_excel("UHQ_FULL_EDITED.xlsx")

In [12]:
#Look at the first article in our database
text = df.text.tolist()[0]

#Run Named Entity Recognition on the text
document = nlp(text)

displacy.render(document[:500], style="ent")

In [13]:
#Create a list of all location (GPE) entities in the text
location_list = []
for named_entity in document.ents:
    if named_entity.label_ == "GPE":
        location_list.append(named_entity.text.replace("’s", ""))

#Print our list
location_list

['Salt Lake City',
 'Salt Lake City',
 'Salt Lake City',
 'Utah',
 'Salt Lake City',
 'Amanda',
 'Amanda',
 'California',
 'Salt Lake City',
 'Park City',
 'New York',
 'Utah',
 'Ogden',
 'Washington',
 'Utah',
 'Utah',
 'Utah',
 'Michigan',
 'Salt Lake City',
 'Salt Lake City',
 'Utah',
 'Utah',
 'New York',
 'Chicago',
 'Chicago',
 'Herald',
 'Russia']

### Question: What are some potential problems that you see with this list? 

In [14]:
#Is Amanda a place???
loc = "Amanda"

location = geolocator.geocode(loc)
location

Location(Amanda, Fairfield County, Ohio, United States, (39.649508, -82.7443367, 0.0))

In [15]:
#We can use the geolocator library to locate the latitude and longitude for each location. 

location_tally = Counter(location_list)

location_df = pd.DataFrame(location_tally.most_common(), columns=['location', 'count'])

loc_list = location_df.location.tolist()

lat = []
lon = []

for l in loc_list:
    temp_loc = geolocator.geocode(l)
    lat.append(temp_loc.latitude)
    lon.append(temp_loc.longitude)

location_df['lat'] = lat
location_df['lon'] = lon
location_df

Unnamed: 0,location,count,lat,lon
0,Salt Lake City,7,40.75962,-111.886798
1,Utah,7,39.422519,-111.714358
2,Amanda,2,39.649508,-82.744337
3,New York,2,40.712728,-74.006015
4,Chicago,2,41.875562,-87.624421
5,California,1,36.701463,-118.755997
6,Park City,1,40.646092,-111.497996
7,Ogden,1,41.223005,-111.973843
8,Washington,1,38.895037,-77.036543
9,Michigan,1,43.621195,-84.682435


Note that this returns a single point in space. Are Utah, California, or even Russia one point on a map? 

In [16]:
world_map = folium.Map(location=[42, -102], zoom_start=1)
location_df = location_df.dropna(subset=['lat', 'lon'])

lat = list(location_df["lat"])
long = list(location_df["lon"])
rad = list(location_df["count"])
name = list(location_df["location"])

for lt,ln,r,n in zip(lat,long,rad,name):
    folium.CircleMarker(location = [lt,ln],
                        popup=n,
                        radius = r).add_to(world_map)
world_map

## Using US Geological Survey Data

In [99]:
usgs_df = pd.read_csv("UTAH_USGS.csv")
len(usgs_df)

FileNotFoundError: [Errno 2] File UTAH_USGS.csv does not exist: 'UTAH_USGS.csv'

In [100]:
usgs_df[usgs_df.STATE_ALPHA == "UT"].FEATURE_CLASS.value_counts()

Valley             5730
Populated Place    3618
Spring             2554
Stream             2545
Summit             2351
Locale             1926
Church             1577
Reservoir           961
Building            906
Lake                899
School              870
Flat                775
Mine                704
Canal               576
Ridge               554
Cemetery            531
Park                455
Basin               445
Civil               403
Dam                 384
Gap                 310
Post Office         259
Bench               243
Airport             221
Arch                200
Trail               188
Pillar              179
Cliff               177
Well                131
Range               113
Tower               112
Bend                106
Census               98
Area                 96
Hospital             88
Bay                  63
Rapids               56
Oilfield             52
Slope                45
Plain                44
Bar                  38
Bridge          

In [19]:
feature = "Populated Place"
ut_usgs_df = usgs_df[(usgs_df.STATE_ALPHA == "UT") & (usgs_df.FEATURE_CLASS == feature)]

In [20]:
ut_usgs_df[ut_usgs_df.FEATURE_NAME == "Salt Lake City"]

Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,PRIM_LAT_DEC,PRIM_LONG_DEC,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED
1342331,1454997,Salt Lake City,Populated Place,UT,49,Salt Lake,35.0,404539N,1115328W,40.760779,-111.891047,,,,,1300.0,4265.0,Salt Lake City North,12/31/1979,05/11/2011


In [21]:
year_begin = 1928 #UHQ first year is 1928
year_end = 2021 #UHQ end year is 2021

#creating string containing all UHQ texts
df2 = df[(df['text'].notna()) & (df['year'] >= year_begin) & (df['year'] <= year_end)].reset_index()

full_uhq_texts = ' '.join([str(elem) for elem in df2.text.tolist()])

In [22]:
usgs_counts = []

for fn in ut_usgs_df.FEATURE_NAME.tolist():
    usgs_counts.append(full_uhq_texts.count(fn))
    
ut_usgs_df['FEATURE_COUNTS'] = usgs_counts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ut_usgs_df['FEATURE_COUNTS'] = usgs_counts


In [23]:
len(usgs_counts), len(ut_usgs_df)

(3618, 3618)

In [24]:
ut_usgs_df

Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,PRIM_LAT_DEC,...,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,FEATURE_COUNTS
347050,395885,Utida,Populated Place,UT,49,Cache,5.0,415957N,1115831W,41.999092,...,,,,,1411.0,4629.0,Trenton,12/31/1979,,2
1313673,1425039,Abraham,Populated Place,UT,49,Millard,27.0,392348N,1124305W,39.396620,...,,,,,1400.0,4593.0,Sutherland,12/31/1979,,335
1313684,1425050,Adamsville,Populated Place,UT,49,Beaver,1.0,381530N,1124738W,38.258303,...,,,,,1685.0,5528.0,Adamsville,12/31/1979,,1
1313730,1425096,Altamont,Populated Place,UT,49,Duchesne,13.0,402134N,1101711W,40.359398,...,,,,,1947.0,6388.0,Altamont,12/31/1979,03/21/2008,0
1313731,1425097,Altonah,Populated Place,UT,49,Duchesne,13.0,402405N,1101738W,40.401341,...,,,,,2034.0,6673.0,Altonah,12/31/1979,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2199708,2708039,Covered Bridge Canyon,Populated Place,UT,49,Utah,49.0,400209N,1113312W,40.035829,...,,,,,1559.0,5115.0,Spanish Fork Peak,11/04/2011,11/08/2011,0
2260035,2783908,Interlaken,Populated Place,UT,49,Wasatch,51.0,403230N,1112823W,40.541664,...,,,,,1805.0,5922.0,Heber City,04/28/2016,,0
2263734,2791542,Cedar Highlands,Populated Place,UT,49,Iron,21.0,373815N,1130243W,37.637628,...,,,,,2311.0,7582.0,Cedar City,05/17/2018,05/06/2021,0
2268987,2797924,Emigration Canyon,Populated Place,UT,49,Salt Lake,35.0,404612N,1114535W,40.769979,...,,,,,1639.0,5377.0,Fort Douglas,03/04/2019,,65


Question: Do you notice any potential problems with the displayed data above? Note that "Abraham" appears in UHQ 335 times, but do you think all of these occurances refer to the location?

In [25]:
place_name = "Salt Lake City"
ut_usgs_df[ut_usgs_df.FEATURE_NAME == place_name]

Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,PRIM_LAT_DEC,...,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,FEATURE_COUNTS
1342331,1454997,Salt Lake City,Populated Place,UT,49,Salt Lake,35.0,404539N,1115328W,40.760779,...,,,,,1300.0,4265.0,Salt Lake City North,12/31/1979,05/11/2011,6049


In [26]:
utah_map = folium.Map(location=[39, -111], zoom_start=5)

min_number = 1

sub_df = ut_usgs_df[ut_usgs_df.FEATURE_COUNTS >= min_number]

lat = list(sub_df["PRIM_LAT_DEC"])
long = list(sub_df["PRIM_LONG_DEC"])
rad = list(sub_df["FEATURE_COUNTS"])
name = list(sub_df["FEATURE_NAME"])

for lt,ln,r,n in zip(lat,long,rad,name):
    folium.CircleMarker(location = [lt,ln],
                        popup=n,
                        tiles='Stamen Terrain',
                        radius = 1).add_to(utah_map)
utah_map

Try adjusting the "min_number" variable to see how your results change. Note that this approach is great at showing all of the places in Utah that are referenced at least 'n' number of times, but not great at showing volume. How much more likely is Salt Lake City or a place in Salt Lake County to appear than other locations in Utah? For that, we can turn to choropleth maps. 

## Choropleth Maps

In [96]:
#Import Libraries
import geopandas as gpd
import pandas as pd
import numpy as np
import folium
from folium.features import GeoJsonTooltip

#Read the geoJSON file using geopandas
geojson = gpd.read_file('spatial history/georef-united-states-of-america-county.geojson')
geojson=geojson[geojson['ste_name'] == "Utah"]

ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

folium.Choropleth(geo_data=geojson).add_to(ut_county_map)
ut_county_map

In [97]:
min_number = 1

sub_df = ut_usgs_df[ut_usgs_df.FEATURE_COUNTS >= min_number]

county_counts = pd.DataFrame(sub_df['COUNTY_NAME'].value_counts().reset_index())
county_counts.columns = ['county_name', 'loc_count']
total_count = []
for county in county_counts.county_name.tolist():
    #sub_df = ut_usgs_df[ut_usgs_df.COUNTY_NAME == county]
    total_count.append(sub_df[sub_df.COUNTY_NAME == county]['FEATURE_COUNTS'].sum())

county_counts['total_count'] = total_count

county_counts

AttributeError: 'DataFrame' object has no attribute 'FEATURE_COUNTS'

In [98]:
df_final = geojson.merge(county_counts, left_on="coty_name", right_on="county_name", how="outer") 
df_final = df_final[~df_final['geometry'].isna()]
df_final

Unnamed: 0,coty_code,coty_gnis_code,coty_name_long,ste_name,coty_type,year,coty_area_code,ste_code,coty_fp_code,coty_name,geometry,county_name,loc_count,total_count
0,49037,1448032,San Juan County,Utah,county,2020,USA,49,37,San Juan,"POLYGON ((-109.92799 38.15188, -109.92719 38.14796, -109.92937 38.13992, -109.93323 38.13630, -109.94366 38.13516, -109.95677 38.12945, -109.96269 38.11937, -109.96364 38.11433, -109.96950 38.11198, -109.97793 38.11010, -109.98214 38.10679, -109.98504 38.10178, -109.99250 38.09995, -109.99595 38.10052, -110.00102 38.10663, -110.00576 38.10955, -110.00949 38.10966, -110.01668 38.10613, -110.026...",San Juan,9,189.0
1,49043,1448035,Summit County,Utah,county,2020,USA,49,43,Summit,"POLYGON ((-111.26497 41.14404, -111.27467 41.14257, -111.28972 41.14326, -111.29444 41.13913, -111.30009 41.13717, -111.30162 41.13442, -111.30701 41.13093, -111.31020 41.13049, -111.31846 41.13244, -111.32412 41.13745, -111.33239 41.13810, -111.33787 41.13304, -111.34096 41.13282, -111.34579 41.12872, -111.34886 41.12997, -111.36037 41.12919, -111.36660 41.13002, -111.36868 41.12201, -111.367...",Summit,12,596.0
2,49041,1448034,Sevier County,Utah,county,2020,USA,49,41,Sevier,"POLYGON ((-112.51850 38.51041, -112.44870 38.51042, -112.38000 38.51050, -112.32676 38.51085, -112.28840 38.51135, -112.26964 38.51215, -112.12575 38.51214, -112.06213 38.51047, -112.06197 38.50209, -112.04655 38.50191, -112.00166 38.50194, -112.00075 38.50223, -111.96100 38.50218, -111.83230 38.50219, -111.76639 38.50224, -111.75630 38.51015, -111.62526 38.51022, -111.56074 38.51008, -111.443...",Sevier,13,244.0
3,49051,1448039,Wasatch County,Utah,county,2020,USA,49,51,Wasatch,"POLYGON ((-110.89166 39.89965, -110.89197 40.00172, -110.89179 40.08240, -110.89627 40.08238, -110.89657 40.16383, -110.89666 40.21241, -110.89669 40.29772, -110.89675 40.36482, -110.89659 40.43118, -110.90293 40.43121, -110.90288 40.53457, -110.90248 40.58658, -110.90229 40.63465, -110.90183 40.64908, -110.90197 40.67816, -110.90195 40.68219, -110.91121 40.68199, -110.92472 40.68011, -110.933...",Wasatch,9,150.0
4,49021,1448025,Iron County,Utah,county,2020,USA,49,21,Iron,"POLYGON ((-114.04998 38.14876, -114.05004 38.09753, -114.05044 38.05425, -114.05029 38.03962, -114.05027 37.97947, -114.05050 37.95334, -114.05014 37.89481, -114.04966 37.88138, -114.04997 37.86785, -114.04947 37.83795, -114.04961 37.82354, -114.04830 37.80942, -114.04915 37.79491, -114.04986 37.76614, -114.05165 37.75150, -114.05201 37.67959, -114.05235 37.60478, -113.93803 37.60482, -113.863...",Iron,9,430.0
5,49017,1448023,Garfield County,Utah,county,2020,USA,49,17,Garfield,"POLYGON ((-112.47868 38.14742, -112.47873 38.08691, -112.47866 37.99997, -112.47897 37.97768, -112.46697 37.97766, -112.46694 37.95692, -112.46818 37.89046, -112.50080 37.89040, -112.58840 37.89042, -112.58847 37.88221, -112.57840 37.88215, -112.57855 37.80454, -112.64339 37.80670, -112.67213 37.80553, -112.68922 37.80559, -112.68916 37.72624, -112.68899 37.71721, -112.68576 37.71734, -112.686...",Garfield,9,67.0
6,49011,1448020,Davis County,Utah,county,2020,USA,49,11,Davis,"POLYGON ((-112.26022 40.76909, -112.14827 40.84613, -112.00657 40.92185, -111.95867 40.92182, -111.95860 40.90017, -111.96174 40.89336, -111.96615 40.89165, -111.96873 40.88690, -111.96756 40.88006, -111.97107 40.87664, -111.96289 40.87000, -111.96407 40.85997, -111.96588 40.85463, -111.95597 40.84921, -111.95162 40.84157, -111.94673 40.83796, -111.94287 40.83049, -111.94669 40.82384, -111.946...",Davis,51,671.0
7,49009,1448019,Daggett County,Utah,county,2020,USA,49,9,Daggett,"POLYGON ((-109.04894 40.66260, -109.04909 40.71450, -109.04879 40.75143, -109.04846 40.82608, -109.04884 40.83782, -109.04888 40.88776, -109.04928 40.90865, -109.05003 41.00069, -109.17869 41.00090, -109.19781 41.00118, -109.21325 41.00201, -109.23251 41.00211, -109.25001 41.00105, -109.30735 41.00052, -109.40023 40.99988, -109.51089 40.99905, -109.54370 40.99840, -109.62095 40.99835, -109.658...",Daggett,1,50.0
8,49029,1448028,Morgan County,Utah,county,2020,USA,49,29,Morgan,"POLYGON ((-111.42073 41.36131, -111.42401 41.35634, -111.42367 41.34829, -111.42668 41.34277, -111.43436 41.34226, -111.43780 41.33429, -111.43659 41.33125, -111.43737 41.32378, -111.44339 41.32206, -111.44471 41.31823, -111.44889 41.31626, -111.44842 41.31057, -111.44411 41.30629, -111.44255 41.29824, -111.43918 41.29512, -111.44282 41.29236, -111.44011 41.28958, -111.44449 41.28747, -111.446...",Morgan,6,33.0
9,49035,1448031,Salt Lake County,Utah,county,2020,USA,49,35,Salt Lake,"POLYGON ((-111.59388 40.57706, -111.59070 40.58014, -111.58266 40.58128, -111.57617 40.58331, -111.57249 40.58216, -111.56391 40.58505, -111.56002 40.59080, -111.56087 40.59375, -111.55840 40.59995, -111.55926 40.60152, -111.55327 40.60931, -111.55873 40.61324, -111.55751 40.61932, -111.56742 40.62328, -111.56847 40.62872, -111.57605 40.63278, -111.57775 40.63868, -111.57735 40.64258, -111.579...",Salt Lake,114,8426.0


In [88]:
ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

map_variable = 'total_count' #possible variables: 'loc_count' & 'total_count'

folium.Choropleth(
            geo_data=geojson,
            data=df_final,
            columns=['coty_code', map_variable],  #Here we tell folium to get the county fips and plot new_cases_7days metric for each county
            key_on='feature.properties.coty_code', #Here we grab the geometries/county boundaries from the geojson file using the key 'coty_code' which is the same as county fips
            #threshold_scale=(df_final[map_variable].quantile((0, 0.2, 0.4, 0.6, 0.8, 1))).tolist(), #we can create our own custom scale if we like.
            fill_color='YlOrRd',
            nan_fill_color="White", #Use white color if there is no data available for the county
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name='', #title of the legend
            highlight=True,
            line_color='black').add_to(ut_county_map)

ut_county_map

CRSError: Invalid projection: EPSG:4326: (Internal Proj Error: proj_create: no database context specified)

# Combining USGS and NER

In [39]:
import pandas as pd

#Load UHQ data into a pandas dataframe
df = pd.read_excel("UHQ_FULL_EDITED.xlsx")

year_begin = 1928 #UHQ first year is 1928
year_end = 2021 #UHQ end year is 2021

#creating string containing all UHQ texts
df2 = df[(df['text'].notna()) & (df['year'] >= year_begin) & (df['year'] <= year_end)].reset_index()


#Look at the first article in our database
texts = df2.text.tolist()

places = []
#Run Named Entity Recognition on the text
for t in texts:
    document = nlp(t)
    for named_entity in document.ents:
        if named_entity.label_ == "GPE":
            places.append(named_entity.text.replace("’s", ""))
len(places)

94236

In [40]:
places_tally = Counter(places)
places_df = pd.DataFrame(places_tally.most_common(), columns=['place', 'count'])
places_df.to_csv("ner_places.csv")

In [72]:
places_df = pd.read_excel('spatial history/ner_places.xlsx')
places_df[:50]

Unnamed: 0,place,count
0,Utah,20358.0
1,Salt Lake City,5323.0
2,California,2243.0
3,the United States,1985.0
4,Colorado,1971.0
5,Ogden,1615.0
6,Salt Lake,1373.0
7,Washington,1230.0
8,America,1179.0
9,Nevada,968.0


In [73]:
usgs_df = pd.read_csv("spatial history/UTAH_USGS.csv")
feature = "Populated Place"
ut_usgs_df = usgs_df[(usgs_df.STATE_ALPHA == "UT") & (usgs_df.FEATURE_CLASS == feature)]


place_match = []
place_miss = []

for p in places_df.place.tolist():
    if p in ut_usgs_df.FEATURE_NAME.tolist():
        place_match.append(p)
    else:
        place_miss.append(p)

In [74]:
utah_df = ut_usgs_df.merge(places_df, left_on="FEATURE_NAME", right_on="place", how="inner")
utah_df

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,place,count
0,1313684,1425050,Adamsville,Populated Place,UT,49,Beaver,1.0,381530N,1124738W,...,,,,1685.0,5528.0,Adamsville,12/31/1979,,Adamsville,1.0
1,1313731,1425097,Altonah,Populated Place,UT,49,Duchesne,13.0,402405N,1101738W,...,,,,2034.0,6673.0,Altonah,12/31/1979,,Altonah,1.0
2,1313835,1425202,Atwood,Populated Place,UT,49,Salt Lake,35.0,403758N,1115354W,...,,,,1323.0,4340.0,Salt Lake City South,12/31/1979,,Atwood,4.0
3,1314060,1425433,Benson,Populated Place,UT,49,Cache,5.0,414715N,1115549W,...,,,,1350.0,4429.0,Newton,12/31/1979,03/21/2008,Benson,3.0
4,1314332,1425709,Black Rock,Populated Place,UT,49,Millard,27.0,384256N,1125804W,...,,,,1480.0,4856.0,Black Rock,12/31/1979,09/23/2013,Black Rock,17.0
5,1314464,1425842,Bluff,Populated Place,UT,49,San Juan,37.0,371704N,1093306W,...,,,,1318.0,4324.0,Bluff,12/31/1979,03/27/2019,Bluff,111.0
6,1314487,1425865,Bonanza,Populated Place,UT,49,Uintah,47.0,400116N,1091038W,...,,,,1657.0,5436.0,Bonanza,12/31/1979,01/29/2011,Bonanza,1.0
7,1314887,1426269,Caineville,Populated Place,UT,49,Wayne,55.0,381959N,1110108W,...,,,,1402.0,4600.0,Caineville,12/31/1979,,Caineville,15.0
8,1314961,1426344,Cannonville,Populated Place,UT,49,Garfield,17.0,373401N,1120314W,...,,,,1794.0,5886.0,Cannonville,12/31/1979,03/21/2008,Cannonville,16.0
9,1314968,1426351,Carbonville,Populated Place,UT,49,Carbon,7.0,393712N,1105004W,...,,,,1707.0,5600.0,Price,12/31/1979,01/29/2011,Carbonville,2.0


In [87]:
utah_map = folium.Map(location=[39, -111], zoom_start=5)

min_number = 1

sub_df = utah_df#[utah_df.count >= min_number]

lat = list(sub_df["PRIM_LAT_DEC"])
long = list(sub_df["PRIM_LONG_DEC"])
rad = list(sub_df["count"])
name = list(sub_df["FEATURE_NAME"])

for lt,ln,r,n in zip(lat,long,rad,name):
    folium.CircleMarker(location = [lt,ln],
                        popup=n,
                        tiles='Stamen Terrain',
                        radius = 1).add_to(utah_map)
utah_map

In [76]:
min_number = 1

sub_df = utah_df#[ut_usgs_df.FEATURE_COUNTS >= min_number]

county_counts = pd.DataFrame(sub_df['COUNTY_NAME'].value_counts().reset_index())
county_counts.columns = ['county_name', 'loc_count']
total_count = []
for county in county_counts.county_name.tolist():
    #sub_df = ut_usgs_df[ut_usgs_df.COUNTY_NAME == county]
    total_count.append(sub_df[sub_df.COUNTY_NAME == county]['count'].sum())

county_counts['total_count'] = total_count

county_counts

Unnamed: 0,county_name,loc_count,total_count
0,Salt Lake,114,8426.0
1,Davis,51,671.0
2,Weber,40,1897.0
3,Utah,37,1399.0
4,Box Elder,24,715.0
5,Cache,23,790.0
6,Washington,23,2345.0
7,Millard,21,203.0
8,Carbon,19,429.0
9,Tooele,18,405.0


In [70]:
sub_df[sub_df.COUNTY_NAME == 'Washington']

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,place,count
35,1329057,1440940,Enterprise,Populated Place,UT,49,Washington,53.0,373425N,1134309W,...,,,,1620.0,5315.0,Enterprise,12/31/1979,03/21/2008,Enterprise,4
71,1317520,1428951,Hurricane,Populated Place,UT,49,Washington,53.0,371031N,1131724W,...,,,,988.0,3241.0,Hurricane,12/31/1979,03/21/2008,Hurricane,41
80,1317952,1429398,LaVerkin,Populated Place,UT,49,Washington,53.0,371204N,1131611W,...,,,,973.0,3192.0,Hurricane,12/31/1979,03/21/2008,LaVerkin,1
120,1320458,1431946,Rockville,Populated Place,UT,49,Washington,53.0,370940N,1130218W,...,,,,1142.0,3747.0,Springdale West,12/31/1979,03/21/2008,Rockville,51
122,1320743,1432234,Santa Clara,Populated Place,UT,49,Washington,53.0,370759N,1133915W,...,,,,841.0,2759.0,Santa Clara,12/31/1979,03/21/2008,Santa Clara,227
126,1321368,1432867,Springdale,Populated Place,UT,49,Washington,53.0,371120N,1125955W,...,,,,1191.0,3907.0,Springdale East,12/31/1979,03/21/2008,Springdale,16
142,1322381,1433898,Virgin,Populated Place,UT,49,Washington,53.0,371230N,1131118W,...,,,,1102.0,3615.0,Virgin,12/31/1979,03/21/2008,Virgin,26
167,1325659,1437504,Bloomington,Populated Place,UT,49,Washington,53.0,370248N,1133622W,...,,,,771.0,2529.0,Saint George,12/31/1979,,Bloomington,12
191,1325725,1437570,Grafton,Populated Place,UT,49,Washington,53.0,371002N,1130448W,...,,,,1119.0,3671.0,Springdale West,12/31/1979,,Grafton,7
194,1325731,1437576,Gunlock,Populated Place,UT,49,Washington,53.0,371710N,1134548W,...,,,,1112.0,3648.0,Gunlock,12/31/1979,,Gunlock,14


In [77]:
#Import Libraries
import geopandas as gpd
import pandas as pd
import numpy as np
import folium
from folium.features import GeoJsonTooltip

#Read the geoJSON file using geopandas
geojson = gpd.read_file('spatial history/georef-united-states-of-america-county.geojson')
geojson=geojson[geojson['ste_name'] == "Utah"]

ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

folium.Choropleth(geo_data=geojson).add_to(ut_county_map)

df_final = geojson.merge(county_counts, left_on="coty_name", right_on="county_name", how="outer") 
df_final = df_final[~df_final['geometry'].isna()]
df_final

CRSError: Invalid projection: epsg:4326: (Internal Proj Error: proj_create: no database context specified)

In [69]:
ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

map_variable = 'total_count' #possible variables: 'loc_count' & 'total_count'

folium.Choropleth(
            geo_data=geojson,
            data=df_final,
            columns=['coty_code', map_variable],  #Here we tell folium to get the county fips and plot new_cases_7days metric for each county
            key_on='feature.properties.coty_code', #Here we grab the geometries/county boundaries from the geojson file using the key 'coty_code' which is the same as county fips
            #threshold_scale=(df_final[map_variable].quantile((0, 0.2, 0.4, 0.6, 0.8, 1))).tolist(), #we can create our own custom scale if we like.
            fill_color='YlOrRd',
            nan_fill_color="White", #Use white color if there is no data available for the county
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name='', #title of the legend
            highlight=True,
            line_color='black').add_to(ut_county_map)

ut_county_map

CRSError: Invalid projection: EPSG:4326: (Internal Proj Error: proj_create: no database context specified)