# Spatial History
Intro to Digital History, 4/4/2022
Portions adapted from https://melaniewalsh.github.io/Intro-Cultural-Analytics/07-Mapping/01-Mapping.html

In [1]:
#!pip install geopy
#!pip install folium
#!pip install geopandas

In [2]:
from geopy.geocoders import Nominatim
import folium

geolocator = Nominatim(user_agent="YOUR NAME's mapping app", timeout=2)

In [3]:
loc = "University of Utah"

location = geolocator.geocode(loc)

location

Location(University of Utah, 201, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States, (40.762813699999995, -111.83687191368261, 0.0))

In [4]:
print(location.address)

University of Utah, 201, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States


In [5]:
print(location.latitude, location.longitude)

40.762813699999995 -111.83687191368261


In [6]:
print(f"Class: {location.raw['class']} \nType: {location.raw['type']}")

Class: amenity 
Type: university


In [7]:
utah_map = folium.Map(location=[location.latitude, location.longitude],
                     zoom_start=14)

folium.Marker(location=[location.latitude, location.longitude],
             popup="University of Utah").add_to(utah_map)

utah_map

In [8]:
utah_map = folium.Map(location=[location.latitude, location.longitude],
                     zoom_start=10)

folium.CircleMarker(location=[location.latitude, location.longitude],
                    radius=10,
                    color = 'black',
                    fill_color = 'red',
                    fill_opacity = 0.6,
                    popup = "University of Utah").add_to(utah_map)

utah_map

In [9]:
possible_locations = geolocator.geocode("University of Utah", exactly_one=False)

for location in possible_locations:
    print(location.address)
    print(location.latitude, location.longitude)
    print(f"Importance: {location.raw['importance']}")

University of Utah, 201, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States
40.762813699999995 -111.83687191368261
Importance: 0.8557694184669417
Department of Linguistics, University of Utah, 255, Central Campus Drive, Salt Lake City, Salt Lake County, Utah, 84112, United States
40.7634795 -111.8408017
Importance: 0.31100000000000005
Outdoor Adventures, Presidents Circle, Salt Lake City, Salt Lake County, Utah, 84112, United States
40.7655129 -111.838165
Importance: 0.11100000000000002


# Where is Utah History?

Using Named Entity Recognition

In [10]:
#!pip install -U spacy
#!python -m spacy download en_core_web_sm
import spacy
from spacy import displacy
from collections import Counter
import pandas as pd
import en_core_web_sm
nlp = en_core_web_sm.load()

In [13]:
import pandas as pd

#Load UHQ data into a pandas dataframe
df = pd.read_excel("../UHQ_FULL_EDITED.xlsx")

In [14]:
#Look at the first article in our database
text = df.text.tolist()[0]

#Run Named Entity Recognition on the text
document = nlp(text)

displacy.render(document[:500], style="ent")

In [15]:
#Create a list of all location (GPE) entities in the text
location_list = []
for named_entity in document.ents:
    if named_entity.label_ == "GPE":
        location_list.append(named_entity.text.replace("’s", ""))

#Print our list
location_list

['Salt Lake City',
 'Salt Lake City',
 'Salt Lake City',
 'Utah',
 'Salt Lake City',
 'Amanda',
 'Amanda',
 'California',
 'Salt Lake City',
 'Park City',
 'New York',
 'Utah',
 'Ogden',
 'Washington',
 'Utah',
 'Utah',
 'Utah',
 'Michigan',
 'Salt Lake City',
 'Salt Lake City',
 'Utah',
 'Utah',
 'New York',
 'Chicago',
 'Chicago',
 'Herald',
 'Russia']

### Question: What are some potential problems that you see with this list? 

In [16]:
#Is Amanda a place???
loc = "Amanda"

location = geolocator.geocode(loc)
location

Location(Amanda, Fairfield County, Ohio, United States, (39.649508, -82.7443367, 0.0))

In [17]:
#We can use the geolocator library to locate the latitude and longitude for each location. 

location_tally = Counter(location_list)

location_df = pd.DataFrame(location_tally.most_common(), columns=['location', 'count'])

loc_list = location_df.location.tolist()

lat = []
lon = []

for l in loc_list:
    temp_loc = geolocator.geocode(l)
    lat.append(temp_loc.latitude)
    lon.append(temp_loc.longitude)

location_df['lat'] = lat
location_df['lon'] = lon
location_df

Unnamed: 0,location,count,lat,lon
0,Salt Lake City,7,40.75962,-111.886798
1,Utah,7,39.422519,-111.714358
2,Amanda,2,39.649508,-82.744337
3,New York,2,40.712728,-74.006015
4,Chicago,2,41.875562,-87.624421
5,California,1,36.701463,-118.755997
6,Park City,1,40.646092,-111.497996
7,Ogden,1,41.223005,-111.973843
8,Washington,1,38.895037,-77.036543
9,Michigan,1,43.621195,-84.682435


Note that this returns a single point in space. Are Utah, California, or even Russia one point on a map? 

In [18]:
world_map = folium.Map(location=[42, -102], zoom_start=1)
location_df = location_df.dropna(subset=['lat', 'lon'])

lat = list(location_df["lat"])
long = list(location_df["lon"])
rad = list(location_df["count"])
name = list(location_df["location"])

for lt,ln,r,n in zip(lat,long,rad,name):
    folium.CircleMarker(location = [lt,ln],
                        popup=n,
                        radius = r).add_to(world_map)
world_map

## Using US Geological Survey Data

In [2]:
import pandas as pd
usgs_df = pd.read_csv("UTAH_USGS.csv")
len(usgs_df)

31913

In [4]:
usgs_df[usgs_df.STATE_ALPHA == "UT"].FEATURE_CLASS.value_counts()[:20]

Valley             5730
Populated Place    3618
Spring             2554
Stream             2545
Summit             2351
Locale             1926
Church             1577
Reservoir           961
Building            906
Lake                899
School              870
Flat                775
Mine                704
Canal               576
Ridge               554
Cemetery            531
Park                455
Basin               445
Civil               403
Dam                 384
Name: FEATURE_CLASS, dtype: int64

In [5]:
usgs_df[(usgs_df.STATE_ALPHA == "UT") & (usgs_df.FEATURE_CLASS == "Mine")]

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,PRIM_LONG_DEC,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED
148,1313671,1425037,Aberdeen Mine,Mine,UT,49,Carbon,7.0,394218N,1104719W,...,-110.788492,,,,,2182.0,7159.0,Helper,12/31/1979,
172,1313695,1425061,Ajax,Mine,UT,49,Juab,23.0,395538N,1120646W,...,-112.112723,,,,,2169.0,7116.0,Eureka,12/31/1979,
175,1313698,1425064,Alaska,Mine,UT,49,Juab,23.0,395441N,1120535W,...,-112.093000,,,,,2032.0,6667.0,Eureka,12/31/1979,
191,1313714,1425080,Allah Mine,Mine,UT,49,Juab,23.0,395027N,1122307W,...,-112.385228,,,,,1960.0,6430.0,Cherry Creek,12/31/1979,
210,1313733,1425099,Alvarado Mine,Mine,UT,49,Tooele,45.0,401013N,1134842W,...,-113.811665,,,,,1600.0,5249.0,Gold Hill,12/31/1979,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29257,1342803,1455545,Tip Top Mine,Mine,UT,49,Tooele,45.0,402730N,1121949W,...,-112.330226,,,,,1965.0,6447.0,Stockton,02/25/1989,
29419,1342965,1455712,Gray Daun Mine,Mine,UT,49,San Juan,37.0,382015N,1090547W,...,-109.096497,,,,,2018.0,6621.0,Ray Mesa,10/01/1992,
29542,1343088,1455845,Utah Mine,Mine,UT,49,Utah,49.0,395624N,1120559W,...,-112.099667,,,,,2228.0,7310.0,Eureka,10/01/1992,
29632,1469262,1585926,Brillian Mine,Mine,UT,49,Emery,15.0,385241N,1103346W,...,-110.562658,,,,,2012.0,6601.0,Drowned Hole Draw,06/05/1979,12/21/2010


In [21]:
feature = "Populated Place"
ut_usgs_df = usgs_df[(usgs_df.STATE_ALPHA == "UT") & (usgs_df.FEATURE_CLASS == feature)]

In [22]:
ut_usgs_df[ut_usgs_df.FEATURE_NAME == "Salt Lake City"]

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,PRIM_LONG_DEC,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED
28787,1342331,1454997,Salt Lake City,Populated Place,UT,49,Salt Lake,35.0,404539N,1115328W,...,-111.891047,,,,,1300.0,4265.0,Salt Lake City North,12/31/1979,05/11/2011


In [23]:
year_begin = 1928 #UHQ first year is 1928
year_end = 2021 #UHQ end year is 2021

#creating string containing all UHQ texts
df2 = df[(df['text'].notna()) & (df['year'] >= year_begin) & (df['year'] <= year_end)].reset_index()

full_uhq_texts = ' '.join([str(elem) for elem in df2.text.tolist()])

In [24]:
usgs_counts = []

for fn in ut_usgs_df.FEATURE_NAME.tolist():
    usgs_counts.append(full_uhq_texts.count(fn))
    
ut_usgs_df['FEATURE_COUNTS'] = usgs_counts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ut_usgs_df['FEATURE_COUNTS'] = usgs_counts


In [25]:
len(usgs_counts), len(ut_usgs_df)

(3618, 3618)

In [26]:
ut_usgs_df

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,FEATURE_COUNTS
85,347050,395885,Utida,Populated Place,UT,49,Cache,5.0,415957N,1115831W,...,,,,,1411.0,4629.0,Trenton,12/31/1979,,2
150,1313673,1425039,Abraham,Populated Place,UT,49,Millard,27.0,392348N,1124305W,...,,,,,1400.0,4593.0,Sutherland,12/31/1979,,335
161,1313684,1425050,Adamsville,Populated Place,UT,49,Beaver,1.0,381530N,1124738W,...,,,,,1685.0,5528.0,Adamsville,12/31/1979,,1
207,1313730,1425096,Altamont,Populated Place,UT,49,Duchesne,13.0,402134N,1101711W,...,,,,,1947.0,6388.0,Altamont,12/31/1979,03/21/2008,0
208,1313731,1425097,Altonah,Populated Place,UT,49,Duchesne,13.0,402405N,1101738W,...,,,,,2034.0,6673.0,Altonah,12/31/1979,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31218,2199708,2708039,Covered Bridge Canyon,Populated Place,UT,49,Utah,49.0,400209N,1113312W,...,,,,,1559.0,5115.0,Spanish Fork Peak,11/04/2011,11/08/2011,0
31811,2260035,2783908,Interlaken,Populated Place,UT,49,Wasatch,51.0,403230N,1112823W,...,,,,,1805.0,5922.0,Heber City,04/28/2016,,0
31838,2263734,2791542,Cedar Highlands,Populated Place,UT,49,Iron,21.0,373815N,1130243W,...,,,,,2311.0,7582.0,Cedar City,05/17/2018,05/06/2021,0
31864,2268987,2797924,Emigration Canyon,Populated Place,UT,49,Salt Lake,35.0,404612N,1114535W,...,,,,,1639.0,5377.0,Fort Douglas,03/04/2019,,65


Question: Do you notice any potential problems with the displayed data above? Note that "Abraham" appears in UHQ 335 times, but do you think all of these occurances refer to the location?

In [27]:
place_name = "Salt Lake City"
ut_usgs_df[ut_usgs_df.FEATURE_NAME == place_name]

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,SOURCE_LAT_DMS,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,FEATURE_COUNTS
28787,1342331,1454997,Salt Lake City,Populated Place,UT,49,Salt Lake,35.0,404539N,1115328W,...,,,,,1300.0,4265.0,Salt Lake City North,12/31/1979,05/11/2011,6049


In [28]:
utah_map = folium.Map(location=[39, -111], zoom_start=5)

min_number = 1

sub_df = ut_usgs_df[ut_usgs_df.FEATURE_COUNTS >= min_number]

lat = list(sub_df["PRIM_LAT_DEC"])
long = list(sub_df["PRIM_LONG_DEC"])
rad = list(sub_df["FEATURE_COUNTS"])
name = list(sub_df["FEATURE_NAME"])

for lt,ln,r,n in zip(lat,long,rad,name):
    folium.CircleMarker(location = [lt,ln],
                        popup=n,
                        tiles='Stamen Terrain',
                        radius = 1).add_to(utah_map)
utah_map

Try adjusting the "min_number" variable to see how your results change. Note that this approach is great at showing all of the places in Utah that are referenced at least 'n' number of times, but not great at showing volume. How much more likely is Salt Lake City or a place in Salt Lake County to appear than other locations in Utah? For that, we can turn to choropleth maps. 

## Choropleth Maps

In [3]:
#Import Libraries
import geopandas as gpd
import pandas as pd
import numpy as np
import folium
from folium.features import GeoJsonTooltip

#Read the geoJSON file using geopandas
geojson = gpd.read_file('georef-united-states-of-america-county.geojson')
geojson=geojson[geojson['ste_name'] == "Utah"]

ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

folium.Choropleth(geo_data=geojson).add_to(ut_county_map)
ut_county_map

In [31]:
min_number = 1

sub_df = ut_usgs_df[ut_usgs_df.FEATURE_COUNTS >= min_number]

county_counts = pd.DataFrame(sub_df['COUNTY_NAME'].value_counts().reset_index())
county_counts.columns = ['county_name', 'loc_count']
total_count = []
for county in county_counts.county_name.tolist():
    #sub_df = ut_usgs_df[ut_usgs_df.COUNTY_NAME == county]
    total_count.append(sub_df[sub_df.COUNTY_NAME == county]['FEATURE_COUNTS'].sum())

county_counts['total_count'] = total_count

county_counts

Unnamed: 0,county_name,loc_count,total_count
0,Salt Lake,304,36060
1,Davis,145,20336
2,Weber,113,12296
3,Utah,63,6595
4,Box Elder,46,2626
5,Millard,31,4759
6,Tooele,31,2948
7,Cache,31,2638
8,Washington,28,4519
9,Carbon,25,2078


In [32]:
df_final = geojson.merge(county_counts, left_on="coty_name", right_on="county_name", how="outer") 
df_final = df_final[~df_final['geometry'].isna()]
df_final

Unnamed: 0,coty_code,coty_gnis_code,coty_name_long,ste_name,coty_type,year,coty_area_code,ste_code,coty_fp_code,coty_name,geometry,county_name,loc_count,total_count
0,49037,1448032,San Juan County,Utah,county,2020,USA,49,37,San Juan,"POLYGON ((-109.92799 38.15188, -109.92719 38.1...",San Juan,20,1328
1,49043,1448035,Summit County,Utah,county,2020,USA,49,43,Summit,"POLYGON ((-111.26497 41.14404, -111.27467 41.1...",Summit,22,2473
2,49041,1448034,Sevier County,Utah,county,2020,USA,49,41,Sevier,"POLYGON ((-112.51850 38.51041, -112.44870 38.5...",Sevier,21,3621
3,49051,1448039,Wasatch County,Utah,county,2020,USA,49,51,Wasatch,"POLYGON ((-110.89166 39.89965, -110.89197 40.0...",Wasatch,13,907
4,49021,1448025,Iron County,Utah,county,2020,USA,49,21,Iron,"POLYGON ((-114.04998 38.14876, -114.05004 38.0...",Iron,19,1419
5,49017,1448023,Garfield County,Utah,county,2020,USA,49,17,Garfield,"POLYGON ((-112.47868 38.14742, -112.47873 38.0...",Garfield,14,1593
6,49011,1448020,Davis County,Utah,county,2020,USA,49,11,Davis,"POLYGON ((-112.26022 40.76909, -112.14827 40.8...",Davis,145,20336
7,49009,1448019,Daggett County,Utah,county,2020,USA,49,9,Daggett,"POLYGON ((-109.04894 40.66260, -109.04909 40.7...",Daggett,2,62
8,49029,1448028,Morgan County,Utah,county,2020,USA,49,29,Morgan,"POLYGON ((-111.42073 41.36131, -111.42401 41.3...",Morgan,10,870
9,49035,1448031,Salt Lake County,Utah,county,2020,USA,49,35,Salt Lake,"POLYGON ((-111.59388 40.57706, -111.59070 40.5...",Salt Lake,304,36060


In [33]:
ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

map_variable = 'total_count' #possible variables: 'loc_count' & 'total_count'

folium.Choropleth(
            geo_data=geojson,
            data=df_final,
            columns=['coty_code', map_variable],  #Here we tell folium to get the county fips and plot new_cases_7days metric for each county
            key_on='feature.properties.coty_code', #Here we grab the geometries/county boundaries from the geojson file using the key 'coty_code' which is the same as county fips
            #threshold_scale=(df_final[map_variable].quantile((0, 0.2, 0.4, 0.6, 0.8, 1))).tolist(), #we can create our own custom scale if we like.
            fill_color='YlOrRd',
            nan_fill_color="White", #Use white color if there is no data available for the county
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name='', #title of the legend
            highlight=True,
            line_color='black').add_to(ut_county_map)

ut_county_map

# Combining USGS and NER

In [34]:
# ##Takes a long time to run!
# import pandas as pd

# #Load UHQ data into a pandas dataframe
# df = pd.read_excel("UHQ_FULL_EDITED.xlsx")

# year_begin = 1928 #UHQ first year is 1928
# year_end = 2021 #UHQ end year is 2021

# #creating string containing all UHQ texts
# df2 = df[(df['text'].notna()) & (df['year'] >= year_begin) & (df['year'] <= year_end)].reset_index()


# #Look at the first article in our database
# texts = df2.text.tolist()

# places = []
# #Run Named Entity Recognition on the text
# for t in texts:
#     document = nlp(t)
#     for named_entity in document.ents:
#         if named_entity.label_ == "GPE":
#             places.append(named_entity.text.replace("’s", ""))
# len(places)

# places_tally = Counter(places)
# places_df = pd.DataFrame(places_tally.most_common(), columns=['place', 'count'])
# places_df.to_excel("ner_places.xlsx")

In [4]:
places_df = pd.read_excel('ner_places.xlsx')
places_df[:50]

Unnamed: 0,place,count
0,Utah,20358.0
1,Salt Lake City,5323.0
2,California,2243.0
3,the United States,1985.0
4,Colorado,1971.0
5,Ogden,1615.0
6,Salt Lake,1373.0
7,Washington,1230.0
8,America,1179.0
9,Nevada,968.0


In [5]:
usgs_df = pd.read_csv("UTAH_USGS.csv")
feature = "Populated Place"
ut_usgs_df = usgs_df[(usgs_df.STATE_ALPHA == "UT") & (usgs_df.FEATURE_CLASS == feature)]


place_match = []
place_miss = []

for p in places_df.place.tolist():
    if p in ut_usgs_df.FEATURE_NAME.tolist():
        place_match.append(p)
    else:
        place_miss.append(p)

In [6]:
utah_df = ut_usgs_df.merge(places_df, left_on="FEATURE_NAME", right_on="place", how="inner")
utah_df

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,place,count
0,1313684,1425050,Adamsville,Populated Place,UT,49,Beaver,1.0,381530N,1124738W,...,,,,1685.0,5528.0,Adamsville,12/31/1979,,Adamsville,1.0
1,1313731,1425097,Altonah,Populated Place,UT,49,Duchesne,13.0,402405N,1101738W,...,,,,2034.0,6673.0,Altonah,12/31/1979,,Altonah,1.0
2,1313835,1425202,Atwood,Populated Place,UT,49,Salt Lake,35.0,403758N,1115354W,...,,,,1323.0,4340.0,Salt Lake City South,12/31/1979,,Atwood,4.0
3,1314060,1425433,Benson,Populated Place,UT,49,Cache,5.0,414715N,1115549W,...,,,,1350.0,4429.0,Newton,12/31/1979,03/21/2008,Benson,3.0
4,1314332,1425709,Black Rock,Populated Place,UT,49,Millard,27.0,384256N,1125804W,...,,,,1480.0,4856.0,Black Rock,12/31/1979,09/23/2013,Black Rock,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,1557320,1681817,Portage,Populated Place,UT,49,Box Elder,3.0,415832N,1121226W,...,,,,1331.0,4367.0,Portage,04/12/1996,03/21/2008,Portage,1.0
538,1574708,1699809,Rush Valley,Populated Place,UT,49,Tooele,45.0,402147N,1122702W,...,,,,1537.0,5043.0,Saint John,08/16/1996,03/21/2008,Rush Valley,20.0
539,1715141,1852443,Garden,Populated Place,UT,49,Rich,33.0,415321N,1112309W,...,,,,1830.0,6004.0,Garden City,12/10/1999,03/21/2008,Garden,1.0
540,1727991,1867579,Millcreek,Populated Place,UT,49,Salt Lake,35.0,404113N,1115232W,...,,,,1306.0,4285.0,Salt Lake City South,05/15/2000,01/11/2017,Millcreek,4.0


In [7]:
#utah_df.to_excel("utah_pop_place.xlsx")

In [39]:
utah_map = folium.Map(location=[39, -111], zoom_start=5)

min_number = 1

sub_df = utah_df#[utah_df.count >= min_number]

lat = list(sub_df["PRIM_LAT_DEC"])
long = list(sub_df["PRIM_LONG_DEC"])
rad = list(sub_df["count"])
name = list(sub_df["FEATURE_NAME"])

for lt,ln,r,n in zip(lat,long,rad,name):
    folium.CircleMarker(location = [lt,ln],
                        popup=n,
                        tiles='Stamen Terrain',
                        radius = 1).add_to(utah_map)
utah_map

In [40]:
min_number = 1

sub_df = utah_df#[ut_usgs_df.FEATURE_COUNTS >= min_number]

county_counts = pd.DataFrame(sub_df['COUNTY_NAME'].value_counts().reset_index())
county_counts.columns = ['county_name', 'loc_count']
total_count = []
for county in county_counts.county_name.tolist():
    #sub_df = ut_usgs_df[ut_usgs_df.COUNTY_NAME == county]
    total_count.append(sub_df[sub_df.COUNTY_NAME == county]['count'].sum())

county_counts['total_count'] = total_count

county_counts

Unnamed: 0,county_name,loc_count,total_count
0,Salt Lake,114,8426.0
1,Davis,51,671.0
2,Weber,40,1897.0
3,Utah,37,1399.0
4,Box Elder,24,715.0
5,Washington,23,2345.0
6,Cache,23,790.0
7,Millard,21,203.0
8,Carbon,19,429.0
9,Tooele,18,405.0


In [41]:
sub_df[sub_df.COUNTY_NAME == 'Washington']

Unnamed: 0.1,Unnamed: 0,FEATURE_ID,FEATURE_NAME,FEATURE_CLASS,STATE_ALPHA,STATE_NUMERIC,COUNTY_NAME,COUNTY_NUMERIC,PRIMARY_LAT_DMS,PRIM_LONG_DMS,...,SOURCE_LONG_DMS,SOURCE_LAT_DEC,SOURCE_LONG_DEC,ELEV_IN_M,ELEV_IN_FT,MAP_NAME,DATE_CREATED,DATE_EDITED,place,count
35,1329057,1440940,Enterprise,Populated Place,UT,49,Washington,53.0,373425N,1134309W,...,,,,1620.0,5315.0,Enterprise,12/31/1979,03/21/2008,Enterprise,4.0
71,1317520,1428951,Hurricane,Populated Place,UT,49,Washington,53.0,371031N,1131724W,...,,,,988.0,3241.0,Hurricane,12/31/1979,03/21/2008,Hurricane,41.0
80,1317952,1429398,LaVerkin,Populated Place,UT,49,Washington,53.0,371204N,1131611W,...,,,,973.0,3192.0,Hurricane,12/31/1979,03/21/2008,LaVerkin,1.0
120,1320458,1431946,Rockville,Populated Place,UT,49,Washington,53.0,370940N,1130218W,...,,,,1142.0,3747.0,Springdale West,12/31/1979,03/21/2008,Rockville,51.0
122,1320743,1432234,Santa Clara,Populated Place,UT,49,Washington,53.0,370759N,1133915W,...,,,,841.0,2759.0,Santa Clara,12/31/1979,03/21/2008,Santa Clara,227.0
126,1321368,1432867,Springdale,Populated Place,UT,49,Washington,53.0,371120N,1125955W,...,,,,1191.0,3907.0,Springdale East,12/31/1979,03/21/2008,Springdale,16.0
142,1322381,1433898,Virgin,Populated Place,UT,49,Washington,53.0,371230N,1131118W,...,,,,1102.0,3615.0,Virgin,12/31/1979,03/21/2008,Virgin,26.0
167,1325659,1437504,Bloomington,Populated Place,UT,49,Washington,53.0,370248N,1133622W,...,,,,771.0,2529.0,Saint George,12/31/1979,,Bloomington,12.0
191,1325725,1437570,Grafton,Populated Place,UT,49,Washington,53.0,371002N,1130448W,...,,,,1119.0,3671.0,Springdale West,12/31/1979,,Grafton,7.0
194,1325731,1437576,Gunlock,Populated Place,UT,49,Washington,53.0,371710N,1134548W,...,,,,1112.0,3648.0,Gunlock,12/31/1979,,Gunlock,14.0


In [43]:
#Import Libraries
import geopandas as gpd
import pandas as pd
import numpy as np
import folium
from folium.features import GeoJsonTooltip

#Read the geoJSON file using geopandas
geojson = gpd.read_file('georef-united-states-of-america-county.geojson')
geojson=geojson[geojson['ste_name'] == "Utah"]

ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

folium.Choropleth(geo_data=geojson).add_to(ut_county_map)

df_final = geojson.merge(county_counts, left_on="coty_name", right_on="county_name", how="outer") 
df_final = df_final[~df_final['geometry'].isna()]
df_final

Unnamed: 0,coty_code,coty_gnis_code,coty_name_long,ste_name,coty_type,year,coty_area_code,ste_code,coty_fp_code,coty_name,geometry,county_name,loc_count,total_count
0,49037,1448032,San Juan County,Utah,county,2020,USA,49,37,San Juan,"POLYGON ((-109.92799 38.15188, -109.92719 38.1...",San Juan,9,189.0
1,49043,1448035,Summit County,Utah,county,2020,USA,49,43,Summit,"POLYGON ((-111.26497 41.14404, -111.27467 41.1...",Summit,12,596.0
2,49041,1448034,Sevier County,Utah,county,2020,USA,49,41,Sevier,"POLYGON ((-112.51850 38.51041, -112.44870 38.5...",Sevier,13,244.0
3,49051,1448039,Wasatch County,Utah,county,2020,USA,49,51,Wasatch,"POLYGON ((-110.89166 39.89965, -110.89197 40.0...",Wasatch,9,150.0
4,49021,1448025,Iron County,Utah,county,2020,USA,49,21,Iron,"POLYGON ((-114.04998 38.14876, -114.05004 38.0...",Iron,9,430.0
5,49017,1448023,Garfield County,Utah,county,2020,USA,49,17,Garfield,"POLYGON ((-112.47868 38.14742, -112.47873 38.0...",Garfield,9,67.0
6,49011,1448020,Davis County,Utah,county,2020,USA,49,11,Davis,"POLYGON ((-112.26022 40.76909, -112.14827 40.8...",Davis,51,671.0
7,49009,1448019,Daggett County,Utah,county,2020,USA,49,9,Daggett,"POLYGON ((-109.04894 40.66260, -109.04909 40.7...",Daggett,1,50.0
8,49029,1448028,Morgan County,Utah,county,2020,USA,49,29,Morgan,"POLYGON ((-111.42073 41.36131, -111.42401 41.3...",Morgan,6,33.0
9,49035,1448031,Salt Lake County,Utah,county,2020,USA,49,35,Salt Lake,"POLYGON ((-111.59388 40.57706, -111.59070 40.5...",Salt Lake,114,8426.0


In [1]:
ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

map_variable = 'total_count' #possible variables: 'loc_count' & 'total_count'

folium.Choropleth(
            geo_data=geojson,
            data=df_final,
            columns=['coty_code', map_variable],  #Here we tell folium to get the county fips and plot new_cases_7days metric for each county
            key_on='feature.properties.coty_code', #Here we grab the geometries/county boundaries from the geojson file using the key 'coty_code' which is the same as county fips
            #threshold_scale=(df_final[map_variable].quantile((0, 0.2, 0.4, 0.6, 0.8, 1))).tolist(), #we can create our own custom scale if we like.
            fill_color='YlOrRd',
            nan_fill_color="White", #Use white color if there is no data available for the county
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name='', #title of the legend
            highlight=True,
            line_color='black').add_to(ut_county_map)

ut_county_map

NameError: name 'folium' is not defined

## COUNTY SEARCH

In [45]:
year_begin = 1928 #UHQ first year is 1928
year_end = 2021 #UHQ end year is 2021

#creating string containing all UHQ texts
df2 = df[(df['text'].notna()) & (df['year'] >= year_begin) & (df['year'] <= year_end)].reset_index()

full_uhq_texts = ' '.join([str(elem) for elem in df2.text.tolist()])

In [46]:
counties = df_final.coty_name_long.tolist()

county_count = []
for c in counties:
    county_count.append(full_uhq_texts.count(c))

df_final['raw_counts'] = county_count

In [47]:
df_final

Unnamed: 0,coty_code,coty_gnis_code,coty_name_long,ste_name,coty_type,year,coty_area_code,ste_code,coty_fp_code,coty_name,geometry,county_name,loc_count,total_count,raw_counts
0,49037,1448032,San Juan County,Utah,county,2020,USA,49,37,San Juan,"POLYGON ((-109.92799 38.15188, -109.92719 38.1...",San Juan,9,189.0,165
1,49043,1448035,Summit County,Utah,county,2020,USA,49,43,Summit,"POLYGON ((-111.26497 41.14404, -111.27467 41.1...",Summit,12,596.0,72
2,49041,1448034,Sevier County,Utah,county,2020,USA,49,41,Sevier,"POLYGON ((-112.51850 38.51041, -112.44870 38.5...",Sevier,13,244.0,70
3,49051,1448039,Wasatch County,Utah,county,2020,USA,49,51,Wasatch,"POLYGON ((-110.89166 39.89965, -110.89197 40.0...",Wasatch,9,150.0,73
4,49021,1448025,Iron County,Utah,county,2020,USA,49,21,Iron,"POLYGON ((-114.04998 38.14876, -114.05004 38.0...",Iron,9,430.0,184
5,49017,1448023,Garfield County,Utah,county,2020,USA,49,17,Garfield,"POLYGON ((-112.47868 38.14742, -112.47873 38.0...",Garfield,9,67.0,12
6,49011,1448020,Davis County,Utah,county,2020,USA,49,11,Davis,"POLYGON ((-112.26022 40.76909, -112.14827 40.8...",Davis,51,671.0,143
7,49009,1448019,Daggett County,Utah,county,2020,USA,49,9,Daggett,"POLYGON ((-109.04894 40.66260, -109.04909 40.7...",Daggett,1,50.0,55
8,49029,1448028,Morgan County,Utah,county,2020,USA,49,29,Morgan,"POLYGON ((-111.42073 41.36131, -111.42401 41.3...",Morgan,6,33.0,8
9,49035,1448031,Salt Lake County,Utah,county,2020,USA,49,35,Salt Lake,"POLYGON ((-111.59388 40.57706, -111.59070 40.5...",Salt Lake,114,8426.0,313


In [49]:
ut_county_map = folium.Map(location=[39.5, -111], zoom_start=7)

map_variable = 'raw_counts' #possible variables: 'loc_count' & 'total_count'

folium.Choropleth(
            geo_data=geojson,
            data=df_final,
            columns=['coty_code', map_variable],  #Here we tell folium to get the county fips and plot new_cases_7days metric for each county
            key_on='feature.properties.coty_code', #Here we grab the geometries/county boundaries from the geojson file using the key 'coty_code' which is the same as county fips
            #threshold_scale=(df_final[map_variable].quantile((0, 0.2, 0.4, 0.6, 0.8, 1))).tolist(), #we can create our own custom scale if we like.
            fill_color='YlOrRd',
            nan_fill_color="White", #Use white color if there is no data available for the county
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name='', #title of the legend
            highlight=True,
            line_color='black').add_to(ut_county_map)

ut_county_map