# Import Packages

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
import gensim
import spacy

In [3]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [4]:
nlp = spacy.load('en_core_web_lg')

# Load Data

In [5]:
df = pd.read_csv("national_parks.csv")

In [6]:
df

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text
0,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Turned back on 3/20/21 due to ice,4.0 of 5 bubbles,I have hiked to the fire tower a few times. It...
1,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Spectacular,5.0 of 5 bubbles,This trail was recommended in my Acadia travel...
2,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Great Trail,5.0 of 5 bubbles,Beech Mountain Trail is one of my favorites in...
3,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Best trail in Acadia,5.0 of 5 bubbles,We stumbled onto this trail and were very happ...
4,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Great trail for family,5.0 of 5 bubbles,My family has kids ranging from age 10 to 3. W...
...,...,...,...,...,...,...,...,...
3868,Zion National Park,Utah (UT),Zion Canyon Scenic Drive,Scenic Drives,5.0,Words Can't Adequality Describe This,5.0 of 5 bubbles,Breathtaking drive! Pictures just don't do th...
3869,Zion National Park,Utah (UT),Zion Canyon Scenic Drive,Scenic Drives,5.0,Beautiful Drive,5.0 of 5 bubbles,Spectacular scenery all along the route. Plent...
3870,Zion National Park,Utah (UT),Zion Canyon Scenic Drive,Scenic Drives,5.0,Spectacular Zion.,5.0 of 5 bubbles,The Zion Canyon Scenic Drive is spectacular! ...
3871,Zion National Park,Utah (UT),Zion Canyon Scenic Drive,Scenic Drives,5.0,Mix 'n match,4.0 of 5 bubbles,We experienced Zion Canyon Scenic Drive two ways.


In [7]:
df2 = pd.read_excel("coords.xlsx")

In [8]:
df2

Unnamed: 0,Latitude,Longitude,Park,State(s),Park Established,Area,Visitors (2018)
0,44.35,-68.21,Acadia,Maine,"February 26, 1919","49,075.26 acres (198.6 km2)",3537575
1,-14.25,-170.68,American Samoa,American Samoa,"October 31, 1988","8,256.67 acres (33.4 km2)",28626
2,38.68,-109.57,Arches,Utah,"November 12, 1971","76,678.98 acres (310.3 km2)",1663557
3,43.75,-102.50,Badlands,South Dakota,"November 10, 1978","242,755.94 acres (982.4 km2)",1008942
4,29.25,-103.25,Big Bend,Texas,"June 12, 1944","801,163.21 acres (3,242.2 km2)",440091
...,...,...,...,...,...,...,...
56,43.57,-103.48,Wind Cave,South Dakota,"January 9, 1903","33,970.84 acres (137.5 km2)",656397
57,61.00,-142.00,Wrangell–St. Elias,Alaska,"December 2, 1980","8,323,146.48 acres (33,682.6 km2)",79450
58,44.60,-110.50,Yellowstone,"Wyoming, Montana, Idaho","March 1, 1872","2,219,790.71 acres (8,983.2 km2)",4115000
59,37.83,-119.50,Yosemite,California,"October 1, 1890","761,747.50 acres (3,082.7 km2)",4009436


# Data Preprocessing with Regex

In [9]:
import re

Extract the park name that precedes the 'National Park' suffix (names without the suffix are kept unchanged).

In [10]:
pattern = r'(.*?)(?:\s+National Park)?$'
result = re.findall(pattern, df['national_park'].iloc[0])

In [11]:
df['national_park'].iloc[0]

'Acadia National Park'

In [12]:
result

['Acadia', '']

In [13]:
result = re.findall(pattern, df['national_park'].iloc[170])
df['national_park'].iloc[170]

'Death Valley National Park'

In [14]:
result

['Death Valley', '']

In [15]:
df['national_park'].iloc[490]

'Cantwell'

In [16]:
result = re.findall(pattern, df['national_park'].iloc[490])
result

['Cantwell', '']

In [18]:
# Reduce every entry to its short park name: the first findall hit is the
# text captured in front of the optional " National Park" suffix.
park = [re.findall(pattern, full_name)[0] for full_name in df['national_park']]

In [207]:
len(park) == len(df)

True

In [20]:
df['park'] = park

In [216]:
df['park']

0       Acadia
1       Acadia
2       Acadia
3       Acadia
4       Acadia
         ...  
3868      Zion
3869      Zion
3870      Zion
3871      Zion
3872      Zion
Name: park, Length: 3873, dtype: object

In [217]:
df2['Park']

0                 Acadia
1         American Samoa
2                 Arches
3               Badlands
4               Big Bend
             ...        
56             Wind Cave
57    Wrangell–St. Elias
58           Yellowstone
59              Yosemite
60                  Zion
Name: Park, Length: 61, dtype: object

In [23]:
national_parks = pd.merge(df, df2, left_on='park', right_on='Park')
national_parks.head()

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,park,Latitude,Longitude,Park,State(s),Park Established,Area,Visitors (2018)
0,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Turned back on 3/20/21 due to ice,4.0 of 5 bubbles,I have hiked to the fire tower a few times. It...,Acadia,44.35,-68.21,Acadia,Maine,"February 26, 1919","49,075.26 acres (198.6 km2)",3537575
1,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Spectacular,5.0 of 5 bubbles,This trail was recommended in my Acadia travel...,Acadia,44.35,-68.21,Acadia,Maine,"February 26, 1919","49,075.26 acres (198.6 km2)",3537575
2,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Great Trail,5.0 of 5 bubbles,Beech Mountain Trail is one of my favorites in...,Acadia,44.35,-68.21,Acadia,Maine,"February 26, 1919","49,075.26 acres (198.6 km2)",3537575
3,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Best trail in Acadia,5.0 of 5 bubbles,We stumbled onto this trail and were very happ...,Acadia,44.35,-68.21,Acadia,Maine,"February 26, 1919","49,075.26 acres (198.6 km2)",3537575
4,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Great trail for family,5.0 of 5 bubbles,My family has kids ranging from age 10 to 3. W...,Acadia,44.35,-68.21,Acadia,Maine,"February 26, 1919","49,075.26 acres (198.6 km2)",3537575


In [24]:
national_parks = national_parks.drop(columns = ['park', 'Park', 'State(s)', 'Park Established'])

In [25]:
national_parks.head()

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
0,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Turned back on 3/20/21 due to ice,4.0 of 5 bubbles,I have hiked to the fire tower a few times. It...,44.35,-68.21,"49,075.26 acres (198.6 km2)",3537575
1,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Spectacular,5.0 of 5 bubbles,This trail was recommended in my Acadia travel...,44.35,-68.21,"49,075.26 acres (198.6 km2)",3537575
2,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Great Trail,5.0 of 5 bubbles,Beech Mountain Trail is one of my favorites in...,44.35,-68.21,"49,075.26 acres (198.6 km2)",3537575
3,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Best trail in Acadia,5.0 of 5 bubbles,We stumbled onto this trail and were very happ...,44.35,-68.21,"49,075.26 acres (198.6 km2)",3537575
4,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Great trail for family,5.0 of 5 bubbles,My family has kids ranging from age 10 to 3. W...,44.35,-68.21,"49,075.26 acres (198.6 km2)",3537575


# Word Embedding and Comment Similarity Score

In [26]:
reference = national_parks.loc[0, 'comment_text']

In [27]:
reference #reference sentence

"I have hiked to the fire tower a few times. Its a great hike, and not too strenuous elevation gains.  If the NO rangers are up there ( in the summer) they used to allow you to go up the tower. We had to turn back on 3/20 because of hard pack solid ice. We had our Katoohla micro spikes on, and solid hiking poles, and knew they simply  wouldn't be enough if the ice was on the steeper sections.  We walked into the trailhead because the access road gate is still closed. After deciding to cross the lot and hike Beech Cliff Loop, which was much more clear of ice, and has excellent views of Echo Lake and the ocean out toward  Southwest Harbor. We returned to BH to hear of the recovery of a young couple from Rutland Massachusetts  who had fallen 100 feet to their death on Dorr Mountain Gorge Trail. The tragedy attributed to ice on the trails. Anyone not experienced with full crampon travel, and ice climbing training should never attempt to hike or climb on solid ice. The danger is severe.. "

In [28]:
reference_vec = nlp(reference) #vectorize our reference sentence

In [29]:
# Parse every review once up front. nlp.pipe streams the texts through the
# pipeline in batches and yields the documents in input order, which is
# faster than calling nlp() separately on each row.
all_docs = list(nlp.pipe(national_parks['comment_text']))

In [30]:
w1 = "sunny"
w2 = "potato"

w1 = nlp(w1)
w2 = nlp(w2)

In [31]:
w1.similarity(w2)

0.17514441974822578

In [32]:
w1 = "sunny"
w2 = "rainy"

w1 = nlp(w1)
w2 = nlp(w2)

In [33]:
w1.similarity(w2)

0.7090924660356245

In [34]:
w1 = "sunny"
w2 = "sunshine"

w1 = nlp(w1)
w2 = nlp(w2)

In [35]:
w1.similarity(w2)

0.7648274794767422

In [36]:
# Score every parsed review against the reference document.
sims = [doc.similarity(reference_vec) for doc in all_docs]
# Comment_ID is the position of the review inside all_docs.
comment_id = list(range(len(all_docs)))
# Build the table a single time, after all scores have been collected, so it
# also exists (empty) when there are no documents.
sims_docs = pd.DataFrame(list(zip(comment_id, sims)), columns = ['Comment_ID', 'sims'])

In [37]:
sims_docs_sorted = sims_docs.sort_values(by = 'sims', ascending = False)

In [38]:
sims_docs_sorted

Unnamed: 0,Comment_ID,sims
0,0,1.000000
1552,1552,0.980410
1772,1772,0.978241
2282,2282,0.977842
1779,1779,0.977384
...,...,...
1154,1154,-0.115120
1168,1168,-0.115120
3303,3303,-0.159446
1188,1188,-0.165912


In [39]:
most_similar_comments = sims_docs_sorted['Comment_ID'][1:6]

In [40]:
similar_reviews = national_parks.iloc[most_similar_comments.values]

In [41]:
similar_reviews

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
1552,Grand Canyon National Park,Arizona (AZ),Grand Canyon South Rim,Canyons,5.0,The views do not disappoint!,5.0 of 5 bubbles,We were staying with family in Sun City (near ...,36.06,-112.14,"1,201,647.03 acres (4,862.9 km2)",6380495
1772,Great Smoky Mountains National Park,Tennessee (TN),Clingmans Dome,Mountains,4.5,Worth the trip,5.0 of 5 bubbles,Our group of all ages & abilities was able to ...,35.68,-83.53,"522,426.88 acres (2,114.2 km2)",11421200
2282,Olympic National Park,Washington (WA),Olympic Peninsula Loop Drive,Scenic Drives,4.5,Olympic National Park/Expansive and beautiful,5.0 of 5 bubbles,This summer my husband had work in Seattle. M...,47.97,-123.5,"922,649.41 acres (3,733.8 km2)",3104455
1779,Great Smoky Mountains National Park,Tennessee (TN),Clingmans Dome,Mountains,4.5,Not so easy but still worth it!,5.0 of 5 bubbles,Clingmans Dome is where Cherry Wonderdog came ...,35.68,-83.53,"522,426.88 acres (2,114.2 km2)",11421200
2187,North Cascades National Park,Washington (WA),Maple Pass Loop,Hiking Trails,5.0,Spectacular Hike in North Cascades Mountains,5.0 of 5 bubbles,We were looking for a great day hike in or nea...,48.7,-121.2,"504,780.94 acres (2,042.8 km2)",30085


In [42]:
national_parks.iloc[0:1]

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
0,Acadia National Park,Maine (ME),Beech Mountain Trail,Hiking Trails,4.5,Turned back on 3/20/21 due to ice,4.0 of 5 bubbles,I have hiked to the fire tower a few times. It...,44.35,-68.21,"49,075.26 acres (198.6 km2)",3537575


In [43]:
reference

"I have hiked to the fire tower a few times. Its a great hike, and not too strenuous elevation gains.  If the NO rangers are up there ( in the summer) they used to allow you to go up the tower. We had to turn back on 3/20 because of hard pack solid ice. We had our Katoohla micro spikes on, and solid hiking poles, and knew they simply  wouldn't be enough if the ice was on the steeper sections.  We walked into the trailhead because the access road gate is still closed. After deciding to cross the lot and hike Beech Cliff Loop, which was much more clear of ice, and has excellent views of Echo Lake and the ocean out toward  Southwest Harbor. We returned to BH to hear of the recovery of a young couple from Rutland Massachusetts  who had fallen 100 feet to their death on Dorr Mountain Gorge Trail. The tragedy attributed to ice on the trails. Anyone not experienced with full crampon travel, and ice climbing training should never attempt to hike or climb on solid ice. The danger is severe.. "

Similar Reviews

In [44]:
similar_reviews['comment_text'].iloc[0]



In [45]:
similar_reviews['comment_text'].iloc[3]

'Clingmans Dome is where Cherry Wonderdog came for a Friday night sunset. The hike up to the top was a lot more grueling than any of us anticipated. The path is completely paved, which allows for parents to push strollers and those in wheelchairs or handicapped scooters to roll instead of walk – at least in theory. We passed one couple halfway, torn between continuing to the lookout tower and turning around out of fear that their scooter battery might fail on route. I’m not sure how much elevation was gained from the parking lot to the actual summit, but it was rather significant, and the steep walk felt to be in excess of a mile. On the way, there were several vista points that provided ample opportunity for selfies and panoramic shots.'

In [46]:
similar_reviews['comment_text'].iloc[4]

'We were looking for a great day hike in or near the North Cascades National park in Washington state and found Maple Loop Trail on Tripadvisor and several other online travel forums. The 7.1 miles round trip did not sound challenging but we had to think twice about the elevation - up and down 2000 feet in those 7 miles. We checked with the lodge receptionist who encouraged us to go saying she has done the trail not too long ago and she was retirement age. The hike started from Rainy Lake area parking lot and we decided to go anti-clockwise (following the signs) so that we would go up more gradually and handle the steeper part on the way back. The incline was not bad and we soon started admiring wonderful views of the surrounding mountains. After going a bit further, we saw wonderful views of Lake Ann that we kept seeing repeatedly throughout the hike. After passing the Heather Meadows, a wonderful view of surrounding mountain ranges opened up and we could see a lot of colored bushes a

In [47]:
least_similar_comments = sims_docs_sorted['Comment_ID'][-5:]

In [48]:
national_parks.iloc[least_similar_comments.values]

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
1154,Everglades National Park,Florida (FL),Mahogany Hammock,Nature & Wildlife Areas,4.0,Easy short walk suitable for everyone through ...,4.0 of 5 bubbles,Hello,25.32,-80.93,"1,508,934.25 acres (6,106.4 km2)",597124
1168,Everglades National Park,Florida (FL),Royal Palm Visitor Center,Visitor Centers,4.5,This place is the best place in The Everglades...,5.0 of 5 bubbles,Hello,25.32,-80.93,"1,508,934.25 acres (6,106.4 km2)",597124
3303,Zion National Park,Utah (UT),Observation Point,Points of Interest & Landmarks,5.0,Hot but worth it,5.0 of 5 bubbles,*** Observation Point:,37.3,-113.05,"147,237.02 acres (595.8 km2)",4320033
1188,Everglades National Park,Florida (FL),Anhinga Trail,Hiking Trails,4.5,"Beautiful, but over 80% less wildlife than I s...",4.0 of 5 bubbles,***IMPORTANT UPDATE***,25.32,-80.93,"1,508,934.25 acres (6,106.4 km2)",597124
854,Capitol Reef National Park,Utah (UT),Burr Trail,Off-Road & ATV Trails,5.0,THE BURR TRAIL (UTAH'S HIDDEN BYWAYS I),5.0 of 5 bubbles,THE BURR TRAIL (UTAH’S HIDDEN BYWAYS I),38.2,-111.17,"241,904.50 acres (979.0 km2)",1227627


# Streamlining the process with functions

In [157]:
def comment_similarity(parks_data, comment_index, all_comments, top_n=5):
  """Return the reviews most similar to the review at ``comment_index``.

  Parameters
  ----------
  parks_data : pandas.DataFrame
      Review table with a 'comment_text' column.
  comment_index : label
      Index label (as understood by ``.loc``) of the reference review.
  all_comments : sequence
      One parsed document per row of ``parks_data``, in row order. Each
      element must provide ``.similarity(other)``.
  top_n : int, default 5
      Maximum number of similar reviews to return.

  Returns
  -------
  pandas.DataFrame
      Up to ``top_n`` rows of ``parks_data``, most similar first. The
      reference review itself is never part of the result.

  Raises
  ------
  ValueError
      If ``all_comments`` does not hold exactly one document per row.
  """
  if len(all_comments) != len(parks_data):
    raise ValueError(
        "all_comments must contain one document per row of parks_data "
        "(got {} documents for {} rows)".format(len(all_comments), len(parks_data))
    )
  example_comment = parks_data.loc[comment_index, 'comment_text']
  reference_comment = nlp(example_comment) # vectorize the reference review
  # Scores are keyed by row position, matching the order of all_comments.
  scores = pd.Series([doc.similarity(reference_comment) for doc in all_comments])
  # Remove the reference row explicitly instead of assuming it sorts first:
  # an identical review elsewhere in the data would tie with it.
  reference_position = parks_data.index.get_loc(comment_index)
  ranked = scores.drop(reference_position).sort_values(ascending = False, kind = 'mergesort')
  # Select from the frame that was passed in, not from a notebook global.
  return parks_data.iloc[ranked.index[:top_n].values]


In [None]:
# Parse every review in national_parks; nlp.pipe batches the texts and
# yields the documents in input order, avoiding one nlp() call per row.
all_docs = list(nlp.pipe(national_parks['comment_text']))

In [158]:
showcase = comment_similarity(national_parks, 999, all_docs)
showcase

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
1861,Kenai Fjords National Park,Alaska (AK),Six Mile Creek,Bodies of Water,4.5,White water and then some,4.0 of 5 bubbles,We passed the creek along the Seward Highway a...,59.92,-149.65,"669,650.05 acres (2,710.0 km2)",321596
1344,Glacier National Park,Montana (MT),Bowman Lake,Bodies of Water,4.5,Can't describe how beautiful it is!,5.0 of 5 bubbles,"It is a long and bumpy ride to the lake, but i...",48.8,-114.0,"1,013,125.99 acres (4,100.0 km2)",2965309
690,Canyonlands National Park,Utah (UT),Shafer Trail,Hiking Trails,4.5,Beautiful drive!,5.0 of 5 bubbles,We took the Shafer trail from Canyonlands all ...,38.2,-109.93,"337,597.83 acres (1,366.2 km2)",739449
2409,Petrified Forest National Park,Arizona (AZ),Long Logs Loop,Nature & Wildlife Areas,5.0,Once in a lifetime sight,5.0 of 5 bubbles,Seeing the petrified forest and the wood scatt...,35.07,-109.78,"221,390.21 acres (895.9 km2)",644922
1610,Grand Teton National Park,Wyoming (WY),Taggart Lake,Hiking Trails,5.0,Amazing Experience,5.0 of 5 bubbles,"We were only at Grand Teton for one day, but t...",43.73,-110.8,"310,044.22 acres (1,254.7 km2)",3491151


In [159]:
example_comment = national_parks.loc[999, 'comment_text']
example_comment

'There are two ways to get out here. One is by boat and the other is by air. The boat ride is long no doubt about it but it is a part of the experience.  We watched the seaplane land and leave and quite frankly wished we had taken that option but the cost is considerably higher than the boat. '

In [218]:
showcase['comment_text'].iloc[1]

'It is a long and bumpy ride to the lake, but it is well worth it.  The view of the lake from the end of the lake by the parking lot is breath taking.  We hiked one of the trails and it was easy and well marked.  Seldom will one be disappointed with a visit.'

In [161]:
def comment_similarity(parks_data, comment_index, all_comments, top_n=1):
  """Return the review(s) most similar to the review at ``comment_index``.

  Parameters
  ----------
  parks_data : pandas.DataFrame
      Review table with a 'comment_text' column.
  comment_index : label
      Index label (as understood by ``.loc``) of the reference review.
  all_comments : sequence
      One parsed document per row of ``parks_data``, in row order. Each
      element must provide ``.similarity(other)``.
  top_n : int, default 1
      Maximum number of similar reviews to return.

  Returns
  -------
  pandas.DataFrame
      Up to ``top_n`` rows of ``parks_data``, most similar first. The
      reference review itself is never part of the result.

  Raises
  ------
  ValueError
      If ``all_comments`` does not hold exactly one document per row.
  """
  if len(all_comments) != len(parks_data):
    raise ValueError(
        "all_comments must contain one document per row of parks_data "
        "(got {} documents for {} rows)".format(len(all_comments), len(parks_data))
    )
  example_comment = parks_data.loc[comment_index, 'comment_text']
  reference_comment = nlp(example_comment) # vectorize the reference review
  # Scores are keyed by row position, matching the order of all_comments.
  scores = pd.Series([doc.similarity(reference_comment) for doc in all_comments])
  # Remove the reference row explicitly instead of assuming it sorts first:
  # an identical review elsewhere in the data would tie with it.
  reference_position = parks_data.index.get_loc(comment_index)
  ranked = scores.drop(reference_position).sort_values(ascending = False, kind = 'mergesort')
  # Select from the frame that was passed in, not from a notebook global.
  return parks_data.iloc[ranked.index[:top_n].values]

In [162]:
test_case = national_parks.iloc[420:421]
test_case

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
420,Big Bend National Park,Texas (TX),Boquillas Canyon,Canyons,4.5,Don’t Miss This End of the Park,5.0 of 5 bubbles,The canyon views are spectacular. The drive in...,29.25,-103.25,"801,163.21 acres (3,242.2 km2)",440091


In [163]:
h = test_case['trail'].values[0]
h

'Boquillas Canyon'

In [164]:
subscripts = national_parks[national_parks['trail'] == h].index
subscripts

Int64Index([420, 421, 422, 423, 424, 425, 426, 427, 428, 429], dtype='int64')

In [165]:
test = []
for number in subscripts:
  print(number)
  test.append(comment_similarity(national_parks, number, all_docs))


420
421
422
423
424
425
426
427
428
429


In [166]:
stacked_df = pd.concat(test)
stacked_df

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
322,Badlands National Park,South Dakota (SD),Big Badlands Overlook,Geologic Formations,5.0,Awesome Introduction to the Badlands,5.0 of 5 bubbles,Big Badlands Overlook was our first stop after...,43.75,-102.5,"242,755.94 acres (982.4 km2)",1008942
1042,Channel Islands National Park,California (CA),Santa Rosa Island,Islands,5.0,Galápagos of California!,5.0 of 5 bubbles,I love the Channel Islands! It is the perfect ...,34.01,-119.42,"249,561.00 acres (1,009.9 km2)",366250
1478,Grand Canyon National Park,Arizona (AZ),Grandview Point,Lookouts,5.0,Night visit,5.0 of 5 bubbles,We wanted to experience the Grand Canyon durin...,36.06,-112.14,"1,201,647.03 acres (4,862.9 km2)",6380495
2187,North Cascades National Park,Washington (WA),Maple Pass Loop,Hiking Trails,5.0,Spectacular Hike in North Cascades Mountains,5.0 of 5 bubbles,We were looking for a great day hike in or nea...,48.7,-121.2,"504,780.94 acres (2,042.8 km2)",30085
2461,Petrified Forest National Park,Arizona (AZ),Rainbow Forest,Forests,4.5,The geography is as interesting as the petrifi...,5.0 of 5 bubbles,"Maybe it should have been more apparent to me,...",35.07,-109.78,"221,390.21 acres (895.9 km2)",644922
3169,Mount Rainier National Park,Washington (WA),Tipsoo Lake Loop,Hiking Trails,4.5,Naches/Tipsoo Lake Loop was the highlight of o...,5.0 of 5 bubbles,This was an absolutely breathtaking hike in th...,46.85,-121.75,"236,381.64 acres (956.6 km2)",1518491
1984,Mammoth Cave National Park,Kentucky (KY),Cedar Sink Trail,Hiking Trails,4.5,To Fully Appreciate the Caves Hike This Trail,5.0 of 5 bubbles,As it was over 100 hundred degrees for both da...,37.18,-86.1,"54,011.91 acres (218.6 km2)",533206
2688,Shenandoah National Park,Virginia (VA),Rose River Falls,Waterfalls,4.5,Nice trail but missed the falls,4.0 of 5 bubbles,We took the advice of a ranger and hiked Story...,38.53,-78.35,"199,217.77 acres (806.2 km2)",1264880
191,Arches National Park,Utah (UT),Landscape Arch,Hiking Trails,4.5,Landscape Arch,5.0 of 5 bubbles,We chose to drive to the back of Arches Nation...,38.68,-109.57,"76,678.98 acres (310.3 km2)",1663557
3293,Zion National Park,Utah (UT),Kolob Canyons,Canyons,4.5,Kolob Canyon-Taylor Creek Trail,5.0 of 5 bubbles,LOVED Kolob Canyons. This is the quiet part o...,37.3,-113.05,"147,237.02 acres (595.8 km2)",4320033


In [167]:
national_parks['comment_text'].iloc[420]

'The canyon views are spectacular. The drive into the trail’s end is just one view after the next as is the whole Big Bend experience. There weren’t many locals out but we did see some rowing on the Rio Grande and a few local wares for sale. We bought a fabulous handcrafted walking stick for a great price.  What a wonderful adventure in the canyon  the views of the Rio Grand are perfect here'

In [168]:
stacked_df['comment_text'].iloc[0]

'Big Badlands Overlook was our first stop after entering the park from the northeast. What an awesome introduction to the Badlands! There is a parking lot, and a boardwalk trail leading out onto a peninsular ridge. The views in all directions are spectacular! The layers in the vast array of rock formations is so beautiful. I highly recommend a stop at this overlook.'

In [169]:
stacked_df['comment_text'].iloc[1]

'I love the Channel Islands! It is the perfect way to escape the craziness going on in the mainland!  The trip out is half the fun. We saw a group of about 100 plus dolphins that chased us for part of the ride, and then we saw a whale off in the distance. The island itself is beautiful! White sand beaches with turquoise waters, and best of all, no one is there! We originally went to see Lobo Canyon, but the timing for the day trip is not long enough (it is nine miles round trip). We had to pivot, and instead walked along a ~2 mile beach strip, which was entrancing. The island is truly special. We were then treated to an unexpected treat on the ferry home, and stopped for a quick photo shoot at the Painted Cave. Truly special day, and I would highly recommend it!'

In [170]:
def total_similarity(trail, parks_data, all_comments):
  """Collect the similar reviews for every review written about ``trail``.

  Parameters
  ----------
  trail : str
      Value to look up in the 'trail' column of ``parks_data``.
  parks_data : pandas.DataFrame
      Review table; must contain a 'trail' column plus whatever
      ``comment_similarity`` needs.
  all_comments : sequence
      Parsed documents, forwarded unchanged to ``comment_similarity``.

  Returns
  -------
  pandas.DataFrame
      The frames returned by ``comment_similarity`` for each review of the
      trail, concatenated in row order.

  Raises
  ------
  ValueError
      If no row of ``parks_data`` belongs to ``trail``.
  """
  trail_subset = parks_data[parks_data['trail'] == trail].index
  if len(trail_subset) == 0:
    # pd.concat would fail on an empty list with a message that does not
    # mention the trail; fail with the same exception type but say why.
    raise ValueError("No reviews found for trail {!r}".format(trail))
  # Forward the arguments that were passed in rather than notebook globals,
  # so the function works on any review table and document list.
  per_review = [
      comment_similarity(parks_data, row_label, all_comments)
      for row_label in trail_subset
  ]
  return pd.concat(per_review)

In [171]:
output = total_similarity("Landscape Arch", national_parks, all_docs)
output

Unnamed: 0,national_park,state,trail,activity,overall_rating,comment_title,comment_ratings,comment_text,Latitude,Longitude,Area,Visitors (2018)
303,Badlands National Park,South Dakota (SD),Pinnacles Overlook,Points of Interest & Landmarks,5.0,Must See Pullover,5.0 of 5 bubbles,This is one of a handful of overlooks you have...,43.75,-102.5,"242,755.94 acres (982.4 km2)",1008942
235,Arches National Park,Utah (UT),Delicate Arch,Points of Interest & Landmarks,5.0,Delicate Arch,5.0 of 5 bubbles,Our family chose to hike to Delicate Arch late...,38.68,-109.57,"76,678.98 acres (310.3 km2)",1663557
863,Capitol Reef National Park,Utah (UT),Capitol Reef National Park,National Parks,4.5,Add Capitol Reef to Your Utah National Park List,5.0 of 5 bubbles,Just to the northeast of more popular parks Br...,38.2,-111.17,"241,904.50 acres (979.0 km2)",1227627
1310,Death Valley National Park,California (CA),Zabriskie Point,Geologic Formations,4.5,The Most Iconic Place in Death Valley,4.0 of 5 bubbles,You can't miss it. I don't mean you have to do...,36.24,-116.82,"3,373,063.14 acres (13,650.3 km2)",1678660
1611,Grand Teton National Park,Wyoming (WY),Taggart Lake,Hiking Trails,5.0,Do this hike if you want to feel like you're a...,5.0 of 5 bubbles,It's not a difficult hike and is right off the...,43.73,-110.8,"310,044.22 acres (1,254.7 km2)",3491151
222,Arches National Park,Utah (UT),Double Arch,Hiking Trails,5.0,Easy hike,5.0 of 5 bubbles,The Double Arch is unreal. It is massive and b...,38.68,-109.57,"76,678.98 acres (310.3 km2)",1663557
3198,Mount Rainier National Park,Washington (WA),Sunrise Visitor Center,Visitor Centers,4.5,Amazing views,5.0 of 5 bubbles,Amazing hikes of all varieties. Many travel up...,46.85,-121.75,"236,381.64 acres (956.6 km2)",1518491
1439,Glacier National Park,Montana (MT),Grinnell Glacier,Hiking Trails,5.0,Incredible vies and the end-point is rewarding,5.0 of 5 bubbles,This 13 mile hike from Many Glacier to upper G...,48.8,-114.0,"1,013,125.99 acres (4,100.0 km2)",2965309
1366,Glacier National Park,Montana (MT),Virginia Falls,Waterfalls,5.0,Magnificent Falls in Glacier National Park - w...,5.0 of 5 bubbles,This is the second falls on a hike in Glacier ...,48.8,-114.0,"1,013,125.99 acres (4,100.0 km2)",2965309
650,Canyonlands National Park,Utah (UT),Horseshoe Canyon,Canyons,5.0,WHOA! READ PLEASE. Things you NEED to know a...,5.0 of 5 bubbles,There are some older reviews. Some are VERY M...,38.2,-109.93,"337,597.83 acres (1,366.2 km2)",739449


# Plotly Functions

In [172]:
from plotly import express as px
import plotly.io as pio
import inspect
pio.renderers.default="iframe"

In [204]:
def plotting_parks(trail, parks_data, all_comments, **kwargs):
  """Plot the reviews recommended for ``trail`` on a map.

  The rows come from ``total_similarity(trail, parks_data, all_comments)``
  and are drawn with ``px.scatter_mapbox`` at their Longitude/Latitude,
  coloured by 'overall_rating'. Any extra keyword arguments are passed
  straight through to ``px.scatter_mapbox``.

  Returns the figure produced by ``px.scatter_mapbox``.
  """
  recommendations = total_similarity(trail, parks_data, all_comments)
  # Columns shown in the hover box of each point.
  hover_columns = ["Visitors (2018)", "activity", "trail", "overall_rating"]
  return px.scatter_mapbox(
      recommendations,
      lon = "Longitude",
      lat = "Latitude",
      color = "overall_rating",
      color_continuous_midpoint = 2.5,
      hover_name = "national_park",
      height = 600,
      hover_data = hover_columns,
      title = "Recommended National Park Trails",
      size_max = 50,
      **kwargs,
  )

In [205]:
color_map = px.colors.diverging.RdGy_r # produce a color map
fig = plotting_parks("Landscape Arch", national_parks, all_docs, mapbox_style="carto-positron",
                                   color_continuous_scale = color_map)


In [206]:
fig.show()