In [1]:
import logging
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyLDAvis
import pyLDAvis.sklearn
import seaborn as sns
from nltk.tokenize import word_tokenize
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer, ENGLISH_STOP_WORDS, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from nlp_cleaning import *
from modeling import *
# Render pyLDAvis visualisations inline in the notebook.
pyLDAvis.enable_notebook()

# Display options: show at most 50 rows, but never truncate columns or cell
# contents (review text is long and needs the full column width to be readable).
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", None)
pd.set_option('display.max_colwidth', None)

# Timestamped INFO-level logging; DeprecationWarnings are silenced to keep cell output readable.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Automatically reload imported modules before each cell runs, so edits to the
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [33]:
# Load the pre-cleaned review data from a CSV located next to this notebook (relative path).
# NOTE(review): read_csv parses empty fields as NaN by default -- confirm the cleaned-text
# columns contain no missing values before they are handed to a vectorizer.
df = pd.read_csv("Reviews_cleaned_for_NLP.csv")

In [3]:
# List the available columns: the raw review fields plus one column per text-cleaning stage.
df.columns

Index(['attraction_name', 'attraction_id', 'user_name', 'user_profile_link',
       'review_date', 'helpful_votes', 'rating', 'review_link', 'review_text',
       'review_title', 'experience_date', 'reviews_basic_clean',
       'review_lemma', 'review_remove_stop_words',
       'review_remove_additional_words'],
      dtype='object')

## Possible Future Cleaning

## Vectorizer & Topic Modeling - Round 1

The cleaned corpus is used to create a bag-of-words representation with TF-IDF.
This representation is then used for topic modeling.

In [35]:
# Collects the human-readable topic labels appended by the topic-review cells further down.
topics_list = []

# To keep the column name used for modeling constant, 'review_clean_modeling' is an alias
# column: it is re-assigned from whichever cleaned-text column is current as more
# processing occurs, so the modeling cells never need to change the column they read.
df['review_clean_modeling'] = df['review_remove_stop_words']

In [36]:
%%time

params = {'stop_words':'english','min_df': 10, 'max_df': 0.5, 'ngram_range':(1, 1),}

tfidf = TfidfVectorizer(**params)
review_word_matrix_tfidf = tfidf.fit_transform(df['review_clean_modeling'])
review_vocab_tfidf = tfidf.get_feature_names()

lda_tfidf, score_tfidf, topic_matrix_tfidf, word_matrix_tfidf = lda_topic_modeling(review_word_matrix_tfidf, vocab = review_vocab_tfidf, n = 20)

iteration: 1 of max_iter: 100
iteration: 2 of max_iter: 100
iteration: 3 of max_iter: 100
iteration: 4 of max_iter: 100
iteration: 5 of max_iter: 100
iteration: 6 of max_iter: 100
iteration: 7 of max_iter: 100
iteration: 8 of max_iter: 100
iteration: 9 of max_iter: 100
iteration: 10 of max_iter: 100
iteration: 11 of max_iter: 100
iteration: 12 of max_iter: 100
iteration: 13 of max_iter: 100
iteration: 14 of max_iter: 100
iteration: 15 of max_iter: 100
iteration: 16 of max_iter: 100
iteration: 17 of max_iter: 100
iteration: 18 of max_iter: 100
iteration: 19 of max_iter: 100
iteration: 20 of max_iter: 100
iteration: 21 of max_iter: 100
iteration: 22 of max_iter: 100
iteration: 23 of max_iter: 100
iteration: 24 of max_iter: 100
iteration: 25 of max_iter: 100
iteration: 26 of max_iter: 100
iteration: 27 of max_iter: 100
iteration: 28 of max_iter: 100
iteration: 29 of max_iter: 100
iteration: 30 of max_iter: 100
iteration: 31 of max_iter: 100
iteration: 32 of max_iter: 100
iteration: 33 of 

In [25]:
%%time

params2 = {'stop_words':'english','min_df': 10, 'max_df': 0.5, 'ngram_range':(1, 2),}

tfidf_2 = TfidfVectorizer(**params2)
review_word_matrix_tfidf_2 = tfidf_2.fit_transform(df['review_clean_modeling'])
review_vocab_tfidf_2 = tfidf_2.get_feature_names()

lda_tfidf_2, score_tfidf_2, topic_matrix_tfidf_2, word_matrix_tfidf_2 = lda_topic_modeling(review_word_matrix_tfidf_2, vocab = review_vocab_tfidf_2, n = 20)

iteration: 1 of max_iter: 100
iteration: 2 of max_iter: 100
iteration: 3 of max_iter: 100
iteration: 4 of max_iter: 100
iteration: 5 of max_iter: 100
iteration: 6 of max_iter: 100
iteration: 7 of max_iter: 100
iteration: 8 of max_iter: 100
iteration: 9 of max_iter: 100
iteration: 10 of max_iter: 100
iteration: 11 of max_iter: 100
iteration: 12 of max_iter: 100
iteration: 13 of max_iter: 100
iteration: 14 of max_iter: 100
iteration: 15 of max_iter: 100
iteration: 16 of max_iter: 100
iteration: 17 of max_iter: 100
iteration: 18 of max_iter: 100
iteration: 19 of max_iter: 100
iteration: 20 of max_iter: 100
iteration: 21 of max_iter: 100
iteration: 22 of max_iter: 100
iteration: 23 of max_iter: 100
iteration: 24 of max_iter: 100
iteration: 25 of max_iter: 100
iteration: 26 of max_iter: 100
iteration: 27 of max_iter: 100
iteration: 28 of max_iter: 100
iteration: 29 of max_iter: 100
iteration: 30 of max_iter: 100
iteration: 31 of max_iter: 100
iteration: 32 of max_iter: 100
iteration: 33 of 

In [7]:
%%time

params = {'stop_words':'english','min_df': 10, 'max_df': 0.5, 'ngram_range':(1, 1),}

tf = CountVectorizer(**params)
review_word_matrix_tf = tf.fit_transform(df['review_clean_modeling'])
review_vocab_tf = tf.get_feature_names()

lda_tf, score_tf, topic_matrix_tf, word_matrix_tf = lda_topic_modeling(review_word_matrix_tf, vocab = review_vocab_tf, n = 20)

iteration: 1 of max_iter: 100
iteration: 2 of max_iter: 100
iteration: 3 of max_iter: 100
iteration: 4 of max_iter: 100
iteration: 5 of max_iter: 100
iteration: 6 of max_iter: 100
iteration: 7 of max_iter: 100
iteration: 8 of max_iter: 100
iteration: 9 of max_iter: 100
iteration: 10 of max_iter: 100
iteration: 11 of max_iter: 100
iteration: 12 of max_iter: 100
iteration: 13 of max_iter: 100
iteration: 14 of max_iter: 100
iteration: 15 of max_iter: 100
iteration: 16 of max_iter: 100
iteration: 17 of max_iter: 100
iteration: 18 of max_iter: 100
iteration: 19 of max_iter: 100
iteration: 20 of max_iter: 100
iteration: 21 of max_iter: 100
iteration: 22 of max_iter: 100
iteration: 23 of max_iter: 100
iteration: 24 of max_iter: 100
iteration: 25 of max_iter: 100
iteration: 26 of max_iter: 100
iteration: 27 of max_iter: 100
iteration: 28 of max_iter: 100
iteration: 29 of max_iter: 100
iteration: 30 of max_iter: 100
iteration: 31 of max_iter: 100
iteration: 32 of max_iter: 100
iteration: 33 of 

In [8]:
%%time

params2 = {'stop_words':'english','min_df': 10, 'max_df': 0.5, 'ngram_range':(1, 2),}

tf_2 = CountVectorizer(**params2)
review_word_matrix_tf_2 = tf_2.fit_transform(df['review_clean_modeling'])
review_vocab_tf_2 = tf_2.get_feature_names()

lda_tf_2, score_tf_2, topic_matrix_tf_2, word_matrix_tf_2 = lda_topic_modeling(review_word_matrix_tf_2, vocab = review_vocab_tf_2, n = 20)

iteration: 1 of max_iter: 100
iteration: 2 of max_iter: 100
iteration: 3 of max_iter: 100
iteration: 4 of max_iter: 100
iteration: 5 of max_iter: 100
iteration: 6 of max_iter: 100
iteration: 7 of max_iter: 100
iteration: 8 of max_iter: 100
iteration: 9 of max_iter: 100
iteration: 10 of max_iter: 100
iteration: 11 of max_iter: 100
iteration: 12 of max_iter: 100
iteration: 13 of max_iter: 100
iteration: 14 of max_iter: 100
iteration: 15 of max_iter: 100
iteration: 16 of max_iter: 100
iteration: 17 of max_iter: 100
iteration: 18 of max_iter: 100
iteration: 19 of max_iter: 100
iteration: 20 of max_iter: 100
iteration: 21 of max_iter: 100
iteration: 22 of max_iter: 100
iteration: 23 of max_iter: 100
iteration: 24 of max_iter: 100
iteration: 25 of max_iter: 100
iteration: 26 of max_iter: 100
iteration: 27 of max_iter: 100
iteration: 28 of max_iter: 100
iteration: 29 of max_iter: 100
iteration: 30 of max_iter: 100
iteration: 31 of max_iter: 100
iteration: 32 of max_iter: 100
iteration: 33 of 

In [26]:
# Copy the original and the cleaned review text into each topic matrix so the
# reviews can be read next to their topic weights. Underscore-prefixed loop
# names avoid shadowing anything pulled in by the star imports.
_review_text = df[['review_text', 'review_clean_modeling']]
for _topic_matrix in (topic_matrix_tf, topic_matrix_tf_2, topic_matrix_tfidf_2, topic_matrix_tfidf):
    _topic_matrix[['raw_review', 'review_cleanned']] = _review_text

In [11]:
# Persist the four fitted LDA models so they can be reloaded without refitting.
# `pickle` is imported with the other imports at the top of the notebook.
_models_to_save = {
    "lda_tfidf.pickle": lda_tfidf,
    "lda_tfidf_2.pickle": lda_tfidf_2,
    "lda_tf.pickle": lda_tf,
    "lda_tf_2.pickle": lda_tf_2,
}

for _pickle_path, _fitted_model in _models_to_save.items():
    # Binary mode is required by pickle; the context manager closes the file even on error.
    with open(_pickle_path, "wb") as _handle:
        pickle.dump(_fitted_model, _handle)

In [None]:

# Build the interactive pyLDAvis view for the unigram TF-IDF model; as the last expression
# of the cell it is displayed inline. mds='tsne' selects t-SNE for the 2-D topic layout.
pyLDAvis.sklearn.prepare(lda_tfidf, review_word_matrix_tfidf, tfidf, mds = 'tsne')

Let's start exploring the topics that were generated using LDA for up to 25 topics.

In [28]:
# Start the label list afresh before walking through the topics one by one;
# any labels appended before this point are discarded.
topics_list = []

#### Topic 0 - Amazing Views
In particular, hiking at

In [37]:
# Topic 0 of the bigram TF-IDF run: print its 10 top terms and 5 top reviews,
# then record the label chosen for it.
topic, n_reviews, n_words = 0, 5, 10

print(top_words(word_matrix_tfidf_2, topic, n_words))
print(top_reviews(topic_matrix_tfidf_2, topic, n_reviews))
topics_list.append('Amazing Views')

adams              8.978884
ansel              8.674477
ansel adams        8.329235
view fantastic     3.800595
place beautiful    3.462722
make worthwhile    3.391562
labor              3.096900
capitan left       3.006288
labor day          2.987246
drive slow         2.814177
Name: topic_0, dtype: float64
['Once you emerge from the Tunnel, there is a parking area for all to pile out and take in the amazing views of Yosemite. You don’t want to miss it! You can get Ansel Adams quality shots.'
 "It's a hassle to get to and it's even more annoying once you're there with every tourist thinking he's Ansel Adams hogging the good photo spots...despite the annoyance, you cannot miss the best view in the park...simply breathtaking!"
 'Clean air, pristine landscape in summer and early fall.'
 "What a stunning view. It really is that good. Awe inspiring and humbling being confronted with that much awesome! It's almost too much for ones' eyeballs!"
 'Immense, awe inspiring, staggering, how many 

#### Topic 1 - 'Glacier Point'
In particular, the views at Glacier Point

In [38]:
# Topic 1 of the bigram TF-IDF run: print its 10 top terms and 5 top reviews.
topic, n_reviews, n_words = 1, 5, 10

print(top_words(word_matrix_tfidf_2, topic, n_words))
print(top_reviews(topic_matrix_tfidf_2, topic, n_reviews))
# The label is not recorded (append disabled). Because of that, positions in
# topics_list stop matching topic numbers from here on.
# topics_list.append('Glacier Point')

lower                       5.532638
beautiful picture           4.960246
breathtakingly              4.413730
picture justice             4.382677
rainbow fall                4.068597
make fall                   3.998642
blow                        3.842549
breathtakingly beautiful    3.822459
lower fall                  3.689144
valley look                 3.620313
Name: topic_1, dtype: float64
['To understand what you will see there check my pictures http://allcontravel.blogspot.com/2012/06/half-dome-yosemite-park-ca.html'
 "Such a beautiful mountain! It's mass almost can't be described by words. It is a beautiful thing to see!! "
 'Beautiful to see Pictures don’t do it justice In awe of the size and scale of it . Hats off to the climbers'
 'A somewhat knee shaking experience that you must see. Incredible view of valley waterfalls and Half Dome '
 'From far away the half dome can already be seen. From closer by, seen fro the valley, it is really impressive.']


#### Topic 2 - 'Yosemite Falls'
In particular, Upper and Lower Yosemite Falls

In [39]:
# Topic 2 of the bigram TF-IDF run: print its 10 top terms and 5 top reviews,
# then record the label chosen for it.
topic, n_reviews, n_words = 2, 5, 10

print(top_words(word_matrix_tfidf_2, topic, n_words))
print(top_reviews(topic_matrix_tfidf_2, topic, n_reviews))
topics_list.append('Yosemite Falls')

yosemite miss       6.897433
sunrise sunset      4.968883
capitan valley      3.769183
stay day            3.568557
hike spectacular    3.549470
place stay          3.346694
view especially     3.015461
young old           2.797496
pleasantly          2.526695
trail path          2.383792
Name: topic_2, dtype: float64
['We viewed half done from glazier point and boy was it spectacular! We could have stayed all day just drinking in the view!'
 "We followed t he Mist Trail to Vernal Falls. It was October so there wasn't a great deal of water but it was enough to thoroughly enjoy the experience!"
 'The view from the point is fabulous, but there are too many people to really enjoy it.  Is great at sunrise or sunset and less people.  Spectacular views especially of Half Dome.'
 'After coming out of the tunnel, we stopped here to see the beautiful valley and the surrounding mountains. Recommended!'
 'A lot to explore at the valley like trails, enjoy the views at the meadows, sunset, etc Also

**Topic 3 - Tioga Pass**  
In particular, driving Through Tioga Pass 

In [40]:
# Topic 3 of the bigram TF-IDF run: print its 10 top terms and 5 top reviews,
# then record the label chosen for it.
topic, n_reviews, n_words = 3, 5, 10

print(top_words(word_matrix_tfidf_2, topic, n_words))
print(top_reviews(topic_matrix_tfidf_2, topic, n_reviews))
topics_list.append('Tioga Pass')

chair            5.946935
place picture    5.882710
look forward     5.674061
forward          5.470924
shop snack       4.324968
wheel chair      4.242940
lot great        3.968965
marvelous        3.941094
hazy             3.842744
good camera      3.818459
Name: topic_3, dtype: float64
['This place actually makes you feel as if you are in a wonderland!! You will see unbelievably huge Sequoias and some of them are as old as 1800 years!! I guarantee that you will be lost for words to express your feelings!! '
 'Spectacular views from everywhere. There is a small shop/snack bar at the top which has various options available.'
 'It doesn’t take too long to drive up to this iconic spot and you must do it. There’s ample parking and the views are just stunning.'
 'Definately must do!Amazing, marvelous view, just to escape from the city jungle and enjoy with all your sences!!'
 'The views are awesome...many photo ops available. There is a gift shop and snack shop available at Glacier Point.

**Topic 4 - Hiking**  

In [42]:
# Topic 4 of the bigram TF-IDF run: print its 10 top terms and 10 top reviews
# (twice as many reviews as the neighbouring cells), then record the label.
topic, n_reviews, n_words = 4, 10, 10

print(top_words(word_matrix_tfidf_2, topic, n_words))
print(top_reviews(topic_matrix_tfidf_2, topic, n_reviews))
topics_list.append('Hiking')

bucket list         11.330308
bucket              11.185014
list                 9.568116
hike crowd           3.228005
sequoia mariposa     3.178893
just perfect         3.157951
unbelievable         3.149604
morning crowd        3.116572
definite             2.759127
woodpecker           2.542328
Name: topic_4, dtype: float64
['put this on your bucket list, you will not be disappointed.'
 'Put this on your bucket list.  Amazing views abound.  Well worth the drive..  Creation on glorious display.'
 'Worth seeing. Great hiking from here. Got a ride from friends then hiked down to the mist trail alongside the waterfalls. Simply breathtaking!'
 'Views were incredible here.  Concessions were not open before 11am, which we were not anticipating.  Plan your food better than we did!'
 'This area is a must on your bucket list. The views will take your breath away. You cannot see the Dome and not be moved.'
 'Beautiful area with some of the most fantastic views and scenery that I have ever wit

**Topic 5 - El Capitan**  
In particular, beauty of El Capitan

In [90]:
# Topic 5: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 5, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('El Capitan')

el           4.528230
capitan      4.268026
climber      1.338473
binocular    0.415990
spot         0.403950
tunnel       0.400033
cap          0.394506
picture      0.362747
face         0.358512
meadow       0.304601
Name: topic_5, dtype: float64
['Seeing El Capitan was breathtaking. I had always seen pictures of El Capitan but seeing it is person was thrilling. It is so massive that it is really hard to process. We were there two days and I was truly captivated by El Capitan.'
 'Meadows around El Capitan make for a relaxing view of the majestic El Capitan. One of the beauties of Yosemite.'
 'One of our last stops in the valley, we took the El Capitan shuttle out to the El Capitan Picnic area and walked out to the next stop at the El Capitan crossover.  Relatively flat walk, no real trail, we just walked along side the road (moderate traffic) till we got to the Valley Loop Trail, which took us past Cathedral Beach and to the crossover.  Crossover was the best place to view El Capita

**Topic 6 - Walking To Lower Fall**  
In particular, 

In [93]:
# Topic 6: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 6, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Walking To Lower Fall')

walk      5.419918
short     0.793609
minute    0.225155
path      0.224473
hard      0.213550
base      0.187959
lodge     0.170765
lovely    0.167322
mile      0.157958
uphill    0.143188
Name: topic_6, dtype: float64
['Once again; in summer: GET THERE EARLY! That means that if you want an experience that you would describe as "nice" you will have to be walking at about 07:30. If you get up to Vernall Falls and walk back along the John Muit trail be carefull not to miss the this trail. When you are on top of the Vernall falls you walk further up but when you come across a bridge you have gone to far. We forgot to take a map so walked up further. Eventually we ended up walking all the way to the top of Nevada falls! Ended up walking for 6,5 hours, almost 14km, 600m up and 600m downhill.'
 'It is a great short and easy walk to see the lower fall.  Lots of people, but the walk is beautiful and serence.'
 'We did do this walk, the start is easy to do. Because of that we did walk in a car

**Topic 7 - Amazing View**  
In particular, at the top of the falls & getting there

In [98]:
# Topic 7: print its 10 top terms and 5 top reviews, then record the label
# (the same 'Amazing Views' label is also recorded for topic 0).
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 7, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Amazing Views')

worth         4.509240
effort        0.855170
definitely    0.733977
bit           0.669340
trip          0.567620
totally       0.362342
little        0.223373
absolutely    0.186453
tough         0.134280
trek          0.124622
Name: topic_7, dtype: float64
["Worth the hike up to see the falls - it is a bit of a climb but if you take it slow it is worth the trip. Very popular area so can be crowded but that shouldn't stop you"
 'Worth the trip up there, views are amazing! Drive up there a little scary, but worth the trip. Can see the valley below, great view.'
 "The Falls is beautiful and definitely worth the easy hike.  It does get crowded as it's not a huge area to view it but definitely worth it.  It's awesome!"
 'Best view of the entire valley and worth the drive to the top. We had two teens and one preteen all boys and they thought it was worth it.'
 "Don't give up even when the trail seems never-ending. The view at the top of the falls is SOOOOOOOOOO worth it. "]


**Topic 8 - Seasons for Falls**  
In particular, 

In [100]:
# Topic 8: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 8, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Falls Viewing Season')

fall      6.459617
base      0.464369
dry       0.316726
spring    0.291243
spray     0.233936
summer    0.218177
bridge    0.177436
near      0.172987
run       0.164288
right     0.154591
Name: topic_8, dtype: float64
["Here is the most important thing to know about all the falls in Yosemite Park.  I have seen a number of people complaining in reviews that they were here in summer, fall, winter, whatever, and the falls were anemic. In California, it does not rain from late spring through mid-fall, and there is no water. The spectacular falls rely on spring snow melt, and so are at their best in early to mid spring.  Even in winter, when it begins to rain, rainfall alone is not enough to really get the falls going, it is when the snowpack melts that they thunder and roar.  So if it is your goal to see the falls at their best, it is essential you plan a spring trip.Yosemite Falls is one spectacular fall.  Well, actually, it is a 3fer! While it is not wide, it is reportedly the tallest 

**Topic 9 - Rock Climbing**  
In particular, 

In [102]:
# Topic 9: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 9, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Rock Climbing')

rock          3.855680
climber       1.668572
face          1.046238
watch         0.829892
formation     0.472010
impressive    0.408355
binocular     0.359285
scale         0.353838
granite       0.350937
huge          0.304945
Name: topic_9, dtype: float64
['a MUST for a rock climber... but even if you are NOT - go check out the climbers , this is NUTS! one of the iconic rocks in the valley - how could you miss it if you are here anyway? '
 'We only experienced the great might of this 900m high rock face from the bottom, and it was impressive enough, let alone from its top. Ideal for rock climbers and base jumpers, but it is extremely tough to get to the top.'
 'As part of my day tour from SFO,  the tour operator stopped here. The rock formation is gigantic. I believe a lot of rock climbers come and camp here. I walked close to the rock and came back. Saw a herd of deer grazing there. I am not sure how one could climb this. the tour operator told us that the first climb took some 45

**Topic 10 - Great Views for Falls**  
In particular, 

In [105]:
# Topic 10: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 10, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Great Views for Falls')

great          4.295647
photo          0.478787
opportunity    0.295339
fun            0.180673
hiking         0.131721
experience     0.131627
crowded        0.099593
view           0.099456
spot           0.094888
picture        0.085015
Name: topic_10, dtype: float64
['so many great views from one of the most beautiful place in the park, you can take a great tour bus up to the point for a great day of exploring or a great day hike. small store  and great views.'
 'It is a great hike to get to the top, steep in places. Great view at the top! Yosemite is just beautiful and it felt great to be at the top!'
 'Great for hiking - great views of the falls - there are 2 trails - lower and upper - upper one is much longer and stepper - great views as you hike up as well as down.'
 'It was great to see this fall, the view was great, it amazing how the water fall comes all the way from the top of mountain.'
 'Last time here we did the Panama trail which was great but this time we just looked a

**Topic 11 - Avoiding Crowds**  
In particular, going early to avoid crowds

In [108]:
# Topic 11: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 11, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Avoiding Crowds')

early        2.584439
crowd        2.397241
morning      0.961203
avoid        0.888423
start        0.508107
summer       0.452922
busy         0.396791
late         0.389514
afternoon    0.312674
arrive       0.229653
Name: topic_11, dtype: float64
['Crowds below; crowds on the Mist Trail to the top, and busy at the top as well.  But worth it!  Go early!'
 'Visit in early morning or late afternoon to avoid the crowds'
 'We stop 1st here by car, was not to crowd early in the morning and you have a wonderful view of the full valley'
 'Drove up real early on my birthday. What a glorious way to begin my day. Going early is a must before the summer crowds arrive.'
 'Well maintained path with steep inclines.  To avoid the crowds start the hike early.  If you you are not there early it is recommended to take the shuttle to avoid parking problems.']


**Topic 12 - Park Views at Glacier Point**  
In particular, 

In [110]:
# Topic 12: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 12, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Park Views')

park        3.121951
national    1.443777
yosemite    0.596151
car         0.309724
enter       0.183118
entrance    0.125020
leave       0.094775
service     0.093389
visitor     0.086358
stay        0.082068
Name: topic_12, dtype: float64
['This is a very beautiful point in the Yosemite National Park. There is so much to see in there. We really enjoyed everything about Yosemite National Park.'
 'Yosemite National Park is probably the best walking and hiking park in the 48 states.  We were their Sept. 10th through the 12th,  There were probably a hundred thousand or more in the park, but the park is so massive that it did not seem crowed. Their are extreme trails and moderate trails. This is a must see park. Not a park that you ride around in like Yellowstone but, worth the trip.'
 'You absolutely must go to Yosemite National Park, and Glacier Pint is a must-see! The rock formation is just another example of the stunning beauty within the national park!!'
 'Magnificent views over Yose

**Topic 13 - Glacier Point & Half Dome**  
In particular, 

In [113]:
# Topic 13: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 13, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Glacier Point & Half Dome')

dome          3.519145
half          3.203864
sentinel      0.506813
sunset        0.498547
village       0.143268
mile          0.126365
include       0.124681
sun           0.120580
especially    0.119849
arrive        0.104188
Name: topic_13, dtype: float64
['This trail is a bit off the path of the half dome hike and worth doing because it mixes up the scenery of the half dome hike. '
 'Glacier Point give you a small taste of what you see from the top of Half Dome. There are also a few hiking trails that go to Half Dome or to the valley floor.'
 'After climbing the Half Dome we took a rest day to go to glacier point. It has a different view of the half dome. It is very popular and you can drive to it. '
 'The best view of the valley and Half dome. No one should miss this view, must visit. I have seen the half dome from this point multiple times. This is spectacular.'
 'Went to Glacier Point vista point to look over the Yosemite Valley below. You can see Half Dome, El Capitan, and al

**Topic 14 - Beautiful Falls**  
In particular, 

In [115]:
# Topic 14: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 14, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Beautiful Falls')

beautiful     4.841553
scenery       0.385571
quite         0.231738
yosemite      0.141283
say           0.134240
sunset        0.132398
absolutely    0.130714
crowded       0.098738
stair         0.089163
nature        0.080211
Name: topic_14, dtype: float64
['Perhaps the most beautiful, although a bit stressful drive ever.  The further you go the more beautiful it gets.  '
 "Such a beautiful mountain! It's mass almost can't be described by words. It is a beautiful thing to see!! "
 'Beautiful falls, there are many falls in Yosemite National Park.  They all are beautiful against the backdrop of the surrounding scenery.'
 'All the falls were beautiful since we were there in the spring.'
 'These waterfalls are so beautiful they do not look real.  It is a beautiful postcard that is actually real.  One of the most beautiful scenes of the Yosemite National Park.']


**Topic 15 - Yosemite Valley**  
In particular, views of Yosemite Valley from Glacier Point

In [117]:
# Topic 15: print its 10 top terms and 5 top reviews, then record the label.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 15, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Yosemite Valley')

valley         4.560932
yosemite       2.360615
floor          1.118505
different      0.307364
entire         0.278964
mile           0.223804
hour           0.196074
lodge          0.174310
perspective    0.162521
sight          0.147274
Name: topic_15, dtype: float64
['A must side trip when visiting Yosemite Valley.  The drive takes time, but it is one of the best views of the valley. '
 'We busted here to get a view of the entire Yosemite valley. Views are utterly amazing, especially the drop down to the valley. '
 'The views here are utterly amazing. We like to stop here on the way into the valley, since it takes about an hour to get to the valley from here. It is about 4000 feet above the valley floor, with commanding views of Half Dome, Yosemite Falls, the valley floor, Vernal Falls, and Nevada Falls. You may also start several hikes here, including the aptly named 4-milte trail and 8-mile trail to the valley floor.It is so inspiring, I nearly proposed to my wife here. Didn\'t h

**Topic 16 - (label TBD)**  
In particular, 

In [118]:
# Topic 16: print its 10 top terms and 5 top reviews.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 16, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic; the disabled append below looks like a
# copied placeholder rather than a chosen label.
# topics_list.append('Yosemite Falls')

amazing       3.970289
experience    0.356236
nature        0.163672
view          0.123779
scenery       0.120757
site          0.115519
awe           0.106738
simply        0.103922
glad          0.095835
stand         0.090288
Name: topic_16, dtype: float64
['Definately must do!Amazing, marvelous view, just to escape from the city jungle and enjoy with all your sences!!'
 'defi have a stop at glacier point - the views are just amazing from there. It does get crowded but you will find space for that amazing picture and views. '
 'This is a medium difficulty trail to the top of the waterfall. The view is amazing from the top and the source of waterfall is just an amazing site.'
 'It is amazing to think about how people climb this massive rock face! There are amazing views of this from many places around the park!'
 'The views and scenery are amazing, do it, you will love it!']


**Topic 17 - (label TBD)**  
In particular, 

In [119]:
# Topic 17: print its 10 top terms and 5 top reviews.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 17, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic; the disabled append below looks like a
# copied placeholder rather than a chosen label.
# topics_list.append('Yosemite Falls')

water     3.600122
flow      0.781722
bring     0.698549
plenty    0.342464
spring    0.275069
dry       0.265402
summer    0.257024
snack     0.242907
bottle    0.226840
hot       0.193693
Name: topic_17, dtype: float64
['A waterfall with almost no water is always a bit disappointing.  nonetheless, without water one sees the marvelous sculpturing of the boulders beneath and treasures the water flow that is, while missing that which is not.'
 'It was definitely worth the walk to see the base of the falls even tho there was no water. You could see the outline of where the water would have been and just imagine what it would have been like with water flowing although it was sad to see the empty stream bed.'
 'This water was ragging down the apron into the pool. I had been there in Sept. and walked across the apron. Very dangerous. Slipping hazard and cold water. Imagine being so close to the top of a water fall. Hope they never move the fence. Stay out of the water. '
 'Water was at a tr

**Topic 18 - (label TBD)**  
In particular, 

In [120]:
# Topic 18: print its 10 top terms and 5 top reviews.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 18, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic; the disabled append below looks like a
# copied placeholder rather than a chosen label.
# topics_list.append('Yosemite Falls')

place        4.750276
wonderful    0.349009
picture      0.284412
sunset       0.196402
love         0.172189
stunning     0.153094
truly        0.143140
life         0.141824
word         0.129325
list         0.128667
Name: topic_18, dtype: float64
['Need car to ger there, but is a amazing place. The road is beatiful, the place has a  impressive view. '
 'If you can only check out one place in Yosemite, this is the place. Great views of the valley and surrounding area.'
 'I was here in May 2014 and this place is on my short list of places that I really wanted to visit.  It was well worth the trip'
 "Really great place to visitIt's wonderful place to see a national park Visited in summer with family elders"
 'This place symbolizes the grandness of nature. Breathtaking views which are unique to Yosemite National Park. Visit the place early as the place has a very limited parking. You might have to wait for hours to get the shuttle to this point.']


**Topic 19 - (label TBD)**  
In particular, 

In [121]:
# Topic 19: print its 10 top terms and 5 top reviews.
# NOTE(review): `word_matrix` / `topic_matrix` are not assigned in any cell above this one
# (only the *_tfidf / *_tf variants are) -- confirm which model run they should refer to.
topic, n_reviews, n_words = 19, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic; the disabled append below looks like a
# copied placeholder rather than a chosen label.
# topics_list.append('Yosemite Falls')

time          5.213040
spend         0.494372
wish          0.205573
need          0.170479
able          0.159155
stay          0.159064
experience    0.158379
want          0.147067
life          0.144978
plenty        0.130109
Name: topic_19, dtype: float64
['Try going in the winter time. The view is spectacular any time of year. There are few visitors, so you spend less time navigating and more time sight seeing.'
 "The best time to go would be around May because after that the Falls will run weaker. You can see it from the Valley and it's like your greetings card from Yosemite! Spend some time and go for a hike!"
 'Every time I visit Glacier Point, it is like I am visiting it for the first time. The views are breathtaking. There is so much to take in from every angle. The drive up can be a little nerve wracking so make sure you take your time and enjoy.'
 "I've been to this part of the Yosemite NP several times but each time I still find it interesting. It's always busy with heap 

**Topic 20 - TODO: label this topic**  
In particular, 

In [122]:
# Topic 20: print its top-weighted words and the reviews that score highest on it.
topic, n_reviews, n_words = 20, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# TODO: choose a label for this topic and append it to topics_list
# (the old commented-out 'Yosemite Falls' append was a copy-paste leftover).

vernal        3.823835
nevada        2.738232
muir          0.854241
john          0.790360
fall          0.608544
continue      0.511172
mist          0.395145
past          0.319611
footbridge    0.318717
happy         0.259316
Name: topic_20, dtype: float64
['Vernal Fall is the closer of two with Nevada being further.  If you are looking for adventure and are in good shape, then do the Nevada Falls hike.  We took the John Muir trail to Nevada and it was beautiful!!  Then on our way back, we hit the Vernal Fall trail and immediately were immersed in wall to wall people.  Yikes, the Nevada and John Muir trails were almost empty compared to the cattle herding on Vernal.  But, it is a good hike for the elderly and people with small children.  Go early...as in before 9am.'
 'The proximity to half dome, and the view of Nevada and Vernal Falls below, make this a must see at Yosemite. '
 'To hike to the top of vernal falls is wet and worth every step.  If you are up to it go all the up to N

In [8]:
# Sweep NMF over 1..30 components and keep every fitted model together with its
# error and both result matrices, so the different topic counts can be compared later.
nmf_dict = dict(nmf=[], error=[], topic_matrix=[], word_matrix=[])

for i in range(30):
    # The third positional argument is the component count (i + 1, i.e. 1 through 30).
    nmf, err, topic_matrix, word_matrix = nmf_topic_modeling(review_word_matrix, review_vocab, i + 1)

    # Attach the raw and lemmatized review text next to the topic weights for inspection.
    # presumably topic_matrix shares df's row index — TODO confirm
    topic_matrix[['raw_review', 'review_cleanned']] = df[['review_text', 'review_lemma']]

    nmf_dict['nmf'].append(nmf)
    nmf_dict['error'].append(err)
    nmf_dict['topic_matrix'].append(topic_matrix)
    nmf_dict['word_matrix'].append(word_matrix)

    print(f"Progress Update On NMF Components: {i+1} currently.")

## Vectorizer & Topic Modeling - Round 2

Doing 20 topics only this time!

In [76]:
# Start Round 2 with an empty list of topic labels; the topic cells below append to it.
topics_list = list()

In [54]:
# Build the TF-IDF document-term matrix for Round 2 from the lemmatized reviews.
# (An earlier idea, left disabled: stop_words = ENGLISH_STOP_WORDS.union(['yosemite']))
tfidf = TfidfVectorizer(
    stop_words='english',
    min_df=20,
    max_df=0.5,
)
review_word_matrix = tfidf.fit_transform(df['review_lemma'])
# NOTE(review): get_feature_names() is deprecated in newer scikit-learn releases in
# favour of get_feature_names_out() — confirm against the pinned version.
review_vocab = tfidf.get_feature_names()


In [123]:
# Fit a single 20-topic NMF model on the Round 2 TF-IDF matrix.
nmf_model, err, topic_matrix, word_matrix = nmf_topic_modeling(
    review_word_matrix,
    vocab=review_vocab,
    n=20,
)
# Keep the raw and lemmatized review text beside the topic weights for inspection.
topic_matrix['raw_review'] = df['review_text']
topic_matrix['review_cleanned'] = df['review_lemma']

Let's start exploring the 20 topics that were generated using NMF.

**Topic 0 - Mist Trail**  
In particular, hiking at Mist Trail

In [124]:
# Topic 0: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 0, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Mist Trail')

trail       4.979392
mist        1.901066
muir        1.093938
nevada      1.019291
john        0.973173
vernal      0.935366
mile        0.463318
falls       0.445994
continue    0.308046
way         0.271942
Name: topic_0, dtype: float64
['I have been through this Trail twice.  First time was in June 2008 on the way to Half Dome and as the fall was in full blast, it was not a "mist" trail, it was more like a "freaking thunderstorming turning over a bucket" trail.  This time around I came down the mist trail on the Panorama Trail and as it was August, I barely got misted.  This is a pretty spectacular trail and a relatively short hike from Happy Isles (2 miles to the top or Vernal Fall or something like that).  It\'s worth a hike up to the top, but use the mist trail, not the John Muir trail.  '
 "The mist trail is a very strenuous hike but worth it.  If you don't think you can do it, then there's always the John Muir trail to the top of Nevada Falls.  The John Muir trail is easier bu

**Topic 1 - Glacier Point**  
In particular, the views at Glacier Point

In [125]:
# Topic 1: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 1, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Glacier Point')

point       2.369260
glacier     2.048053
road        0.342321
view        0.279394
sentinel    0.165874
mile        0.144577
parking     0.137450
taft        0.126302
panorama    0.114262
washburn    0.112618
Name: topic_1, dtype: float64
["we saw this fall when we went to the Glacier point, it's already very beautiful when we saw in the in Glacier point, you can also see it at the Washburn point (which is a spot on the Glacier point road), there is a trail leading to the falls, you may do it if you have time."
 "We spent the day hiking around Glacier Point.  The Point itself is the one place up the Glacier Point road that you don't have to do a long hike to get to.  It is a relatively short walk to the Point and to the reward of a fabulous view.   Amazingly it was not crowded at around 10 am on a Monday.  There are a number of vantage points to enjoy the view.  Definitely take the time to get a different view of Yosemite and the valley.  There are bathrooms in the parking lot.You can

**Topic 2 - Yosemite Falls**  
In particular, Upper and Lower Yosemite Falls

In [130]:
# Topic 2: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 2, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Yosemite Falls')

fall        2.732613
low         0.841530
upper       0.665215
vernal      0.307615
yosemite    0.288478
base        0.279379
close       0.195083
nevada      0.162833
spring      0.142781
winter      0.126650
Name: topic_2, dtype: float64
["Here is the most important thing to know about all the falls in Yosemite Park.  I have seen a number of people complaining in reviews that they were here in summer, fall, winter, whatever, and the falls were anemic. In California, it does not rain from late spring through mid-fall, and there is no water. The spectacular falls rely on spring snow melt, and so are at their best in early to mid spring.  Even in winter, when it begins to rain, rainfall alone is not enough to really get the falls going, it is when the snowpack melts that they thunder and roar.  So if it is your goal to see the falls at their best, it is essential you plan a spring trip.Yosemite Falls is one spectacular fall.  Well, actually, it is a 3fer! While it is not wide, it is rep

**Topic 3 - Driving Tioga Pass**  
In particular, driving Through Tioga Pass 

In [131]:
# Topic 3: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 3, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Tioga Pass')

pass      1.459168
tioga     1.167593
road      1.136419
lake      0.926403
drive     0.775960
stop      0.443354
meadow    0.405786
open      0.404769
snow      0.352920
mono      0.296401
Name: topic_3, dtype: float64
["Tioga Pass is a small pass between the mountains on your way to enter the Yosemite Valley through the Tioga road from East. However; I presume most people tend to equate it with the entire Tioga road - which is an excellent 2 hour scenic drive through not so scary mountain roads, meadows and lakes from the town of Lee Vining to Yosemite Village. You come across several Yosemite attractions along the way (Tioga Lake, Tenaya Lake, Lembert Dome, Dog Lake, Tolumne meadows to name a few). Check the road condition before you travel; as it can be closed during snow (there's a sign on highway 395 that let's you know if the pass is open or closed). Amenities along the road are not available in winter; but in summer when the road is open you can get several eateries; the west e

**Topic 4 - Hiking**  

In [133]:
# Topic 4: print its top-weighted words and best-matching reviews, then record its label.
# This cell shows 10 reviews rather than the 5 used for the other topics.
topic, n_reviews, n_words = 4, 10, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Hiking')

hike         2.846332
strenuous    0.335727
easy         0.307811
mile         0.297353
short        0.207582
worth        0.200180
hour         0.143952
day          0.131453
moderate     0.128889
falls        0.125093
Name: topic_4, dtype: float64
['We hiked to vernal falls and it is a moderate hike.  The trail is well kept and easy to hike on.  It is a beautiful and very peaceful hike.  We went early in the morning and the fog was over the valley which was very serene.'
 "we didn't hike to the top of the falls just the Yosemite valley easy hike to the base and it was a easy and great hike."
 'It is a strenuous hike but you will be rewarded. Please motivate yourself to do this hike. It will not take more than an hour even if you stop and go. Kids can do this also and I have seen many elderly people doing the hike. Incredible!'
 'Easy hike with kids and there were restrooms on the hike.'
 'It was quite strenuous hike but once you are up there you can feel achievement. The view along t

**Topic 5 - El Capitan**  
In particular, beauty of El Capitan

In [134]:
# Topic 5: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 5, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('El Capitan')

el           2.081836
capitan      1.956899
climber      1.306769
rock         0.873246
face         0.564187
climb        0.446689
watch        0.445592
look         0.370616
binocular    0.355162
mountain     0.262432
Name: topic_5, dtype: float64
['El Capitan is one of if not the largest single piece of granite on the planet. To stand and look is always awe inspiring. I tell everyone who goes to yosemite to stop at every turnout and get out of the car and take the time to look in all directions. The views are incredible and the  sight of El Capitan is mind boggling. With you Binoculars be sure to scour the face of the rock for climbers. They are always there, and the enormity of the face of El Capitan makes the first climber hard to spot. Then magically you can find many climbers. What a view, what a place, Yosemite'
 'El Capitan is a high mountain with a sheer cliff face that is very popular with rock climbers. The valley roads and the Tunnel View give excellent views of El Capitan

**Topic 6 - Must Visit**  
In particular, 

In [137]:
# Topic 6: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 6, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Must Visit')

valley       3.006317
yosemite     2.495410
floor        0.690864
view         0.482034
visit        0.364088
mountain     0.238934
different    0.235194
entire       0.224131
miss         0.203528
high         0.195139
Name: topic_6, dtype: float64
['A must side trip when visiting Yosemite Valley.  The drive takes time, but it is one of the best views of the valley. '
 'A must place when visiting to get good view of Yosemite Valley, and picture of Half Dome. Some drive from Yosemite Valley, but you can also take a bus. '
 'We busted here to get a view of the entire Yosemite valley. Views are utterly amazing, especially the drop down to the valley. '
 'worth a visit...while visiting Yosemite. There are amazing views of Yosemite Valley with great photo ops! takes about an hour to drive to Glacier Point from Yosemite Valley. totally worth it!'
 'This is a must see.  The road to Glacier Point takes about 1 hour from Yosemite Valley or from Wawona.  This is your opportunity to look down on

**Topic 7 - Walking Advice**  
In particular, at the top of the falls & getting there

In [139]:
# Topic 7: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 7, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Walking Advice')

walk     3.081038
easy     1.181079
short    0.530749
low      0.407561
path     0.352119
falls    0.297157
base     0.275294
area     0.223578
nice     0.212982
stop     0.169405
Name: topic_7, dtype: float64
['It is a great short and easy walk to see the lower fall.  Lots of people, but the walk is beautiful and serence.'
 'The lower falls trail was a very easy walk and easy to get to. They were impressive to us, and when you walked across the bridge, the breeze and waterfall spit felt so good during a heat wave. The rocks in the water in the area were also fun to walk around on.'
 'We did do this walk, the start is easy to do. Because of that we did walk in a caravan of people. The second loop is more hard to walk and that is where it gets interesting and beautiful. '
 'This waterfall is definately worth the walk. very easy walk to lower falls.  Just for the amazing view and ferocious roar.'
 'Once again; in summer: GET THERE EARLY! That means that if you want an experience that you

**Topic 8 - Spectacular Views Advice**  
In particular, 

In [141]:
# Topic 8: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 8, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Spectacular Views Advice')

good       2.699714
make       1.471864
sure       0.924108
view       0.591026
shoe       0.262313
stop       0.211635
way        0.204824
picture    0.194596
want       0.183415
thing      0.177757
Name: topic_8, dtype: float64
['From here you can see everything! Best views ever! You can also hike to/from here or drive to/from here.'
 "It's a good hike to the bridge. The upwards path is good to walk. It's moderated but with a few stops to watch the beautiful views, we made it. Loved it!"
 'Beautiful falls, make sure you make the trip to see this and bring a walking stick and good hiking boots.'
 'This is a good start to Yosemite! Make sure you have plenty of time on the way to stop and enjoy the views. When I was driving in here I knew I was getting into paradise!'
 'Amazing holiday with the most spectacular views. If you are in the valley, drive to get a good view of the Falls and get a pic of the valley from good distance back with you in it. Make sure to drive as much of the valle

**Topic 9 - Glacier Point Tour Bus**  
In particular, 

In [143]:
# Topic 9: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 9, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Glacier Point Tour Bus')

bus       1.861141
tour      1.517727
way       1.063025
hour      0.802611
trip      0.722555
stop      0.513348
guide     0.463620
driver    0.429155
lodge     0.426720
day       0.340918
Name: topic_9, dtype: float64
['This bus “tour” was not a tour. If you’re traveling from out of town/state with a car rental, please do yourself a favor and save your money - Make the drive up to Glacier Point yourself! The bus “tour” was literally just a bus ride up!! Our driver was friendly and the tour “kind of” started out as a tour but all he was mentioning was the vegetation growing on the side of the road in the beginning. We also learned that a tunnel was made by men in the depression era looking to earn money for their families. That’s it. That’s all we learned, then slept the rest of the hour-plus drive up! Finally made it up and we’re given an hour to use the restroom, view, walk around, and get a snack. Since we booked the round-trip “tour” we felt this was rushed. We really began regret

**Topic 10 - Great Views**  
In particular, 

In [145]:
# Topic 10: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 10, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Great Views')

great          2.844411
view           0.803425
photo          0.321877
opportunity    0.197631
waterfall      0.166340
really         0.166091
awesome        0.138772
enjoy          0.130102
fun            0.122655
experience     0.113691
Name: topic_10, dtype: float64
['so many great views from one of the most beautiful place in the park, you can take a great tour bus up to the point for a great day of exploring or a great day hike. small store  and great views.'
 'It is a great hike to get to the top, steep in places. Great view at the top! Yosemite is just beautiful and it felt great to be at the top!'
 'Great for hiking - great views of the falls - there are 2 trails - lower and upper - upper one is much longer and stepper - great views as you hike up as well as down.'
 'It was great to see this fall, the view was great, it amazing how the water fall comes all the way from the top of mountain.'
 'Took the shuttle here in the morning, and hiked down the panorama view trail from the

**Topic 11 - Avoiding Crowds**  
In particular, going early to avoid crowds

In [146]:
# Topic 11: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 11, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Avoiding Crowds')

early      1.214052
lot        1.053028
parking    1.009307
crowd      0.936029
people     0.699065
day        0.492930
morning    0.436918
avoid      0.353899
spot       0.347497
busy       0.305085
Name: topic_11, dtype: float64
['While not as crowded as the Yosemite valley, this area is also popular with anyone visiting the park. It is a beautiful area but arrive early because once the parking lot fills up you will have to be shuttled in to the area. We arrived by 9 AM and had no problems with parking. After returning from our 5 hour hike, the parking lot was full. There are a lot of people who just walk around the point, but if you can hike some, you can get away from the crowds and really see some stunning vistas. '
 'This is a beautiful place!  You will want to take photos & also just sit and look at the view for a while.  It can get crowded, so go early. There is a free shuttle bus that will take you there (from a lower parking lot), if the parking is full at the site.  '
 'Beau

**Topic 12 - Park Views at Glacier Point**  
In particular, 

In [147]:
# Topic 12: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 12, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Park Views')

park        2.454173
national    1.131551
yosemite    0.566789
visit       0.374167
car         0.240969
day         0.186807
enter       0.148528
entrance    0.113467
area        0.101283
really      0.093202
Name: topic_12, dtype: float64
['This is a very beautiful point in the Yosemite National Park. There is so much to see in there. We really enjoyed everything about Yosemite National Park.'
 'Magnificent views over Yosemite national park. A 35 mile drive from the entrance to the park but what a view. A must when visiting.'
 'So if you can only choose one National Park to visit, make it Yosemite National Park! We went to Glacier Point first, and had an amazing view. Then we drove into the valley and parked.  We took the shuttles around and so enjoyed our visit looking back UP at Glacier Paint! So worth the trip!'
 'Yosemite National Park is probably the best walking and hiking park in the 48 states.  We were their Sept. 10th through the 12th,  There were probably a hundred thousand

**Topic 13 - Glacier Point & Half Dome**  
In particular, 

In [148]:
# Topic 13: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 13, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Glacier Point & Half Dome')

dome        2.599603
half        2.380788
view        0.373621
sentinel    0.368072
sunset      0.363992
valley      0.200943
nevada      0.188680
vernal      0.150728
mile        0.114453
village     0.107808
Name: topic_13, dtype: float64
['This trail is a bit off the path of the half dome hike and worth doing because it mixes up the scenery of the half dome hike. '
 'Glacier Point give you a small taste of what you see from the top of Half Dome. There are also a few hiking trails that go to Half Dome or to the valley floor.'
 'The best view of the valley and Half dome. No one should miss this view, must visit. I have seen the half dome from this point multiple times. This is spectacular.'
 'After climbing the Half Dome we took a rest day to go to glacier point. It has a different view of the half dome. It is very popular and you can drive to it. '
 'Went to Glacier Point vista point to look over the Yosemite Valley below. You can see Half Dome, El Capitan, and all the snow peaked do

**Topic 14 - Great Place to Visit**  
In particular, 

In [150]:
# Topic 14: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 14, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Great Place to Visit')

place        2.522541
stop         0.263389
picture      0.249302
amazing      0.236545
visit        0.188744
wonderful    0.183549
just         0.164362
look         0.131450
love         0.130796
feel         0.126871
Name: topic_14, dtype: float64
['Need car to ger there, but is a amazing place. The road is beatiful, the place has a  impressive view. '
 'I was here in May 2014 and this place is on my short list of places that I really wanted to visit.  It was well worth the trip'
 'If you can only check out one place in Yosemite, this is the place. Great views of the valley and surrounding area.'
 'Amazing place, the falls and the surrounding valley were just gorgeous, we all loved the place and wish to go there again.'
 "Really great place to visitIt's wonderful place to see a national park Visited in summer with family elders"]


**Topic 15 - Yosemite Valley**  
In particular, views of Yosemite Valley from Glacier Point

In [151]:
# Topic 15: print its top-weighted words and best-matching reviews, then record its label.
topic, n_reviews, n_words = 15, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Yosemite Valley')

beautiful     2.786377
waterfall     1.030220
scenery       0.239625
just          0.162149
mountain      0.144631
quite         0.143381
close         0.131037
enjoy         0.113788
absolutely    0.102021
rainbow       0.092209
Name: topic_15, dtype: float64
['Perhaps the most beautiful, although a bit stressful drive ever.  The further you go the more beautiful it gets.  '
 "Such a beautiful mountain! It's mass almost can't be described by words. It is a beautiful thing to see!! "
 'These waterfalls are so beautiful they do not look real.  It is a beautiful postcard that is actually real.  One of the most beautiful scenes of the Yosemite National Park.'
 'Beautiful falls, there are many falls in Yosemite National Park.  They all are beautiful against the backdrop of the surrounding scenery.'
 'All the falls were beautiful since we were there in the spring.']


**Topic 16 - Amazing Drives**  
In particular, 

In [152]:
# Topic 16: print its top-weighted words and best-matching reviews, then record its label.
topic = 16
n_reviews = 5
n_words = 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# Record this topic's label so topics_list stays aligned with the topic indices.
# The append used to be commented out with a stale 'Yosemite Falls' label copied
# from topic 2; 'Amazing Drives' is the label given in this topic's heading.
topics_list.append('Amazing Drives')

drive          1.796311
worth          1.393934
view           1.225510
amazing        0.989893
long           0.343270
bit            0.270477
spectacular    0.268113
definitely     0.262829
stunning       0.198342
effort         0.188688
Name: topic_16, dtype: float64
['The drive up is long, but not very difficult.  It is worth the drive.  The view is absolutely amazing and a must do in Yosemite!'
 'Amazing views of the valley from here. The road to get here is a bit difficult to drive, but well worth it.'
 'Amazing view, but very long drive from anywhere that make it really difficult to visit - but worth it'
 'Worth the trip up there, views are amazing! Drive up there a little scary, but worth the trip. Can see the valley below, great view.'
 'Worth the drive or walk if you are feeling energetic. Amazing views over the mountains and down in to Yosemite valley ']


**Topic 17 - TODO: label this topic**  
In particular, 

In [154]:
# Topic 17: print its top-weighted words and the reviews that score highest on it.
topic, n_reviews, n_words = 17, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# TODO: choose a label for this topic and append it to topics_list
# (the old commented-out 'Yosemite Falls' append was a copy-paste leftover).

water     2.791421
flow      0.686396
bring     0.551703
dry       0.419291
spring    0.398956
summer    0.371455
falls     0.310565
lot       0.263514
year      0.263055
plenty    0.260173
Name: topic_17, dtype: float64
['A waterfall with almost no water is always a bit disappointing.  nonetheless, without water one sees the marvelous sculpturing of the boulders beneath and treasures the water flow that is, while missing that which is not.'
 'It was definitely worth the walk to see the base of the falls even tho there was no water. You could see the outline of where the water would have been and just imagine what it would have been like with water flowing although it was sad to see the empty stream bed.'
 'Water was at a trickle but a great hike nonetheless.  Want to revisit in the spring when more water is flowing.  '
 'This water was ragging down the apron into the pool. I had been there in Sept. and walked across the apron. Very dangerous. Slipping hazard and cold water. Imagine be

**Topic 18 - TODO: label this topic**  
In particular, 

In [155]:
# Topic 18: print its top-weighted words and the reviews that score highest on it.
topic, n_reviews, n_words = 18, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# TODO: choose a label for this topic and append it to topics_list
# (the old commented-out 'Yosemite Falls' append was a copy-paste leftover).

step        1.621470
steep       1.240736
climb       1.161384
wet         1.054578
slippery    0.716176
bridge      0.660624
way         0.645348
vernal      0.617462
stair       0.543115
fall        0.453918
Name: topic_18, dtype: float64
['Vernal Falls in on the first water falls on the Mist Trail.  It is considered a moderate hike to the base of the Falls but to get to the top requires hiking steep and uneven rock steps that are wet.  Having said that, climbing the steep steps is part of the fun but just be prepared to get wet!'
 "Bottom of falls couple miles, top of falls you climb the wet steps and get wet.  I've had several companions get a little scared on those steps.  Very beautiful.  Great lunch spot.  Then keep going to Top of Nevada from there. "
 'The trail starts out as an easy uphill climb on a paved walkway. This leads to a bridge with a good view of Vernal Fall. After that, the trail heads on up to the waterfall. As you approach the waterfall, the trail becomes a seri

**Topic 19 - TODO: label this topic**  
In particular, 

In [156]:
# Topic 19: print its top-weighted words and the reviews that score highest on it.
topic, n_reviews, n_words = 19, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# TODO: choose a label for this topic and append it to topics_list
# (the old commented-out 'Yosemite Falls' append was a copy-paste leftover).

time        2.705766
visit       1.219578
year        0.993329
just        0.334905
snow        0.326948
spend       0.261969
enjoy       0.241749
spring      0.223032
yosemite    0.200198
love        0.191007
Name: topic_19, dtype: float64
['Try going in the winter time. The view is spectacular any time of year. There are few visitors, so you spend less time navigating and more time sight seeing.'
 'Every time I visit Glacier Point, it is like I am visiting it for the first time. The views are breathtaking. There is so much to take in from every angle. The drive up can be a little nerve wracking so make sure you take your time and enjoy.'
 'Last time we were not able to visit as it was too early in the year, this time it was open. It is amazing to see Yosemite from a high viewpoint. Hairy drive to get there but well worth it.'
 'This was one of the most beautiful places that I have ever visited, and I have visited a few. Well worth the visit, time, and money.'
 "The best time to go wo

**Topic 20 - TODO: label this topic**  
In particular, 

In [122]:
# Topic 20: print its top-weighted words and the reviews that score highest on it.
# NOTE(review): the Round 2 model is fitted with n = 20, so topics presumably run
# 0-19 and this index would not exist on a fresh run; this cell looks like a stale
# leftover from an earlier, larger model — confirm and remove or re-run.
topic, n_reviews, n_words = 20, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# TODO: choose a label for this topic and append it to topics_list
# (the old commented-out 'Yosemite Falls' append was a copy-paste leftover).

vernal        3.823835
nevada        2.738232
muir          0.854241
john          0.790360
fall          0.608544
continue      0.511172
mist          0.395145
past          0.319611
footbridge    0.318717
happy         0.259316
Name: topic_20, dtype: float64
['Vernal Fall is the closer of two with Nevada being further.  If you are looking for adventure and are in good shape, then do the Nevada Falls hike.  We took the John Muir trail to Nevada and it was beautiful!!  Then on our way back, we hit the Vernal Fall trail and immediately were immersed in wall to wall people.  Yikes, the Nevada and John Muir trails were almost empty compared to the cattle herding on Vernal.  But, it is a good hike for the elderly and people with small children.  Go early...as in before 9am.'
 'The proximity to half dome, and the view of Nevada and Vernal Falls below, make this a must see at Yosemite. '
 'To hike to the top of vernal falls is wet and worth every step.  If you are up to it go all the up to N

## Vectorizer & Topic Modeling - Round 3

The cleaned corpus is used for creating a bag of words with TF-IDF.
This will then be used for topic modeling.

In [159]:
# Start Round 3 with an empty list of topic labels.
topics_list = list()

In [169]:
# Attraction names and generic praise words to strip from the lemmatized reviews
# before Round 3, since they dominated the Round 2 topics.
# Each entry needs its own trailing comma: adjacent string literals are silently
# concatenated, which previously merged 'sentinel' and 'tioga' into 'sentineltioga'
# so that neither word was removed.
additional_removal_words = [
    'half', 'dome', 'mist', 'glacier', 'point', 'washburn', 'yosemite', 'falls',
    'lower', 'upper', 'sentinel',
    'tioga', 'pass', 'columbia', 'el', 'capitan', 'nevada', 'vernal', 'park',
    'national', 'valley', 'great',
    'amazing', 'beautiful', 'muir', 'john',
]

In [202]:
# Strip the extra removal words from each lemmatized review and store the result
# in a new column; Series.apply forwards the keyword argument to remove_stopwords.
df['review_final_clean'] = df['review_lemma'].apply(
    remove_stopwords,
    remove_words_list=additional_removal_words,
)
# Show the raw text next to the cleaned text for the first few reviews.
df[['review_text', 'review_final_clean']].head()

Unnamed: 0,review_text,review_final_clean
0,"Not too many ppl when we arrive at 3pm, probably because they are controlling visitor per day in national park. Smoke was not crazy today, view was okay. -Oct 16 2020",not too many ppl when arrive at pm probably because be control visitor per day in smoke be not crazy today view be okay oct
1,"Glacier Point at about 7000 ft elevation can be reached by car, shuttle bus (but not during the pandemic) or several strenuous hikes from Yosemite Valley. Even if you can’t walk well, you still get stellar views. If you can hike several miles there are a lot of options on where to go from here. Most people enjoy the short but steep hike to the geology hut. From there you can see the Yosemite Valley floor below, Yosemite Falls in it’s entirety. From Washburn Point about 1/4 mile before you reach the Glacier Point Parking Lot, you can see the entire length of the mist trail with Vernal and Nevada Falls, and the upper 1/4 of Illilouette Fall. (Here you can get a good view of some of the Panorama Trail, which starts at Glacier Point, and goes past all 3 of these waterfalls, about 8 miles to The Valley below).The star of Glacier Point is Half Dome. It’s so large and seemingly so close it feels like you can touch it! Photo opps abound. The Four Mile Trail starts here, going to Yosemite Valley in 4.4 miles and 3,200 ft elevation loss. The first mile is more of a rim trail, and loses/gains only 250 feet. Walk to the point where the switchbacks start, and turn around to retrace your steps. It’s a mile of stellar views of Half Dome, Yosemite Falls, and the Sierra Nevada Mountains - at a fraction of the effort it takes to hike the entire trail. The road to Glacier Point is not open year round. Check road conditions before going. Wintry conditions or fire can close the road. Generally it’s open end of May through October. If it’s open during your visit, even if pressed for time, this is a must-do! 
Bring your lunch and sit on the steps of the amphitheater!",at about ft elevation can be reach by car shuttle bus but not during the pandemic or several strenuous hike from even if can not walk well still get stellar view if can hike several mile there be a lot of option on where to go from here most people enjoy the short but steep hike to the geology hut from there can see the floor below fall in entirety from about mile before reach the parking lot can see the entire length of the trail with and fall and the of illilouette fall here can get a good view of some of the panorama trail which start at and go past all of these waterfall about mile to the below the star of be so large and seemingly so close feel like can touch photo opps abound the four mile trail start here go to in mile and ft elevation loss the first mile be more of a rim trail and lose gain only foot walk to the where the switchback start and turn around to retrace step a mile of stellar view of fall and the sierra mountains at a fraction of the effort take to hike the entire trail the road to be not open year round check road condition before go wintry condition or fire can close the road generally open end of may through october if open during visit even if press for time this be a must do bring lunch and sit on the step of the amphitheater
2,"The most Iconic view of the half dome, this is the placeIt's quite a big observation area with huge parking and some walking to doYou'll get the most amazing and iconic view of the Half Dome and the whole valleyTake your time and enjoy it",the most iconic view of the this be the placeit quite a big observation area with huge parking and some walk to doyoull get the most and iconic view of the and the whole valleytake time and enjoy
3,What an absolute breathtaking view! Such a fun place to hang out for a few hours. To see half dome across the way is so cool.,what an absolute breathtaking view such a fun place to hang out for a few hour to see across the way be so cool
4,"This attraction/view will truly take your breath away! And, it's accessible via road before you go through Tunnel View. In fact, you may want to take the right turn at the entrance as you are driving in before proceeding further into Yosemite Valley. Once you make the right turn, you'll drive some distance before arriving at this remarkable vista point. For us, driving up we didn't see much traffic, even with the limited capacity during the coronavirus period. However, once we ""arrived"" at the vista point and parked, we came upon quite a group of tourists (much like Tunnel View). One can only imagine what it must be like during ""normal"" park periods. We were able to find a parking spot right away. Walking from there to the vista point is only a relatively short distance. You'll see Half Dome standing out first, and some helpful signage along the sidewalk area, including restrooms. Have your photography equipment ready and loaded because you will want to take many shots of this area. Whether it's phone camera or regular camera, be prepared to take multiple shots to capture it all. Take your time to take in the beauty and majesty of this natural wonder. Even in mid-summer, you will find several of the falls still flowing if you look close enough. We found both Nevada and Vernal Falls with water flowing. That made up for missing Yosemite Falls water flowing. You may also wish to take the walking/hiking paths that permeate the perimeter of Glacier Point. There's a very famous vantage point that Ansel Adams made famous nearby. This is a ""CAN'T MISS"" part of the Yosemite Park experience. 
So, get up there.",this attraction view will truly take breath away and accessible via road before go through tunnel view in fact may want to take the right turn at the entrance as be drive in before proceed far into once make the right turn will drive some distance before arrive at this remarkable vista for drive up do not see much traffic even with the limited capacity during the coronavirus period however once arrive at the vista and come upon quite a group of tourist much like tunnel view one can only imagine what must be like during normal period be able to find a parking spot right away walk from there to the vista be only a relatively short distance will see stand out first and some helpful signage along the sidewalk area include restroom have photography equipment ready and loaded because will want to take many shot of this area whether phone camera or regular camera be prepared to take multiple shot to capture all take time to take in the beauty and majesty of this natural wonder even in mid summer will find several of the fall still flow if look close enough find both and fall with water flow that make up for miss fall water flow may also wish to take the walk hiking path that permeate the perimeter of there s a very famous vantage that ansel adam make famous nearby this be a can not miss part of the experience so get up there


In [211]:
# Write the current contents of `df` to a new CSV file; index=False leaves the
# row index out of the file.
df.to_csv("Reviews_cleaned_for_NLP_v2.csv", index=False)

In [203]:
# stop_words = ENGLISH_STOP_WORDS.union(additional_removal_words)

# Build a TF-IDF matrix from the lemmatized reviews. Terms must appear in at
# least 20 documents (min_df) and in at most half of them (max_df) to be kept;
# scikit-learn's built-in English stop-word list is removed as well.
tfidf = TfidfVectorizer(stop_words='english', min_df = 20, max_df = 0.5)
review_word_matrix = tfidf.fit_transform(df['review_lemma'])

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer get_feature_names_out() when available and fall back for older
# installs. tolist() keeps review_vocab a plain list of str, which is what the
# old method returned.
if hasattr(tfidf, 'get_feature_names_out'):
    review_vocab = tfidf.get_feature_names_out().tolist()
else:
    review_vocab = tfidf.get_feature_names()


In [204]:
# Fit the NMF topic model on the TF-IDF matrix, requesting n = 20, via the
# project helper nmf_topic_modeling.
nmf_model, err, topic_matrix, word_matrix = nmf_topic_modeling(
    review_word_matrix,
    vocab = review_vocab,
    n = 20,
)

# Attach the original and lemmatized review text to the topic matrix, one
# column at a time (pandas aligns each assigned Series on the row index).
topic_matrix['raw_review'] = df['review_text']
topic_matrix['review_cleanned'] = df['review_lemma']

Let's start exploring the topics that were generated using NMF for up to 50 topics.

#### Topic 0 - Risky Trails
In particular, hiking at Mist Trail

In [205]:
# Inspect topic 0 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 0, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Mist Trail')

trail       4.774094
mist        1.985122
nevada      1.096913
vernal      1.095902
muir        1.067449
john        0.949117
falls       0.501973
mile        0.435668
continue    0.331258
fall        0.317597
Name: topic_0, dtype: float64
['I have been through this Trail twice.  First time was in June 2008 on the way to Half Dome and as the fall was in full blast, it was not a "mist" trail, it was more like a "freaking thunderstorming turning over a bucket" trail.  This time around I came down the mist trail on the Panorama Trail and as it was August, I barely got misted.  This is a pretty spectacular trail and a relatively short hike from Happy Isles (2 miles to the top or Vernal Fall or something like that).  It\'s worth a hike up to the top, but use the mist trail, not the John Muir trail.  '
 'Do not sleep in.  We were on the Mist Trail by 7:30 a.m.  We also walked up to Nevada Fall and came back down the John Muir Trail.  There were not crowds until about 11:00 a.m.'
 "The mist t

#### Topic 1 - 'Scenic Drives'
In particular, the views at Glacier Point

In [206]:
# Inspect topic 1 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 1, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Glacier Point')

point       2.259623
glacier     1.961676
bus         0.344989
road        0.302407
tour        0.287982
mile        0.177383
sentinel    0.155408
way         0.140937
shuttle     0.135141
day         0.132549
Name: topic_1, dtype: float64
["we saw this fall when we went to the Glacier point, it's already very beautiful when we saw in the in Glacier point, you can also see it at the Washburn point (which is a spot on the Glacier point road), there is a trail leading to the falls, you may do it if you have time."
 "We spent the day hiking around Glacier Point.  The Point itself is the one place up the Glacier Point road that you don't have to do a long hike to get to.  It is a relatively short walk to the Point and to the reward of a fabulous view.   Amazingly it was not crowded at around 10 am on a Monday.  There are a number of vantage points to enjoy the view.  Definitely take the time to get a different view of Yosemite and the valley.  There are bathrooms in the parking lot.You can

#### Topic 2 - 'Yosemite Falls'
In particular, Upper and Lower Yosemite Falls

In [207]:
# Inspect topic 2 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 2, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Yosemite Falls')

fall        2.741441
low         0.823839
upper       0.650517
vernal      0.324433
base        0.280635
yosemite    0.270254
close       0.195180
nevada      0.172449
spring      0.137128
winter      0.122964
Name: topic_2, dtype: float64
["Here is the most important thing to know about all the falls in Yosemite Park.  I have seen a number of people complaining in reviews that they were here in summer, fall, winter, whatever, and the falls were anemic. In California, it does not rain from late spring through mid-fall, and there is no water. The spectacular falls rely on spring snow melt, and so are at their best in early to mid spring.  Even in winter, when it begins to rain, rainfall alone is not enough to really get the falls going, it is when the snowpack melts that they thunder and roar.  So if it is your goal to see the falls at their best, it is essential you plan a spring trip.Yosemite Falls is one spectacular fall.  Well, actually, it is a 3fer! While it is not wide, it is rep

**Topic 3 - Tioga Pass**  
In particular, driving Through Tioga Pass 

In [84]:
# Inspect topic 3 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 3, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Tioga Pass')

pass        2.852290
tioga       2.225793
lake        1.636049
meadow      0.690729
mono        0.537975
scenery     0.419287
lee         0.373973
east        0.362705
tuolumne    0.341980
tenaya      0.308641
Name: topic_3, dtype: float64
["It's my first time to Tioga Pass. It was closed in my other visits. if you love hiking around of lakes and meadow, you shall try Tioga Pass!"
 "The drive along Tioga Road and over Tioga Pass is is one of California's most spectacular. The scenery along the way is amazing - Tenaya Lake, Tuolumne Meadows, views of Mt. Hoffmann, Mt. Conness, Mt. Dana, and the Cathedral Range, and the view from Olmstead Point of Half Dome and Clouds Rest from a much different vantage point than from Yosemite Valley. And the drive up (or especially down) the east side of Tioga Pass to Lee Vining and Mono Lake is exhilarating...although those with a fear of heights might want to think twice about that portion of the road. There's a pullout on the east side of the pass th

**Topic 4 - Hiking**  

In [88]:
# Inspect topic 4 via the top_words / top_reviews helpers (this cell asks for
# 10 reviews rather than 5), then record its label.
topic, n_reviews, n_words = 4, 10, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Hiking')

hike         4.292965
strenuous    0.490808
mile         0.454876
short        0.313068
hour         0.208753
steep        0.193454
moderate     0.180102
recommend    0.172853
tough        0.166330
shape        0.164297
Name: topic_4, dtype: float64
['We hiked to vernal falls and it is a moderate hike.  The trail is well kept and easy to hike on.  It is a beautiful and very peaceful hike.  We went early in the morning and the fog was over the valley which was very serene.'
 'It is a strenuous hike but you will be rewarded. Please motivate yourself to do this hike. It will not take more than an hour even if you stop and go. Kids can do this also and I have seen many elderly people doing the hike. Incredible!'
 "we didn't hike to the top of the falls just the Yosemite valley easy hike to the base and it was a easy and great hike."
 'It was quite strenuous hike but once you are up there you can feel achievement. The view along the hike is so beautiful.'
 'Vernal falls is one of two falls 

**Topic 5 - El Capitan**  
In particular, beauty of El Capitan

In [90]:
# Inspect topic 5 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 5, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('El Capitan')

el           4.528230
capitan      4.268026
climber      1.338473
binocular    0.415990
spot         0.403950
tunnel       0.400033
cap          0.394506
picture      0.362747
face         0.358512
meadow       0.304601
Name: topic_5, dtype: float64
['Seeing El Capitan was breathtaking. I had always seen pictures of El Capitan but seeing it is person was thrilling. It is so massive that it is really hard to process. We were there two days and I was truly captivated by El Capitan.'
 'Meadows around El Capitan make for a relaxing view of the majestic El Capitan. One of the beauties of Yosemite.'
 'One of our last stops in the valley, we took the El Capitan shuttle out to the El Capitan Picnic area and walked out to the next stop at the El Capitan crossover.  Relatively flat walk, no real trail, we just walked along side the road (moderate traffic) till we got to the Valley Loop Trail, which took us past Cathedral Beach and to the crossover.  Crossover was the best place to view El Capita

**Topic 6 - Walking To Lower Fall**  
In particular, 

In [93]:
# Inspect topic 6 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 6, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Walking To Lower Fall')

walk      5.419918
short     0.793609
minute    0.225155
path      0.224473
hard      0.213550
base      0.187959
lodge     0.170765
lovely    0.167322
mile      0.157958
uphill    0.143188
Name: topic_6, dtype: float64
['Once again; in summer: GET THERE EARLY! That means that if you want an experience that you would describe as "nice" you will have to be walking at about 07:30. If you get up to Vernall Falls and walk back along the John Muit trail be carefull not to miss the this trail. When you are on top of the Vernall falls you walk further up but when you come across a bridge you have gone to far. We forgot to take a map so walked up further. Eventually we ended up walking all the way to the top of Nevada falls! Ended up walking for 6,5 hours, almost 14km, 600m up and 600m downhill.'
 'It is a great short and easy walk to see the lower fall.  Lots of people, but the walk is beautiful and serence.'
 'We did do this walk, the start is easy to do. Because of that we did walk in a car

**Topic 7 - Amazing View**  
In particular, at the top of the falls & getting there

In [98]:
# Inspect topic 7 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 7, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Amazing Views')

worth         4.509240
effort        0.855170
definitely    0.733977
bit           0.669340
trip          0.567620
totally       0.362342
little        0.223373
absolutely    0.186453
tough         0.134280
trek          0.124622
Name: topic_7, dtype: float64
["Worth the hike up to see the falls - it is a bit of a climb but if you take it slow it is worth the trip. Very popular area so can be crowded but that shouldn't stop you"
 'Worth the trip up there, views are amazing! Drive up there a little scary, but worth the trip. Can see the valley below, great view.'
 "The Falls is beautiful and definitely worth the easy hike.  It does get crowded as it's not a huge area to view it but definitely worth it.  It's awesome!"
 'Best view of the entire valley and worth the drive to the top. We had two teens and one preteen all boys and they thought it was worth it.'
 "Don't give up even when the trail seems never-ending. The view at the top of the falls is SOOOOOOOOOO worth it. "]


**Topic 8 - Seasons for Falls**  
In particular, 

In [100]:
# Inspect topic 8 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 8, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Falls Viewing Season')

fall      6.459617
base      0.464369
dry       0.316726
spring    0.291243
spray     0.233936
summer    0.218177
bridge    0.177436
near      0.172987
run       0.164288
right     0.154591
Name: topic_8, dtype: float64
["Here is the most important thing to know about all the falls in Yosemite Park.  I have seen a number of people complaining in reviews that they were here in summer, fall, winter, whatever, and the falls were anemic. In California, it does not rain from late spring through mid-fall, and there is no water. The spectacular falls rely on spring snow melt, and so are at their best in early to mid spring.  Even in winter, when it begins to rain, rainfall alone is not enough to really get the falls going, it is when the snowpack melts that they thunder and roar.  So if it is your goal to see the falls at their best, it is essential you plan a spring trip.Yosemite Falls is one spectacular fall.  Well, actually, it is a 3fer! While it is not wide, it is reportedly the tallest 

**Topic 9 - Rock Climbing**  
In particular, 

In [102]:
# Inspect topic 9 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 9, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Rock Climbing')

rock          3.855680
climber       1.668572
face          1.046238
watch         0.829892
formation     0.472010
impressive    0.408355
binocular     0.359285
scale         0.353838
granite       0.350937
huge          0.304945
Name: topic_9, dtype: float64
['a MUST for a rock climber... but even if you are NOT - go check out the climbers , this is NUTS! one of the iconic rocks in the valley - how could you miss it if you are here anyway? '
 'We only experienced the great might of this 900m high rock face from the bottom, and it was impressive enough, let alone from its top. Ideal for rock climbers and base jumpers, but it is extremely tough to get to the top.'
 'As part of my day tour from SFO,  the tour operator stopped here. The rock formation is gigantic. I believe a lot of rock climbers come and camp here. I walked close to the rock and came back. Saw a herd of deer grazing there. I am not sure how one could climb this. the tour operator told us that the first climb took some 45

**Topic 10 - Great Views for Falls**  
In particular, 

In [105]:
# Inspect topic 10 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 10, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Great Views for Falls')

great          4.295647
photo          0.478787
opportunity    0.295339
fun            0.180673
hiking         0.131721
experience     0.131627
crowded        0.099593
view           0.099456
spot           0.094888
picture        0.085015
Name: topic_10, dtype: float64
['so many great views from one of the most beautiful place in the park, you can take a great tour bus up to the point for a great day of exploring or a great day hike. small store  and great views.'
 'It is a great hike to get to the top, steep in places. Great view at the top! Yosemite is just beautiful and it felt great to be at the top!'
 'Great for hiking - great views of the falls - there are 2 trails - lower and upper - upper one is much longer and stepper - great views as you hike up as well as down.'
 'It was great to see this fall, the view was great, it amazing how the water fall comes all the way from the top of mountain.'
 'Last time here we did the Panama trail which was great but this time we just looked a

**Topic 11 - Avoiding Crowds**  
In particular, going early to avoid crowds

In [108]:
# Inspect topic 11 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 11, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Avoiding Crowds')

early        2.584439
crowd        2.397241
morning      0.961203
avoid        0.888423
start        0.508107
summer       0.452922
busy         0.396791
late         0.389514
afternoon    0.312674
arrive       0.229653
Name: topic_11, dtype: float64
['Crowds below; crowds on the Mist Trail to the top, and busy at the top as well.  But worth it!  Go early!'
 'Visit in early morning or late afternoon to avoid the crowds'
 'We stop 1st here by car, was not to crowd early in the morning and you have a wonderful view of the full valley'
 'Drove up real early on my birthday. What a glorious way to begin my day. Going early is a must before the summer crowds arrive.'
 'Well maintained path with steep inclines.  To avoid the crowds start the hike early.  If you you are not there early it is recommended to take the shuttle to avoid parking problems.']


**Topic 12 - Park Views at Glacier Point**  
In particular, 

In [110]:
# Inspect topic 12 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 12, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Park Views')

park        3.121951
national    1.443777
yosemite    0.596151
car         0.309724
enter       0.183118
entrance    0.125020
leave       0.094775
service     0.093389
visitor     0.086358
stay        0.082068
Name: topic_12, dtype: float64
['This is a very beautiful point in the Yosemite National Park. There is so much to see in there. We really enjoyed everything about Yosemite National Park.'
 'Yosemite National Park is probably the best walking and hiking park in the 48 states.  We were their Sept. 10th through the 12th,  There were probably a hundred thousand or more in the park, but the park is so massive that it did not seem crowed. Their are extreme trails and moderate trails. This is a must see park. Not a park that you ride around in like Yellowstone but, worth the trip.'
 'You absolutely must go to Yosemite National Park, and Glacier Pint is a must-see! The rock formation is just another example of the stunning beauty within the national park!!'
 'Magnificent views over Yose

**Topic 13 - Glacier Point & Half Dome**  
In particular, 

In [113]:
# Inspect topic 13 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 13, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Glacier Point & Half Dome')

dome          3.519145
half          3.203864
sentinel      0.506813
sunset        0.498547
village       0.143268
mile          0.126365
include       0.124681
sun           0.120580
especially    0.119849
arrive        0.104188
Name: topic_13, dtype: float64
['This trail is a bit off the path of the half dome hike and worth doing because it mixes up the scenery of the half dome hike. '
 'Glacier Point give you a small taste of what you see from the top of Half Dome. There are also a few hiking trails that go to Half Dome or to the valley floor.'
 'After climbing the Half Dome we took a rest day to go to glacier point. It has a different view of the half dome. It is very popular and you can drive to it. '
 'The best view of the valley and Half dome. No one should miss this view, must visit. I have seen the half dome from this point multiple times. This is spectacular.'
 'Went to Glacier Point vista point to look over the Yosemite Valley below. You can see Half Dome, El Capitan, and al

**Topic 14 - Beautiful Falls**  
In particular, 

In [115]:
# Inspect topic 14 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 14, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Beautiful Falls')

beautiful     4.841553
scenery       0.385571
quite         0.231738
yosemite      0.141283
say           0.134240
sunset        0.132398
absolutely    0.130714
crowded       0.098738
stair         0.089163
nature        0.080211
Name: topic_14, dtype: float64
['Perhaps the most beautiful, although a bit stressful drive ever.  The further you go the more beautiful it gets.  '
 "Such a beautiful mountain! It's mass almost can't be described by words. It is a beautiful thing to see!! "
 'Beautiful falls, there are many falls in Yosemite National Park.  They all are beautiful against the backdrop of the surrounding scenery.'
 'All the falls were beautiful since we were there in the spring.'
 'These waterfalls are so beautiful they do not look real.  It is a beautiful postcard that is actually real.  One of the most beautiful scenes of the Yosemite National Park.']


**Topic 15 - Yosemite Valley**  
In particular, views of Yosemite Valley from Glacier Point

In [117]:
# Inspect topic 15 via the top_words / top_reviews helpers, then record its label.
topic, n_reviews, n_words = 15, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
topics_list.append('Yosemite Valley')

valley         4.560932
yosemite       2.360615
floor          1.118505
different      0.307364
entire         0.278964
mile           0.223804
hour           0.196074
lodge          0.174310
perspective    0.162521
sight          0.147274
Name: topic_15, dtype: float64
['A must side trip when visiting Yosemite Valley.  The drive takes time, but it is one of the best views of the valley. '
 'We busted here to get a view of the entire Yosemite valley. Views are utterly amazing, especially the drop down to the valley. '
 'The views here are utterly amazing. We like to stop here on the way into the valley, since it takes about an hour to get to the valley from here. It is about 4000 feet above the valley floor, with commanding views of Half Dome, Yosemite Falls, the valley floor, Vernal Falls, and Nevada Falls. You may also start several hikes here, including the aptly named 4-milte trail and 8-mile trail to the valley floor.It is so inspiring, I nearly proposed to my wife here. Didn\'t h

**Topic 16 - (unlabeled; top terms: amazing, experience)**  
In particular, 

In [118]:
# Inspect topic 16 via the top_words / top_reviews helpers.
topic, n_reviews, n_words = 16, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic: the append below is disabled and still
# carries the 'Yosemite Falls' label that topic 2 uses.
# topics_list.append('Yosemite Falls')

amazing       3.970289
experience    0.356236
nature        0.163672
view          0.123779
scenery       0.120757
site          0.115519
awe           0.106738
simply        0.103922
glad          0.095835
stand         0.090288
Name: topic_16, dtype: float64
['Definately must do!Amazing, marvelous view, just to escape from the city jungle and enjoy with all your sences!!'
 'defi have a stop at glacier point - the views are just amazing from there. It does get crowded but you will find space for that amazing picture and views. '
 'This is a medium difficulty trail to the top of the waterfall. The view is amazing from the top and the source of waterfall is just an amazing site.'
 'It is amazing to think about how people climb this massive rock face! There are amazing views of this from many places around the park!'
 'The views and scenery are amazing, do it, you will love it!']


**Topic 17 - (unlabeled; top terms: water, flow, bring)**  
In particular, 

In [119]:
# Inspect topic 17 via the top_words / top_reviews helpers.
topic, n_reviews, n_words = 17, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic: the append below is disabled and still
# carries the 'Yosemite Falls' label that topic 2 uses.
# topics_list.append('Yosemite Falls')

water     3.600122
flow      0.781722
bring     0.698549
plenty    0.342464
spring    0.275069
dry       0.265402
summer    0.257024
snack     0.242907
bottle    0.226840
hot       0.193693
Name: topic_17, dtype: float64
['A waterfall with almost no water is always a bit disappointing.  nonetheless, without water one sees the marvelous sculpturing of the boulders beneath and treasures the water flow that is, while missing that which is not.'
 'It was definitely worth the walk to see the base of the falls even tho there was no water. You could see the outline of where the water would have been and just imagine what it would have been like with water flowing although it was sad to see the empty stream bed.'
 'This water was ragging down the apron into the pool. I had been there in Sept. and walked across the apron. Very dangerous. Slipping hazard and cold water. Imagine being so close to the top of a water fall. Hope they never move the fence. Stay out of the water. '
 'Water was at a tr

**Topic 18 - (unlabeled; top terms: place, wonderful, picture)**  
In particular, 

In [120]:
# Inspect topic 18 via the top_words / top_reviews helpers.
topic, n_reviews, n_words = 18, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic: the append below is disabled and still
# carries the 'Yosemite Falls' label that topic 2 uses.
# topics_list.append('Yosemite Falls')

place        4.750276
wonderful    0.349009
picture      0.284412
sunset       0.196402
love         0.172189
stunning     0.153094
truly        0.143140
life         0.141824
word         0.129325
list         0.128667
Name: topic_18, dtype: float64
['Need car to ger there, but is a amazing place. The road is beatiful, the place has a  impressive view. '
 'If you can only check out one place in Yosemite, this is the place. Great views of the valley and surrounding area.'
 'I was here in May 2014 and this place is on my short list of places that I really wanted to visit.  It was well worth the trip'
 "Really great place to visitIt's wonderful place to see a national park Visited in summer with family elders"
 'This place symbolizes the grandness of nature. Breathtaking views which are unique to Yosemite National Park. Visit the place early as the place has a very limited parking. You might have to wait for hours to get the shuttle to this point.']


**Topic 19 - (unlabeled; top terms: time, spend, wish)**  
In particular, 

In [121]:
# Inspect topic 19 via the top_words / top_reviews helpers.
topic, n_reviews, n_words = 19, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic: the append below is disabled and still
# carries the 'Yosemite Falls' label that topic 2 uses.
# topics_list.append('Yosemite Falls')

time          5.213040
spend         0.494372
wish          0.205573
need          0.170479
able          0.159155
stay          0.159064
experience    0.158379
want          0.147067
life          0.144978
plenty        0.130109
Name: topic_19, dtype: float64
['Try going in the winter time. The view is spectacular any time of year. There are few visitors, so you spend less time navigating and more time sight seeing.'
 "The best time to go would be around May because after that the Falls will run weaker. You can see it from the Valley and it's like your greetings card from Yosemite! Spend some time and go for a hike!"
 'Every time I visit Glacier Point, it is like I am visiting it for the first time. The views are breathtaking. There is so much to take in from every angle. The drive up can be a little nerve wracking so make sure you take your time and enjoy.'
 "I've been to this part of the Yosemite NP several times but each time I still find it interesting. It's always busy with heap 

**Topic 20 - (unlabeled; top terms: vernal, nevada, muir)**  
In particular, 

In [122]:
# Inspect topic 20 via the top_words / top_reviews helpers.
# NOTE(review): the NMF cell earlier in this notebook requests n = 20. If that
# yields topics numbered 0-19, index 20 does not exist on a fresh
# Restart & Run All and this cell would fail -- confirm, and either raise n or
# drop this cell.
topic, n_reviews, n_words = 20, 5, 10

print(top_words(word_matrix, topic, n_words))
print(top_reviews(topic_matrix, topic, n_reviews))
# No label is recorded for this topic: the append below is disabled and still
# carries the 'Yosemite Falls' label that topic 2 uses.
# topics_list.append('Yosemite Falls')

vernal        3.823835
nevada        2.738232
muir          0.854241
john          0.790360
fall          0.608544
continue      0.511172
mist          0.395145
past          0.319611
footbridge    0.318717
happy         0.259316
Name: topic_20, dtype: float64
['Vernal Fall is the closer of two with Nevada being further.  If you are looking for adventure and are in good shape, then do the Nevada Falls hike.  We took the John Muir trail to Nevada and it was beautiful!!  Then on our way back, we hit the Vernal Fall trail and immediately were immersed in wall to wall people.  Yikes, the Nevada and John Muir trails were almost empty compared to the cattle herding on Vernal.  But, it is a good hike for the elderly and people with small children.  Go early...as in before 9am.'
 'The proximity to half dome, and the view of Nevada and Vernal Falls below, make this a must see at Yosemite. '
 'To hike to the top of vernal falls is wet and worth every step.  If you are up to it go all the up to N