In [1]:
import pandas as pd

reviews = pd.read_csv("reviews.csv", encoding="utf-8",
                  names=["place", "review_date", "reviewer_country", "title","content","visit_date","review_rating"])
reviews.head()


Unnamed: 0,place,review_date,reviewer_country,title,content,visit_date,review_rating
0,Chinatown Heritage Centre (Singapore),2/6/2021,"Singapore, Singapore",Great stopover,This place has great information about the are...,1/6/2021,5
1,Chinatown Heritage Centre (Singapore),3/11/2020,"Bensalem, PA",Great History Lesson,Well worth the stop. Great history lesson of t...,1/1/2020,5
2,Chinatown Heritage Centre (Singapore),4/10/2020,"Ellon, UK",good,very well organized well worth a visit. Inform...,1/2/2020,5
3,Chinatown Heritage Centre (Singapore),1/8/2020,"Edinburgh, UK",Brilliant,one of the best destination visitor centres in...,1/7/2020,5
4,Chinatown Heritage Centre (Singapore),6/6/2020,"Cairns, Australia",Cultural Time Capsule,This place is wonderful.\n\nI bought my ticket...,1/11/2019,5


In [3]:
# check data type

# string changed to datetime 
reviews['review_date'] = pd.to_datetime(reviews['review_date'])

reviews['visit_date'] = pd.to_datetime(reviews['visit_date'])

print(type(reviews.iloc[0]["review_date"]))
print(type(reviews.iloc[0]["visit_date"]))

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [4]:
# title and content combined for easier analysis
reviews['review_date'] = pd.to_datetime(reviews['review_date'])

reviews["review_content"] = reviews["title"] + " " +reviews["content"]

reviews.drop(columns=["title","content"],inplace=True)


In [5]:
reviews.head()

Unnamed: 0,place,review_date,reviewer_country,visit_date,review_rating,review_content
0,Chinatown Heritage Centre (Singapore),2021-02-06,"Singapore, Singapore",2021-01-06,5,Great stopover This place has great informatio...
1,Chinatown Heritage Centre (Singapore),2020-03-11,"Bensalem, PA",2020-01-01,5,Great History Lesson Well worth the stop. Grea...
2,Chinatown Heritage Centre (Singapore),2020-04-10,"Ellon, UK",2020-01-02,5,good very well organized well worth a visit. I...
3,Chinatown Heritage Centre (Singapore),2020-01-08,"Edinburgh, UK",2020-01-07,5,Brilliant one of the best destination visitor ...
4,Chinatown Heritage Centre (Singapore),2020-06-06,"Cairns, Australia",2019-01-11,5,Cultural Time Capsule This place is wonderful....


In [6]:
# check for null values

reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66447 entries, 0 to 66446
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   place             66447 non-null  object        
 1   review_date       66447 non-null  datetime64[ns]
 2   reviewer_country  58564 non-null  object        
 3   visit_date        64697 non-null  datetime64[ns]
 4   review_rating     66447 non-null  int64         
 5   review_content    66447 non-null  object        
dtypes: datetime64[ns](2), int64(1), object(3)
memory usage: 3.0+ MB


In [7]:
# look at rows with null values

reviews_all_null = reviews[reviews.isna().any(axis=1)]
reviews_visit_null = reviews[reviews["visit_date"].isna()]
reviews_country_null = reviews[reviews["reviewer_country"].isna()]

print(len(reviews_country_null.index))
print(len(reviews_visit_null.index))
print(len(reviews_all_null.index))

7883
1750
9429


In [8]:
reviews = reviews.dropna()
reviews.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57018 entries, 0 to 66446
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   place             57018 non-null  object        
 1   review_date       57018 non-null  datetime64[ns]
 2   reviewer_country  57018 non-null  object        
 3   visit_date        57018 non-null  datetime64[ns]
 4   review_rating     57018 non-null  int64         
 5   review_content    57018 non-null  object        
dtypes: datetime64[ns](2), int64(1), object(3)
memory usage: 3.0+ MB


In [9]:
# clean review_content
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# library requirements
nltk.download('punkt')
nltk.download('stopwords')

# set stop words
stop_words = set(stopwords.words('english'))

# remove white spaces or paragraphs new lines
reviews['review_content'] = reviews['review_content'].apply(lambda x: re.sub("\s+"," ", x))

reviews.head()

Unnamed: 0,place,review_date,reviewer_country,visit_date,review_rating,review_content
0,Chinatown Heritage Centre (Singapore),2021-02-06,"Singapore, Singapore",2021-01-06,5,Great stopover This place has great informatio...
1,Chinatown Heritage Centre (Singapore),2020-03-11,"Bensalem, PA",2020-01-01,5,Great History Lesson Well worth the stop. Grea...
2,Chinatown Heritage Centre (Singapore),2020-04-10,"Ellon, UK",2020-01-02,5,good very well organized well worth a visit. I...
3,Chinatown Heritage Centre (Singapore),2020-01-08,"Edinburgh, UK",2020-01-07,5,Brilliant one of the best destination visitor ...
4,Chinatown Heritage Centre (Singapore),2020-06-06,"Cairns, Australia",2019-01-11,5,Cultural Time Capsule This place is wonderful....


In [11]:

for index, rows in reviews.iterrows():
    review_content = rows['review_content']
    # tokenize
    tokenized_review = word_tokenize(review_content)
    # standardize to lower case
    lower_review = [w.lower() for w in tokenized_review]
    # keep token words which are text
    text_words_only = [w for w in lower_review if re.search('^[a-z]+$',w)]
    # remove token words which are stop words
    text_stopremoved = [w for w in text_words_only if w not in stop_words]
    # join back into sentence
    text_joined = " ".join(text_words_only)
    
    # add edited sentence back to dataframe
    reviews.at[index,'review_content'] = text_joined

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Shawn\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Shawn\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [14]:
for review in reviews["review_content"]:
    print(review)
    print()

great stopover this place has great information about the area although i not really ventured a lot as a tourist i found this place very informative and i got to learn something new

great history lesson well worth the stop great history lesson of the life of chinese laborers who help establish singapore the living conditions were even worse than i had imagined plan on spending at least an hour

good very well organized well worth a visit information you would not know about very interesting especially as our visit coincided with chinese new year

brilliant one of the best destination visitor centres in singapore brings the history of the area to life and is a fascinating visit

cultural time capsule this place is wonderful i bought my ticket thinking i would spend an hour and it long enough so i came back later the same day after checking out of my hotel and spend another ticket is valid for multiple entries on same day only the centre has an amazing floors galleries that take the vis

culture place this place are more teaching us do the right things and do do the harmful social i strongly recommend everybody must visit the hades

are you brave enough to go though the ten courts of hell haw paw village or tiger balm park i really enjoyed this place it is free to get in however you will not get the story behind it all we booked a tour with tour east and had a guided tour with a lady dragon it was well worth the money and it was a great insight to the story of the brothers who invented tiger balm if you just go and have a look it is good however having the guided tour made it excellent this place must be saved for the future otherwise it will end up as a block of flats please visit it you can catch a train there and it is a two minute walk from the station

awesome i give it stars if i could loved our visit even the teenage son enjoyed it and hubby had a chance to reminisce yeah it kind of kitschy but it full of educational and cultural information and right next to th

sakura matsuri love the cherry blossoms at gardens by the bay the pretty flowers came in so many different shades and they were in full bloom when we were there definitely loved the display

conservatories and the tree observatory it was an amazing experience i love the flowers in the flower dome i feel like i was in japan the cloud forest was fantastic creation the super tree observatory it was a breathtaking view of the marina bay plus the elevator was so cool the experience was

i was amazed at how well decorated everything was when i went there i was proud to be singaporean

must do in this little red dot should miss this if you love flowers nature easy to access using mrt alight at bayfront station in downtown line blue do forget the floral fantasy it show

one of the best place you can visit in singapore free for those who just want to enjoy the nature pay entrance to flower or cloud dome if you want

flower dome went to view the sakura cherry blossom at flower dome paid only per

in singapore the food may not be gourmet but it is good as evidenced by the throngs of singaporeans eating lunch there every day a treat with dozens of options from all the walks of singaporean life

food court really loved the indonesian stall i love food the spicy red chicken was so tasty with coconut spice beautiful the market building is clean historic big ben is heard on the hour

great selection of food a large in door hawker centre with a good variety of cuisines some persistent vendors chase you round with their menus ignore them and make your own choice great chicken buns and good value compared with newton

great do yourself a favour and visit lau pa sat also known as telok ayer is the best of the hawker big food in our opinion for food flavour choices and the venue when compared to maxwells tanjung pagar plaza mall court koufu it is located on anson rd and not far from telok ayer station whilst open all the time the evening meal is the time to go outside are the satay stalls

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



great place the place has a lot of shops you can spend your whole day there there is everything mostly on the high end stuffs next to marina sands bay

luxury at its finest the shoppes offer an exciting place for the luxury seeker and the vulture with pieces like montblanc chanel hublot versace and others the shoppes is truly a global marketplace for

shop til you drop a plethora of shops and high end stores bring your credit card and be prepared to charge great shopping experience will definitely appreciate the air conditioning

amazing what a shopping centre every shop you can imagine with a food hall that has something for everyone what about the views back across the marina just fantastic connected to the bayfront mtr directly connected a must experience

nothing like it i never been to such a place before the number of shops and restaurants is incredible the centre itself is amazing there a canal running through it with boat rides you can take it really is something special

high 

and running venue so big and every shop you could want is represented enjoyed my morning run around the bay before the retail day had started a must visit on any trip to singapore the light show at night is also something to experience

paradise for branded shopper there are so many branded shop you ever heard or never heard originality is proven and the environment is really comfortable

huge huge shopping mall not necessarily worth a visit huge huge shopping mall with a lot of people window shopping but not that many actually in the shops all the luxury brands are here under one roof but it seemed like people were more interested in the regular restaurants and food court than in the luxury brands walk towards the waterfront and there are great views of the central business district the connected casino requires your original passport for identification

different this is a must see place to visit very different great shopping and fantastic views although restaurants are very expensiv

iconic landmark go early i went there early in the morning to take pictures there was a crowd of tourists but i managed to take some good shots also it cooler in the morning there are spots you can stand where you have your hands cupped in front or your mouth open so it appears as if the water from merlion is shooting into your hands or mouth very cool

i took the river boat to here it was an amazing educational exp one i would do again the tour guide was very detailed explaining the impact of the historical events along the river it was great to learn about the different times in singapore history it was also great to get the story behind the merlion and by arriving by river boat it gave you a different view

one of the most photographed places in singapore not without reason from here you can take almost any photographs of singapore as you can marina bay sands hotel the lion statue the city center of skyscrapers the football pitch the helix bridge and esplanade theaters of the bay do


we saw the crocodile for years we just heard about it but this time we really saw it and to take pictures we went over the gate it was right there in the water at meters distance nothing between us and him nice ok there are still the monitor lizards the birds the mudskippers the crabs etc etc but the crocodile is the highlight


wild life in singapore fun early morning nature walk may run into some snakes listen to the sounds other than taxi drivers crickets and birds stay on the path and enjoy

ooooooooooooooo monitor lizards such a fun place to take the children hope you not scared of the monitor lizards though when we visited they were all out walkies and some exceeded six feet in length just magnificent lots of different walks to do and if you lucky you may even glimpse a cayman we saw lots of birds and mud skippers also recommend going for lunch at bollywood veggies about a ten minute drive away

nature up close liked the walk and the various trails brought some kids along and so

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [19]:
# library requirements
pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


def vader_compound_score(sentence): 
    sid_obj = SentimentIntensityAnalyzer() 
    # returns a dict of polarity score information we only require the "compound"
    sentiment_dict = sid_obj.polarity_scores(sentence)
    return sentiment_dict['compound']

for index,rows in reviews.iterrows():
    vader_score = vader_compound_score(rows['review_content'])
    reviews.at[index,'vader_sent'] = vader_score



                                           place review_date  \
0          Chinatown Heritage Centre (Singapore)  2021-02-06   
1          Chinatown Heritage Centre (Singapore)  2020-03-11   
2          Chinatown Heritage Centre (Singapore)  2020-04-10   
3          Chinatown Heritage Centre (Singapore)  2020-01-08   
4          Chinatown Heritage Centre (Singapore)  2020-06-06   
...                                          ...         ...   
66442  Adventure Cove Waterpark (Sentosa Island)  2016-10-24   
66443  Adventure Cove Waterpark (Sentosa Island)  2016-10-23   
66444  Adventure Cove Waterpark (Sentosa Island)  2016-10-23   
66445  Adventure Cove Waterpark (Sentosa Island)  2016-10-19   
66446  Adventure Cove Waterpark (Sentosa Island)  2016-10-18   

                reviewer_country visit_date  review_rating  \
0           Singapore, Singapore 2021-01-06              5   
1                   Bensalem, PA 2020-01-01              5   
2                      Ellon, UK 2020-01-02  

In [21]:
reviews.head()

reviews.to_csv("review_clean_w_vader.csv",index=False,header=True)