### **Reviews Dataset**

# Basic Cleaning

## Loading Dataset

In [1]:
import numpy as np
import pandas as pd
import gdown

In [2]:
# Load Data
# Read the CSV for reviews https://drive.google.com/file/d/198ysxLSRf2LyjOkJAYEoQwpxIrjn85rA/view?usp=sharing
url = f"https://drive.google.com/uc?export=download&id=198ysxLSRf2LyjOkJAYEoQwpxIrjn85rA"
output = "reviews_cleaned.csv"
gdown.download(url, output, quiet=False)
reviews_data = pd.read_csv(output)

Downloading...
From (original): https://drive.google.com/uc?export=download&id=198ysxLSRf2LyjOkJAYEoQwpxIrjn85rA
From (redirected): https://drive.google.com/uc?export=download&id=198ysxLSRf2LyjOkJAYEoQwpxIrjn85rA&confirm=t&uuid=df54b1fb-bb27-4610-b7d7-f3e8cddbed2b
To: /content/reviews_cleaned.csv
100%|██████████| 313M/313M [00:05<00:00, 60.8MB/s]


## Basic Statistics

In [8]:
# Data First Few Lines
reviews_data.head(20)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang
0,2595,17857,2009-11-21,50679,Jean,Our three-night stay. We enjoyed the apartment...,124,False,en
1,2595,19176,2009-12-05,53267,Cate,Great experience.,2,False,ro
2,2595,19760,2009-12-10,38960,Anita,I've stayed with my friend at the Midtown Cast...,90,False,en
3,2595,34320,2010-04-09,71130,Kai-Uwe,"We've been staying here for about 9 nights, en...",66,False,en
4,2595,46312,2010-05-25,117113,Alicia,We had a wonderful stay at Jennifer's charming...,24,False,en
5,2595,1238204,2012-05-07,1783688,Sergey,Hi to everyone!\rWould say our greatest compli...,99,False,en
6,2595,1293632,2012-05-17,1870771,Loïc,"Jennifer was very friendly and helpful, and he...",37,False,en
7,2595,2022498,2012-08-18,2124102,Melanie,This apartment is like a real castle old and u...,208,False,en
8,2595,4682989,2013-05-20,496053,Eric,Jennifer's place was in a great midtown locati...,57,False,en
9,2595,13193832,2014-05-21,13685934,Gerald,Jennifer is a very nice host. Everything is cl...,25,False,en


In [9]:
#Change Date Type
reviews_data['date'] = pd.to_datetime(reviews_data['date'])

reviews_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 966818 entries, 0 to 966817
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   listing_id     966818 non-null  int64         
 1   id             966818 non-null  int64         
 2   date           966818 non-null  datetime64[ns]
 3   reviewer_id    966818 non-null  int64         
 4   reviewer_name  966818 non-null  object        
 5   comments       966818 non-null  object        
 6   word_count     966818 non-null  int64         
 7   has_html       966818 non-null  bool          
 8   lang           966818 non-null  object        
dtypes: bool(1), datetime64[ns](1), int64(4), object(3)
memory usage: 59.9+ MB


In [10]:
# Data Description
reviews_data.describe()

Unnamed: 0,listing_id,id,date,reviewer_id,word_count
count,966818.0,966818.0,966818,966818.0,966818.0
mean,1.634385e+17,4.887247e+17,2021-02-19 06:57:53.049115392,160928600.0,45.102334
min,2595.0,3149.0,2009-05-25 00:00:00,1.0,1.0
25%,9824042.0,366387600.0,2019-01-02 00:00:00,31353850.0,15.0
50%,27598710.0,5.209417e+17,2021-12-19 00:00:00,105575100.0,32.0
75%,51684530.0,9.101462e+17,2023-06-09 00:00:00,250088800.0,60.0
max,1.308179e+18,1.325553e+18,2025-01-02 00:00:00,669621300.0,1001.0
std,3.349283e+17,4.734526e+17,,157116500.0,45.963965


In [11]:
#total values per column
reviews_data.count()

Unnamed: 0,0
listing_id,966818
id,966818
date,966818
reviewer_id,966818
reviewer_name,966818
comments,966818
word_count,966818
has_html,966818
lang,966818


## Data Cleaning

In [12]:
#Filling missing values (comments = 235, review_name = 3)
reviews_data['comments'].fillna("blank", inplace=True)
reviews_data['reviewer_name'].fillna("none", inplace=True)
reviews_data.isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  reviews_data['comments'].fillna("blank", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  reviews_data['reviewer_name'].fillna("none", inplace=True)


Unnamed: 0,0
listing_id,0
id,0
date,0
reviewer_id,0
reviewer_name,0
comments,0
word_count,0
has_html,0
lang,0


#Word Count Analysis

In [13]:
reviews_data['word_count'].value_counts().head(10)

Unnamed: 0_level_0,count
word_count,Unnamed: 1_level_1
2,22460
4,19341
6,18897
5,18079
7,17552
8,17403
10,17309
9,17235
11,16967
12,16572


In [14]:
reviews_data['word_count'].value_counts().tail(10)

Unnamed: 0_level_0,count
word_count,Unnamed: 1_level_1
549,1
646,1
541,1
537,1
847,1
633,1
494,1
865,1
532,1
697,1


In [15]:
reviews_data.sort_values(by='word_count', ascending=False).head(10)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang
469394,25812079,719836232,2020-12-30,288158106,Josh,"DO NOT RENT FROM THIS GUY!!! Trust me, RUN whi...",1001,False,en
85202,1370405,510791732,2019-08-16,258846546,Olivia,"First, I would like to be clear that this is a...",1000,False,en
389425,19915939,493434425,2019-07-22,250985004,Timica,Let me say I have stayed at many Airbnb. My hu...,1000,False,en
41186,566712,495831512,2019-07-26,2657102,Hilda,This was the first time we checked out 2 days ...,1000,False,en
513677,29929702,404229221837425320,2021-07-11,110716358,Kham,When writing a review I believe it’s key to st...,997,False,en
470314,26602263,587163826,2020-01-02,78356234,Anastasia,"Looking forward to the New Year holiday, my fr...",996,False,en
258933,11100541,685450145713359317,2022-08-03,960331,Cheryl,"PROS: 1. It was very clean, with a maid servic...",995,False,en
542792,32701542,505011701745082239,2021-11-27,10785614,Mitchell,Please let me preface this by saying that I ha...,995,False,en
74869,1187643,582590675161448375,2022-03-14,13839913,Niness,"well, I chose the Daniel loft based on the rev...",993,False,en
770373,54304065,735551462151233925,2022-10-11,445153264,Lina,The apartment itself is indeed charming. I was...,992,False,en


##Split Pre and Post

In [None]:
local_law_date = '2023-09-05'
df_pre = reviews_data[reviews_data['date'] < local_law_date]
df_post = reviews_data[reviews_data['date'] >= local_law_date]
print(df_pre.count())
print(df_post.count())

listing_id       781039
id               781039
date             781039
reviewer_id      781039
reviewer_name    781039
comments         781039
word_count       781039
has_html         781039
lang             781039
dtype: int64
listing_id       185779
id               185779
date             185779
reviewer_id      185779
reviewer_name    185779
comments         185779
word_count       185779
has_html         185779
lang             185779
dtype: int64


# Textblob

In [None]:
!pip install textblob



In [None]:
from textblob import TextBlob

In [None]:
!pip install torch tokenizers torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
# Import Packages
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
from nltk.stem import PorterStemmer
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel

### Textblob

In [None]:
#make copy of reviews data
reviews_data_textblob = reviews_data.copy()

In [None]:
from textblob import TextBlob

text = reviews_data_textblob['comments'][0]
blob = TextBlob(text)

# Get sentiment scores
polarity = blob.sentiment.polarity
subjectivity = blob.sentiment.subjectivity

print(f"Polarity: {polarity}, Subjectivity: {subjectivity}")


Polarity: 0.16749999999999998, Subjectivity: 0.5775


In [None]:
import pandas as pd
from textblob import TextBlob

# Apply sentiment analysis
reviews_data_textblob['polarity'] = reviews_data_textblob['comments'].apply(lambda x: TextBlob(x).sentiment.polarity)
reviews_data_textblob['subjectivity'] = reviews_data_textblob['comments'].apply(lambda x: TextBlob(x).sentiment.subjectivity)



In [None]:
# Label sentiment based on polarity
def get_sentiment_label(polarity):
    if polarity > 0.05:
        return "Positive"
    elif polarity < -0.05:
        return "Negative"
    else:
        return "Neutral"

reviews_data_textblob['sentiment'] = reviews_data_textblob['polarity'].apply(get_sentiment_label)

reviews_data_textblob

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang,polarity,subjectivity,sentiment
0,2595,17857,2009-11-21,50679,Jean,"Our three-night stay. We enjoyed the apartment which is very well located. Pleasant, clean and well maintained. It is ideal for a family of 3 or 4 people. Small problems when we arrived there was no one to receive us, and the soap for the shower, the dishwashing liquid were missing, we received them the day after tomorrow. There is also the noise of the NY Metro, so a difficult first night if you are not used to it. Jennifer is correct the refund of the deposit was very fast. Apart from these small details our short stay went well. If I have the opportunity to return to NY for the holidays, I will stay at ""The Midtown Castle"" Jean Possession - Reunion Island",124,False,en,0.167500,0.577500,Positive
1,2595,19176,2009-12-05,53267,Cate,Great experience.,2,False,ro,0.800000,0.750000,Positive
2,2595,19760,2009-12-10,38960,Anita,"I've stayed with my friend at the Midtown Castle for six days and it was a lovely place to be. A big spacious room with a pointy roof, which really makes you feel like staying in a castle. The location is perfect. It is just a few steps from Macy's Time Square and Theatre District. Everything worked just perfect with the keys etc. Thank you so much Jennifer, we had a great time in New York.\rAttention: it's on the 4th floor without a lift :-) but definetely worth it!",90,False,en,0.410124,0.514050,Positive
3,2595,34320,2010-04-09,71130,Kai-Uwe,"We've been staying here for about 9 nights, enjoying to be in the center of the city, that never sleeps...short ways to everywhere in Manhattan, by subway or by walk. Midtown castle is a beauftiful and tastful place, Jennifer and Tori relaxed and friendly hosts - thats why we - the three Berliners - recommand that place! Good to have WiFi and a little kitchen too!",66,False,en,0.266875,0.460000,Positive
4,2595,46312,2010-05-25,117113,Alicia,We had a wonderful stay at Jennifer's charming apartment! They were very organized and helpful; I would definitely recommend staying at the Midtown Castle!,24,False,en,0.518750,0.700000,Positive
...,...,...,...,...,...,...,...,...,...,...,...,...
966813,1295054021439479562,1318207836791174950,2024-12-23,485243180,Vennela,"The place was amazing! Great location and area, 5 min from transit and convenient grocery stores super close by. Place looks exactly like the pictures; super clean and a great find in nyc! Freddy was super responsive and very accommodating.",40,False,en,0.485185,0.627778,Positive
966814,1295058207773420592,1302324242336666111,2024-12-01,368726917,Veronica,"This was the perfect stay for our trip. Margarita was literally the best. She went above and beyond to make sure check in and check out was smooth. Any questions I had she answered instantly even during the holidays! The place is beautiful, very clean, and has everything you need! Would recommend to anyone. Can’t wait to book again. 😊My favorite thing was having the subway only walking distance.",69,False,en,0.484583,0.736556,Positive
966815,1295058207773420592,1305143295960134335,2024-12-05,431518239,Rudy,….amazing place I felt at home,7,False,en,0.000000,0.000000,Neutral
966816,1295058207773420592,1309605174750125344,2024-12-11,26420925,Tara,We loved this place and would stay again. Our host even left chocolate and marshmallows for a treat and checked in regularly. Thanks so much !!,26,False,en,0.303125,0.300000,Positive


In [None]:
pd.set_option('display.max_colwidth', None)

In [None]:
reviews_data_textblob.head(20)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang,polarity,subjectivity,sentiment
0,2595,17857,2009-11-21,50679,Jean,"Our three-night stay. We enjoyed the apartment which is very well located. Pleasant, clean and well maintained. It is ideal for a family of 3 or 4 people. Small problems when we arrived there was no one to receive us, and the soap for the shower, the dishwashing liquid were missing, we received them the day after tomorrow. There is also the noise of the NY Metro, so a difficult first night if you are not used to it. Jennifer is correct the refund of the deposit was very fast. Apart from these small details our short stay went well. If I have the opportunity to return to NY for the holidays, I will stay at ""The Midtown Castle"" Jean Possession - Reunion Island",124,False,en,0.1675,0.5775,Positive
1,2595,19176,2009-12-05,53267,Cate,Great experience.,2,False,ro,0.8,0.75,Positive
2,2595,19760,2009-12-10,38960,Anita,"I've stayed with my friend at the Midtown Castle for six days and it was a lovely place to be. A big spacious room with a pointy roof, which really makes you feel like staying in a castle. The location is perfect. It is just a few steps from Macy's Time Square and Theatre District. Everything worked just perfect with the keys etc. Thank you so much Jennifer, we had a great time in New York.\rAttention: it's on the 4th floor without a lift :-) but definetely worth it!",90,False,en,0.410124,0.51405,Positive
3,2595,34320,2010-04-09,71130,Kai-Uwe,"We've been staying here for about 9 nights, enjoying to be in the center of the city, that never sleeps...short ways to everywhere in Manhattan, by subway or by walk. Midtown castle is a beauftiful and tastful place, Jennifer and Tori relaxed and friendly hosts - thats why we - the three Berliners - recommand that place! Good to have WiFi and a little kitchen too!",66,False,en,0.266875,0.46,Positive
4,2595,46312,2010-05-25,117113,Alicia,We had a wonderful stay at Jennifer's charming apartment! They were very organized and helpful; I would definitely recommend staying at the Midtown Castle!,24,False,en,0.51875,0.7,Positive
5,2595,1238204,2012-05-07,1783688,Sergey,"Hi to everyone!\rWould say our greatest compliments to Jennifer, the host of Midtown Castle. We spent in this lovely apartment in the heart of Manhattan one month (April, 2012) and will remember this time as ours best.\rThe apartment is pretty spacious and great located - the 5-th Ave right around the corner. There is everything you can need during your short or long stay. Jennifer is very friendly, vigorous and very responsible host. Thanks her and highly recomend this apartment for everyone who are looking for a quiet place right in the center of the boiling Midtown!",99,False,en,0.309621,0.51311,Positive
6,2595,1293632,2012-05-17,1870771,Loïc,"Jennifer was very friendly and helpful, and her place is exactly as advertised. The location is very convenient, and it was a pleasure to stay at the Midtown Castle. I definitely recommend it :)\r\rThanks !",37,False,en,0.28125,0.483333,Positive
7,2595,2022498,2012-08-18,2124102,Melanie,"This apartment is like a real castle old and unique. The age- related stains on the bathroom floor and dark discolorations in the carpet too indeed indicate that the building was built a long, long time ago. \rWe weren’t happy with the way the apartment was cleaned. Jennifer, the host sent in a cleaning lady right the next morning and therefore did everything she thought was appropriate to make our stay comfortable. She let us know that guest satisfaction is important to her. \rAfter we found out that the key for the main entrance was missing by locking ourselves out in the middle of the night she was so kind to bring us the missing key within an hour. \rWe had to leave the apartment a few days earlier because we suffered from an allergic reaction (which has never happened before and never after) related to the air conditioner as our doctor told us. \rThe apartment is located in a great area! Subway stations are just two minutes away! The area seems quite safe although there are “special” adult video stores located in the same street nearby. \rA deli/grocery store is next to the building, a 24 hours open pharmacy is across the end of the street! \r",208,False,en,0.164484,0.467183,Positive
8,2595,4682989,2013-05-20,496053,Eric,"Jennifer's place was in a great midtown location, close to the subway, Bryant Park and Times Square. She met us after midnight to give us keys to the place, which we appreciated.\r\rThe apartment is spacious for two people; a deli next door is perfect for last minute toiletries.\r\rThanks for allowing us to stay!",57,False,en,0.375,0.352778,Positive
9,2595,13193832,2014-05-21,13685934,Gerald,Jennifer is a very nice host. Everything is clean and she really takes care of her guests. We can only recommend her apartment to others.,25,False,en,0.336667,0.725,Positive


In [None]:
reviews_data_textblob[reviews_data_textblob['sentiment'] == 'Negative'].head(20)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang,polarity,subjectivity,sentiment
120,64365,868949,2012-01-17,253321,Robert,Always a pleasure staying at Anthony's place. Well organized and accommodating - even when my arrival got delayed several times due to flight disruptions.,24,False,en,-0.0625,0.1875,Negative
207,6848,413393741,2019-02-17,117326652,Janis,Polite hosts and a cozy appartment with all amneties.,9,False,en,-0.2,0.75,Negative
409,6848,619071518,2020-03-16,294564318,Caroline Dahl,"There are a lot of noise, not only from the neighbourhood, but you can hear everything from the hosts who live upstairs. They have a bird or another animal that makes noises all day long and night. Additionally, every morning it sounded like someone was trying to break down the window in the kitchen - rather disturbing.",57,False,en,-0.113838,0.597778,Negative
489,219066,402807235,2019-01-19,35622526,영택,"Cozy place, recommend!",3,False,en,-0.25,0.75,Negative
1274,225976,9401775,2013-12-27,5654306,Tania,"I guess we had different expectations than the other reviewers. The place is a mess. Moldy smell (so bad that we slept with the window open even though it was below freezing). Doors, walls and carpets were dirty. It was not even safe: steep stairs without railing, gas stove not functioning well in a room without windows, cracked tiles. The bathroom was a nightmare,",64,False,en,-0.157143,0.516667,Negative
2316,7097,889154943112641669,2023-05-11,152738901,Terry,"My 1st negative experience in all the years of air b n b's. This place was not clean, nor did it look like much had been done in prep for our arrival. Very superficially dusted. The shelves for glasses were gritty, the glass surface for the hot plate was grimy, utensils were dirty, random blankets for bedding were strewn on a metal shelf in the livingrm that was also pinching a mattress btw the floor & a closet, light switch was very dirty, pillows on chairs and sofa looked like someone had just gotten up from napping, a full seam on the sofa pillow was torn & held closed by a safety pin. My son came to visit & brought his little dog, which I didn't realize was not welcome. Sorry that happened. The dog didn't return after we realized ""no dogs allowed."" And, I would've appreciated being notified that Jane was planning to come downstairs while we were out. The screen had been moved which felt a little creepy. Plusses: The place is large and great location.",178,False,en,-0.081202,0.601429,Negative
3353,231154,336505661,2018-10-14,198911239,Vani,This ia a horrible place to rent,7,False,en,-1.0,1.0,Negative
3432,10452,3789924,2013-03-15,5115210,Pauline,Very large room and clean and spacious common area as well. 5min walk from the C subway which takes you to the center of Manhattan in about 30min. Only downside: impossible to adjust the heating in the room so the heat requires you to distort the open window... I recommend this apartment,48,False,en,-0.060204,0.622449,Negative
3578,232612,20288221,2014-09-27,716807,Barbara,"Dee came to welcome us when we checked in, having been given the key by her manager the day before, despite her suffering from a bad cold.",27,False,en,-0.125,0.891667,Negative
3798,12192,1759281,2012-07-21,2094298,Audrey,"We were welcomed by a friend of Edward because he had gone camping. There, bad surprise: the accommodation, which we had understood to be a ground floor, was in fact a cellar in a courtyard full of garbage, trash cans, and used furniture. The ""apartment"" had no windows, no ventilation, was really of questionable cleanliness, the fridge did not work... The first evening while cooking, we had the unpleasant surprise of meeting a cockroach, then two, then three... Same in the courtyard where they were swarming. The second evening, we realized that there were fleas in the bedding, so we left. Even though we had paid for five nights. We never met Edward, who still sent us an email to see if there was a problem... I forget: the encounters with the local bums who wash themselves with a hose in the yard or who rummage through the trash when you go out in the morning. Even if it is eminently well located in NY (although the East Village takes on the appearance of a courtyard of miracles during the day, with gatherings of misfits), inexpensive, and not far from transport and a convenience store, an address to absolutely avoid, unless you are a very very very hardened backpacker.",216,False,en,-0.07,0.492917,Negative


In [None]:
reviews_data_textblob['sentiment'].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
Positive,930279
Neutral,26581
Negative,9958


In [None]:
local_law_date = '2023-09-05'
df_pre_blob = reviews_data_textblob[reviews_data_textblob['date'] < local_law_date]
df_post_blob = reviews_data_textblob[reviews_data_textblob['date'] >= local_law_date]
print(df_pre_blob.count())
print(df_post_blob.count())

listing_id       781039
id               781039
date             781039
reviewer_id      781039
reviewer_name    781039
comments         781039
word_count       781039
has_html         781039
lang             781039
polarity         781039
subjectivity     781039
sentiment        781039
dtype: int64
listing_id       185779
id               185779
date             185779
reviewer_id      185779
reviewer_name    185779
comments         185779
word_count       185779
has_html         185779
lang             185779
polarity         185779
subjectivity     185779
sentiment        185779
dtype: int64


In [None]:
#compare avg polarity and sentiment for df_pre and df_post
print('Polarity_pre', round(df_pre_blob['polarity'].mean(), 5))
print('subjectivity_pre', round(df_pre_blob['subjectivity'].mean(), 5))
print(df_pre_blob['sentiment'].value_counts())

print('polarity_post', round(df_post_blob['polarity'].mean(), 5))
print('subjectivity_post', round(df_post_blob['subjectivity'].mean(), 5))
print(df_post_blob['sentiment'].value_counts())

Polarity_pre 0.42533
subjectivity_pre 0.63417
sentiment
Positive    754376
Neutral      19569
Negative      7094
Name: count, dtype: int64
polarity_post 0.41298
subjectivity_post 0.62128
sentiment
Positive    175903
Neutral       7012
Negative      2864
Name: count, dtype: int64


In [None]:
# Calculating % of reviews from whole df
pre_pos = round(df_pre_blob[df_pre_blob['sentiment'] == 'Positive'].shape[0] / df_pre_blob.shape[0] * 100, 2)
pre_neg = round(df_pre_blob[df_pre_blob['sentiment'] == 'Negative'].shape[0] / df_pre_blob.shape[0] * 100, 2)
pre_neu = round(df_pre_blob[df_pre_blob['sentiment'] == 'Neutral'].shape[0] / df_pre_blob.shape[0] * 100, 2)
post_pos = round(df_post_blob[df_post_blob['sentiment'] == 'Positive'].shape[0] / df_post_blob.shape[0] * 100, 2)
post_neg = round(df_post_blob[df_post_blob['sentiment'] == 'Negative'].shape[0] / df_post_blob.shape[0] * 100, 2)
post_neu = round(df_post_blob[df_post_blob['sentiment'] == 'Neutral'].shape[0] / df_post_blob.shape[0] * 100, 2)
whole_pos = round(reviews_data_textblob[reviews_data_textblob['sentiment'] == 'Positive'].shape[0] / reviews_data_textblob.shape[0] * 100, 2)
whole_neg = round(reviews_data_textblob[reviews_data_textblob['sentiment'] == 'Negative'].shape[0] / reviews_data_textblob.shape[0] * 100, 2)
whole_neu = round(reviews_data_textblob[reviews_data_textblob['sentiment'] == 'Neutral'].shape[0] / reviews_data_textblob.shape[0] * 100, 2)

# making summary table for textblob:
summary_table = pd.DataFrame({
    'Dataset': ['Whole Dataset', 'Pre-Law', 'Post-Law'],
    'Polarity': [round(reviews_data_textblob['polarity'].mean(), 5), round(df_pre_blob['polarity'].mean(), 5), round(df_post_blob['polarity'].mean(), 5)],
    'Subjectivity': [round(reviews_data_textblob['subjectivity'].mean(), 5), round(df_pre_blob['subjectivity'].mean(), 5), round(df_post_blob['subjectivity'].mean(), 5)],
    'Positive (%)': [whole_pos, pre_pos, post_pos],
    'Negative (%)': [whole_neg, pre_neg, post_neg],
    'Neutral (%)': [whole_neu, pre_neu, post_neu],
    'Total Reviews': [reviews_data_textblob.shape[0], df_pre_blob.shape[0], df_post_blob.shape[0]]
})

summary_table

Unnamed: 0,Dataset,Polarity,Subjectivity,Positive (%),Negative (%),Neutral (%),Total Reviews
0,Whole Dataset,0.42295,0.63169,96.22,1.03,2.75,966818
1,Pre-Law,0.42533,0.63417,96.59,0.91,2.51,781039
2,Post-Law,0.41298,0.62128,94.68,1.54,3.77,185779


# VADER

In [None]:
#copy data for vader
reviews_data_vader = reviews_data.copy()

In [None]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
sia = SentimentIntensityAnalyzer()
reviews_data_vader['sentiment'] = reviews_data_vader['comments'].apply(lambda x: sia.polarity_scores(x)['compound'])
reviews_data_vader

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang,sentiment
0,2595,17857,2009-11-21,50679,Jean,"Our three-night stay. We enjoyed the apartment which is very well located. Pleasant, clean and well maintained. It is ideal for a family of 3 or 4 people. Small problems when we arrived there was no one to receive us, and the soap for the shower, the dishwashing liquid were missing, we received them the day after tomorrow. There is also the noise of the NY Metro, so a difficult first night if you are not used to it. Jennifer is correct the refund of the deposit was very fast. Apart from these small details our short stay went well. If I have the opportunity to return to NY for the holidays, I will stay at ""The Midtown Castle"" Jean Possession - Reunion Island",124,False,en,0.9349
1,2595,19176,2009-12-05,53267,Cate,Great experience.,2,False,ro,0.6249
2,2595,19760,2009-12-10,38960,Anita,"I've stayed with my friend at the Midtown Castle for six days and it was a lovely place to be. A big spacious room with a pointy roof, which really makes you feel like staying in a castle. The location is perfect. It is just a few steps from Macy's Time Square and Theatre District. Everything worked just perfect with the keys etc. Thank you so much Jennifer, we had a great time in New York.\rAttention: it's on the 4th floor without a lift :-) but definetely worth it!",90,False,en,0.9248
3,2595,34320,2010-04-09,71130,Kai-Uwe,"We've been staying here for about 9 nights, enjoying to be in the center of the city, that never sleeps...short ways to everywhere in Manhattan, by subway or by walk. Midtown castle is a beauftiful and tastful place, Jennifer and Tori relaxed and friendly hosts - thats why we - the three Berliners - recommand that place! Good to have WiFi and a little kitchen too!",66,False,en,0.9229
4,2595,46312,2010-05-25,117113,Alicia,We had a wonderful stay at Jennifer's charming apartment! They were very organized and helpful; I would definitely recommend staying at the Midtown Castle!,24,False,en,0.9464
...,...,...,...,...,...,...,...,...,...,...
966813,1295054021439479562,1318207836791174950,2024-12-23,485243180,Vennela,"The place was amazing! Great location and area, 5 min from transit and convenient grocery stores super close by. Place looks exactly like the pictures; super clean and a great find in nyc! Freddy was super responsive and very accommodating.",40,False,en,0.9861
966814,1295058207773420592,1302324242336666111,2024-12-01,368726917,Veronica,"This was the perfect stay for our trip. Margarita was literally the best. She went above and beyond to make sure check in and check out was smooth. Any questions I had she answered instantly even during the holidays! The place is beautiful, very clean, and has everything you need! Would recommend to anyone. Can’t wait to book again. 😊My favorite thing was having the subway only walking distance.",69,False,en,0.9771
966815,1295058207773420592,1305143295960134335,2024-12-05,431518239,Rudy,….amazing place I felt at home,7,False,en,0.0000
966816,1295058207773420592,1309605174750125344,2024-12-11,26420925,Tara,We loved this place and would stay again. Our host even left chocolate and marshmallows for a treat and checked in regularly. Thanks so much !!,26,False,en,0.8774


In [None]:
reviews_data_vader['vader_scores'] = reviews_data_vader['comments'].apply(lambda x: sia.polarity_scores(x))

In [None]:
reviews_data_vader['vader_sentiment'] = reviews_data_vader['vader_scores'].apply(lambda x: 'positive' if x['compound'] > 0.05
                                                                     else 'negative' if x['compound'] < -0.05
                                                                     else 'neutral')


In [None]:
reviews_data_vader

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments,word_count,has_html,lang,sentiment,vader_scores,vader_sentiment
0,2595,17857,2009-11-21,50679,Jean,"Our three-night stay. We enjoyed the apartment which is very well located. Pleasant, clean and well maintained. It is ideal for a family of 3 or 4 people. Small problems when we arrived there was no one to receive us, and the soap for the shower, the dishwashing liquid were missing, we received them the day after tomorrow. There is also the noise of the NY Metro, so a difficult first night if you are not used to it. Jennifer is correct the refund of the deposit was very fast. Apart from these small details our short stay went well. If I have the opportunity to return to NY for the holidays, I will stay at ""The Midtown Castle"" Jean Possession - Reunion Island",124,False,en,0.9349,"{'neg': 0.074, 'neu': 0.743, 'pos': 0.183, 'compound': 0.9349}",positive
1,2595,19176,2009-12-05,53267,Cate,Great experience.,2,False,ro,0.6249,"{'neg': 0.0, 'neu': 0.196, 'pos': 0.804, 'compound': 0.6249}",positive
2,2595,19760,2009-12-10,38960,Anita,"I've stayed with my friend at the Midtown Castle for six days and it was a lovely place to be. A big spacious room with a pointy roof, which really makes you feel like staying in a castle. The location is perfect. It is just a few steps from Macy's Time Square and Theatre District. Everything worked just perfect with the keys etc. Thank you so much Jennifer, we had a great time in New York.\rAttention: it's on the 4th floor without a lift :-) but definetely worth it!",90,False,en,0.9248,"{'neg': 0.016, 'neu': 0.793, 'pos': 0.192, 'compound': 0.9248}",positive
3,2595,34320,2010-04-09,71130,Kai-Uwe,"We've been staying here for about 9 nights, enjoying to be in the center of the city, that never sleeps...short ways to everywhere in Manhattan, by subway or by walk. Midtown castle is a beauftiful and tastful place, Jennifer and Tori relaxed and friendly hosts - thats why we - the three Berliners - recommand that place! Good to have WiFi and a little kitchen too!",66,False,en,0.9229,"{'neg': 0.0, 'neu': 0.808, 'pos': 0.192, 'compound': 0.9229}",positive
4,2595,46312,2010-05-25,117113,Alicia,We had a wonderful stay at Jennifer's charming apartment! They were very organized and helpful; I would definitely recommend staying at the Midtown Castle!,24,False,en,0.9464,"{'neg': 0.0, 'neu': 0.51, 'pos': 0.49, 'compound': 0.9464}",positive
...,...,...,...,...,...,...,...,...,...,...,...,...
966813,1295054021439479562,1318207836791174950,2024-12-23,485243180,Vennela,"The place was amazing! Great location and area, 5 min from transit and convenient grocery stores super close by. Place looks exactly like the pictures; super clean and a great find in nyc! Freddy was super responsive and very accommodating.",40,False,en,0.9861,"{'neg': 0.0, 'neu': 0.476, 'pos': 0.524, 'compound': 0.9861}",positive
966814,1295058207773420592,1302324242336666111,2024-12-01,368726917,Veronica,"This was the perfect stay for our trip. Margarita was literally the best. She went above and beyond to make sure check in and check out was smooth. Any questions I had she answered instantly even during the holidays! The place is beautiful, very clean, and has everything you need! Would recommend to anyone. Can’t wait to book again. 😊My favorite thing was having the subway only walking distance.",69,False,en,0.9771,"{'neg': 0.0, 'neu': 0.699, 'pos': 0.301, 'compound': 0.9771}",positive
966815,1295058207773420592,1305143295960134335,2024-12-05,431518239,Rudy,….amazing place I felt at home,7,False,en,0.0000,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",neutral
966816,1295058207773420592,1309605174750125344,2024-12-11,26420925,Tara,We loved this place and would stay again. Our host even left chocolate and marshmallows for a treat and checked in regularly. Thanks so much !!,26,False,en,0.8774,"{'neg': 0.0, 'neu': 0.686, 'pos': 0.314, 'compound': 0.8774}",positive


In [None]:
local_law_date = '2023-09-05'
df_pre_vad = reviews_data_vader[reviews_data_vader['date'] < local_law_date]
df_post_vad = reviews_data_vader[reviews_data_vader['date'] >= local_law_date]
print(df_pre_vad.count())
print(df_post_vad.count())

listing_id         781039
id                 781039
date               781039
reviewer_id        781039
reviewer_name      781039
comments           781039
word_count         781039
has_html           781039
lang               781039
sentiment          781039
vader_scores       781039
vader_sentiment    781039
dtype: int64
listing_id         185779
id                 185779
date               185779
reviewer_id        185779
reviewer_name      185779
comments           185779
word_count         185779
has_html           185779
lang               185779
sentiment          185779
vader_scores       185779
vader_sentiment    185779
dtype: int64


In [None]:
pre_pos1 = round(df_pre_vad[df_pre_vad['vader_sentiment'] == 'positive'].shape[0] / df_pre_vad.shape[0] * 100, 2)
pre_neg1 = round(df_pre_vad[df_pre_vad['vader_sentiment'] == 'negative'].shape[0] / df_pre_vad.shape[0] * 100, 2)
pre_neu1 = round(df_pre_vad[df_pre_vad['vader_sentiment'] == 'neutral'].shape[0] / df_pre_vad.shape[0] * 100, 2)
post_pos1 = round(df_post_vad[df_post_vad['vader_sentiment'] == 'positive'].shape[0] / df_post_vad.shape[0] * 100, 2)
post_neg1 = round(df_post_vad[df_post_vad['vader_sentiment'] == 'negative'].shape[0] / df_post_vad.shape[0] * 100, 2)
post_neu1 = round(df_post_vad[df_post_vad['vader_sentiment'] == 'neutral'].shape[0] / df_post_vad.shape[0] * 100, 2)
whole_pos1 = round(reviews_data_vader[reviews_data_vader['vader_sentiment'] == 'positive'].shape[0] / reviews_data_vader.shape[0] * 100, 2)
whole_neg1 = round(reviews_data_vader[reviews_data_vader['vader_sentiment'] == 'negative'].shape[0] / reviews_data_vader.shape[0] * 100, 2)
whole_neu1 = round(reviews_data_vader[reviews_data_vader['vader_sentiment'] == 'neutral'].shape[0] / reviews_data_vader.shape[0] * 100, 2)


In [None]:
# making summary table for textblob:
summary_table1 = pd.DataFrame({
    'Dataset': ['Whole Dataset', 'Pre-Law', 'Post-Law'],
    'Positive (%)': [whole_pos1, pre_pos1, post_pos1],
    'Negative (%)': [whole_neg1, pre_neg1, post_neg1],
    'Neutral (%)': [whole_neu1, pre_neu1, post_neu1],
    'Total Reviews': [reviews_data_vader.shape[0], df_pre_vad.shape[0], df_post_vad.shape[0]]
})

summary_table1

Unnamed: 0,Dataset,Positive (%),Negative (%),Neutral (%),Total Reviews
0,Whole Dataset,96.47,1.9,1.63,966818
1,Pre-Law,96.76,1.7,1.54,781039
2,Post-Law,95.22,2.74,2.03,185779


# Analyzing Both in Comparison



In [None]:
#change name sentiment to textblob_sentiment
reviews_data_textblob.rename(columns={'sentiment': 'textblob_sentiment'}, inplace=True)

In [None]:
#joining sentiment from reviews_data_textblob with vader_sentiment from reviews_data_vader on reviewer id
comparison = pd.merge(reviews_data_textblob[['reviewer_id','textblob_sentiment']], reviews_data_vader[['reviewer_id', 'vader_sentiment']], on='reviewer_id', how='inner')
comparison.head()

Unnamed: 0,reviewer_id,textblob_sentiment,vader_sentiment
0,50679,Positive,positive
1,53267,Positive,positive
2,38960,Positive,positive
3,71130,Positive,positive
4,117113,Positive,positive


In [None]:
#lowercase textblob_sentiment
comparison['textblob_sentiment'] = comparison['textblob_sentiment'].str.lower()

In [None]:
#new column for difference between sentiment and vader
def compare_sentiments(df):
  df['compare'] = df.apply(lambda x: 'same' if str(x['textblob_sentiment']) == str(x['vader_sentiment']) else 'different', axis=1)
  return df

comparison = compare_sentiments(comparison)
print(comparison[['textblob_sentiment', 'vader_sentiment', 'compare']].head())

  textblob_sentiment vader_sentiment compare
0           positive        positive    same
1           positive        positive    same
2           positive        positive    same
3           positive        positive    same
4           positive        positive    same


In [None]:
#count of the numeber of same and different
counts = comparison['compare'].value_counts()
counts

Unnamed: 0_level_0,count
compare,Unnamed: 1_level_1
same,1325511
different,67493
