In [1]:
import pandas as pd
from textblob import TextBlob

In [2]:
data = pd.read_csv('C:/Users/Lenovo/Desktop/Datasets/chennai_reviews.csv')
data.head()

Unnamed: 0,Hotel_name,Review_Title,Review_Text,Sentiment,Rating_Percentage,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Accord Metropolitan,Excellent comfortableness during stay,Its really nice place to stay especially for b...,3,100,,,,
1,Accord Metropolitan,Not too comfortable,It seems that hotel does not check the basic a...,1,20,,,,
2,Accord Metropolitan,,Worst hotel I have ever encountered. I will ne...,1,20,,,,
3,Accord Metropolitan,Best hotel,Had a good time in this hotel and the staff Ku...,3,100,,,,
4,Accord Metropolitan,,good hotel and staff Veg food good non veg bre...,3,100,,,,


In [3]:
data.dtypes

Hotel_name            object
Review_Title          object
Review_Text           object
Sentiment             object
Rating_Percentage     object
Unnamed: 5           float64
Unnamed: 6            object
Unnamed: 7           float64
Unnamed: 8           float64
dtype: object

In [4]:
data.dropna(subset = ["Review_Text"], inplace= True)

In [5]:
data = data[['Hotel_name','Review_Text','Sentiment']]

In [6]:
data.head()

Unnamed: 0,Hotel_name,Review_Text,Sentiment
0,Accord Metropolitan,Its really nice place to stay especially for b...,3
1,Accord Metropolitan,It seems that hotel does not check the basic a...,1
2,Accord Metropolitan,Worst hotel I have ever encountered. I will ne...,1
3,Accord Metropolitan,Had a good time in this hotel and the staff Ku...,3
4,Accord Metropolitan,good hotel and staff Veg food good non veg bre...,3


In [7]:
data.isnull().sum()

Hotel_name     0
Review_Text    0
Sentiment      1
dtype: int64

In [8]:
data.dtypes

Hotel_name     object
Review_Text    object
Sentiment      object
dtype: object

In [9]:
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer

In [10]:
def identify_tokens(row):
    tokens = word_tokenize(row)
    # taken only words (not punctuation)
    token_words = [w for w in tokens if w.isalpha()]
    return token_words

data['toke_text'] = data['Review_Text'].apply(identify_tokens)

In [11]:
data.head()

Unnamed: 0,Hotel_name,Review_Text,Sentiment,toke_text
0,Accord Metropolitan,Its really nice place to stay especially for b...,3,"[Its, really, nice, place, to, stay, especiall..."
1,Accord Metropolitan,It seems that hotel does not check the basic a...,1,"[It, seems, that, hotel, does, not, check, the..."
2,Accord Metropolitan,Worst hotel I have ever encountered. I will ne...,1,"[Worst, hotel, I, have, ever, encountered, I, ..."
3,Accord Metropolitan,Had a good time in this hotel and the staff Ku...,3,"[Had, a, good, time, in, this, hotel, and, the..."
4,Accord Metropolitan,good hotel and staff Veg food good non veg bre...,3,"[good, hotel, and, staff, Veg, food, good, non..."


In [12]:
le = WordNetLemmatizer()
def lem_list(row):
    lematized_list = [le.lemmatize(word) for word in row]
    return (lematized_list)

data['lematized_text'] = data['toke_text'].apply(lem_list)

In [13]:
stops = set(stopwords.words("english"))                  

def remove_stops(row):
    meaningful_words = [w for w in row if not w in stops]
    return (meaningful_words)

data['stop_text'] = data['lematized_text'].apply(remove_stops)

In [14]:
def rejoin_words(row):
    joined_words = ( " ".join(row))
    return joined_words

data['new_text'] = data['stop_text'].apply(rejoin_words)

In [15]:
# create a function to ge subjectivity
def getSubjectivity(text):
    return TextBlob(text).sentiment.subjectivity

# Create a function to get the polarity
def getPolarity(text):
    return  TextBlob(text).sentiment.polarity

# Create two new columns 'Subjectivity' & 'Polarity'
data['subjectivity'] = data['new_text'].apply(getSubjectivity)
data['polarity'] = data['new_text'].apply(getPolarity)

# Show the new dataframe with columns 'Subjectivity' & 'Polarity'
data.head()

Unnamed: 0,Hotel_name,Review_Text,Sentiment,toke_text,lematized_text,stop_text,new_text,subjectivity,polarity
0,Accord Metropolitan,Its really nice place to stay especially for b...,3,"[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, stay, especially, b...",Its really nice place stay especially business...,1.0,0.3
1,Accord Metropolitan,It seems that hotel does not check the basic a...,1,"[It, seems, that, hotel, does, not, check, the...","[It, seems, that, hotel, doe, not, check, the,...","[It, seems, hotel, doe, check, basic, amenity,...",It seems hotel doe check basic amenity room ha...,0.125,0.0
2,Accord Metropolitan,Worst hotel I have ever encountered. I will ne...,1,"[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, ever, encountered, I, never,...",Worst hotel I ever encountered I never think s...,0.5625,-0.5
3,Accord Metropolitan,Had a good time in this hotel and the staff Ku...,3,"[Had, a, good, time, in, this, hotel, and, the...","[Had, a, good, time, in, this, hotel, and, the...","[Had, good, time, hotel, staff, Kumar, aishwar...",Had good time hotel staff Kumar aishwarya hous...,0.724074,0.538889
4,Accord Metropolitan,good hotel and staff Veg food good non veg bre...,3,"[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, staff, Veg, food, good, non, veg...",good hotel staff Veg food good non veg breakfa...,0.6,0.7


In [16]:
# Create a function to compute negative (-1), neutral (0) and positive (+1) analysis
def getAnalysis(score):
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'


data['analysis'] = data['polarity'].apply(getAnalysis)

# Show the dataframe
data.head()

Unnamed: 0,Hotel_name,Review_Text,Sentiment,toke_text,lematized_text,stop_text,new_text,subjectivity,polarity,analysis
0,Accord Metropolitan,Its really nice place to stay especially for b...,3,"[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, stay, especially, b...",Its really nice place stay especially business...,1.0,0.3,Positive
1,Accord Metropolitan,It seems that hotel does not check the basic a...,1,"[It, seems, that, hotel, does, not, check, the...","[It, seems, that, hotel, doe, not, check, the,...","[It, seems, hotel, doe, check, basic, amenity,...",It seems hotel doe check basic amenity room ha...,0.125,0.0,Neutral
2,Accord Metropolitan,Worst hotel I have ever encountered. I will ne...,1,"[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, ever, encountered, I, never,...",Worst hotel I ever encountered I never think s...,0.5625,-0.5,Negative
3,Accord Metropolitan,Had a good time in this hotel and the staff Ku...,3,"[Had, a, good, time, in, this, hotel, and, the...","[Had, a, good, time, in, this, hotel, and, the...","[Had, good, time, hotel, staff, Kumar, aishwar...",Had good time hotel staff Kumar aishwarya hous...,0.724074,0.538889,Positive
4,Accord Metropolitan,good hotel and staff Veg food good non veg bre...,3,"[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, staff, Veg, food, good, non, veg...",good hotel staff Veg food good non veg breakfa...,0.6,0.7,Positive


In [17]:
data['analysis'].unique()

array(['Positive', 'Neutral', 'Negative'], dtype=object)

In [18]:
data.head(10)

Unnamed: 0,Hotel_name,Review_Text,Sentiment,toke_text,lematized_text,stop_text,new_text,subjectivity,polarity,analysis
0,Accord Metropolitan,Its really nice place to stay especially for b...,3,"[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, stay, especially, b...",Its really nice place stay especially business...,1.0,0.3,Positive
1,Accord Metropolitan,It seems that hotel does not check the basic a...,1,"[It, seems, that, hotel, does, not, check, the...","[It, seems, that, hotel, doe, not, check, the,...","[It, seems, hotel, doe, check, basic, amenity,...",It seems hotel doe check basic amenity room ha...,0.125,0.0,Neutral
2,Accord Metropolitan,Worst hotel I have ever encountered. I will ne...,1,"[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, ever, encountered, I, never,...",Worst hotel I ever encountered I never think s...,0.5625,-0.5,Negative
3,Accord Metropolitan,Had a good time in this hotel and the staff Ku...,3,"[Had, a, good, time, in, this, hotel, and, the...","[Had, a, good, time, in, this, hotel, and, the...","[Had, good, time, hotel, staff, Kumar, aishwar...",Had good time hotel staff Kumar aishwarya hous...,0.724074,0.538889,Positive
4,Accord Metropolitan,good hotel and staff Veg food good non veg bre...,3,"[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, staff, Veg, food, good, non, veg...",good hotel staff Veg food good non veg breakfa...,0.6,0.7,Positive
5,Accord Metropolitan,Great hotel. The staff if welcoming. Helpful. ...,3,"[Great, hotel, The, staff, if, welcoming, Help...","[Great, hotel, The, staff, if, welcoming, Help...","[Great, hotel, The, staff, welcoming, Helpful,...",Great hotel The staff welcoming Helpful Always,0.75,0.8,Positive
6,Accord Metropolitan,"The hotel is mostly clean, but bathrooms need ...",3,"[The, hotel, is, mostly, clean, but, bathrooms...","[The, hotel, is, mostly, clean, but, bathroom,...","[The, hotel, mostly, clean, bathroom, need, ca...",The hotel mostly clean bathroom need care firs...,0.57619,0.37381,Positive
7,Accord Metropolitan,Awesome hotel with all needed amenities and lu...,3,"[Awesome, hotel, with, all, needed, amenities,...","[Awesome, hotel, with, all, needed, amenity, a...","[Awesome, hotel, needed, amenity, luxury, frie...",Awesome hotel needed amenity luxury friendly s...,0.75,0.725,Positive
8,Accord Metropolitan,Very nice \nComfortable,3,"[Very, nice, Comfortable]","[Very, nice, Comfortable]","[Very, nice, Comfortable]",Very nice Comfortable,0.9,0.59,Positive
9,Accord Metropolitan,The staff of the hotel were polite. The brick...,3,"[The, staff, of, the, hotel, were, polite, The...","[The, staff, of, the, hotel, were, polite, The...","[The, staff, hotel, polite, The, brick, oven, ...",The staff hotel polite The brick oven chef ext...,0.775,0.335417,Positive


In [19]:
# Print the percentage of positive tweets
ptweets = data[data['analysis'] == 'Positive']
ptweets = ptweets['new_text']

round( (ptweets.shape[0] / data.shape[0]) * 100 , 1)

84.5

In [20]:
# data = data.groupby('Hotel_name')

In [21]:
df = data['Hotel_name'].unique()


In [22]:
df = list(df)

In [23]:
data1 = data[['Hotel_name','analysis']]

In [24]:
data1.shape

(4767, 2)

In [50]:
for hm in range(len(df)):
    preview = data1[data1['analysis'] == 'Positive']
    preview = preview['Hotel_name']


In [51]:
preview.head()

0    Accord Metropolitan
3    Accord Metropolitan
4    Accord Metropolitan
5    Accord Metropolitan
6    Accord Metropolitan
Name: Hotel_name, dtype: object

In [52]:
data1.shape

(4767, 2)

In [53]:
a=preview.value_counts().sort_index()

In [54]:
a

Aadithya                              15
Abu sarovar portico ex Abu Palace     13
Accord Metropolitan                  112
Akash Inn                             14
Akshay Inn                            14
                                    ... 
Yashwanth Residency                    1
Zama Lodge                             5
Zone By The Park Orr Chennai           4
ibis Chennai City Centre              10
ibis Chennai Sipcot                   11
Name: Hotel_name, Length: 254, dtype: int64

In [55]:
b=data1['Hotel_name'].value_counts().sort_index()

In [56]:
b

Aadithya                              15
Abu sarovar portico ex Abu Palace     18
Accord Metropolitan                  124
Akash Inn                             14
Akshay Inn                            15
                                    ... 
Yashwanth Residency                    1
Zama Lodge                             5
Zone By The Park Orr Chennai           6
ibis Chennai City Centre              10
ibis Chennai Sipcot                   15
Name: Hotel_name, Length: 256, dtype: int64

In [60]:
c=(a/b)*100

In [63]:
c

Aadithya                             100.000000
Abu sarovar portico ex Abu Palace     72.222222
Accord Metropolitan                   90.322581
Akash Inn                            100.000000
Akshay Inn                            93.333333
                                        ...    
Yashwanth Residency                  100.000000
Zama Lodge                           100.000000
Zone By The Park Orr Chennai          66.666667
ibis Chennai City Centre             100.000000
ibis Chennai Sipcot                   73.333333
Name: Hotel_name, Length: 256, dtype: float64

In [77]:
positive_review=c.where(c >90)

In [78]:
positive_review.dropna()

Aadithya                             100.000000
Accord Metropolitan                   90.322581
Akash Inn                            100.000000
Akshay Inn                            93.333333
Aloft Chennai Omr - It Expressway     95.454545
                                        ...    
Trident Chennai                      100.000000
Turyaa Chennai                        91.304348
Yashwanth Residency                  100.000000
Zama Lodge                           100.000000
ibis Chennai City Centre             100.000000
Name: Hotel_name, Length: 112, dtype: float64

In [88]:
positive_review.index[0]

'Aadithya'

In [89]:
data.head()

Unnamed: 0,Hotel_name,Review_Text,Sentiment,toke_text,lematized_text,stop_text,new_text,subjectivity,polarity,analysis
0,Accord Metropolitan,Its really nice place to stay especially for b...,3,"[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, to, stay, especiall...","[Its, really, nice, place, stay, especially, b...",Its really nice place stay especially business...,1.0,0.3,Positive
1,Accord Metropolitan,It seems that hotel does not check the basic a...,1,"[It, seems, that, hotel, does, not, check, the...","[It, seems, that, hotel, doe, not, check, the,...","[It, seems, hotel, doe, check, basic, amenity,...",It seems hotel doe check basic amenity room ha...,0.125,0.0,Neutral
2,Accord Metropolitan,Worst hotel I have ever encountered. I will ne...,1,"[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, have, ever, encountered, I, ...","[Worst, hotel, I, ever, encountered, I, never,...",Worst hotel I ever encountered I never think s...,0.5625,-0.5,Negative
3,Accord Metropolitan,Had a good time in this hotel and the staff Ku...,3,"[Had, a, good, time, in, this, hotel, and, the...","[Had, a, good, time, in, this, hotel, and, the...","[Had, good, time, hotel, staff, Kumar, aishwar...",Had good time hotel staff Kumar aishwarya hous...,0.724074,0.538889,Positive
4,Accord Metropolitan,good hotel and staff Veg food good non veg bre...,3,"[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, and, staff, Veg, food, good, non...","[good, hotel, staff, Veg, food, good, non, veg...",good hotel staff Veg food good non veg breakfa...,0.6,0.7,Positive


In [97]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(data['new_text'], data['analysis'], test_size = 0.2)

In [98]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)

In [99]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer()
x_train_tfidf = tfidf.fit_transform(x_train)
x_test_tfidf = tfidf.transform(x_test)

In [94]:
x_test_tfidf

<954x5023 sparse matrix of type '<class 'numpy.float64'>'
	with 18357 stored elements in Compressed Sparse Row format>

Decision Tree

In [100]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier()

In [101]:
clf.fit(x_train_tfidf, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [102]:
y_pred_dt = clf.predict(x_test_tfidf)

In [103]:
from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = clf.predict(x_test_tfidf)

print('Accuracy Score:', accuracy_score(y_test, y_pred))
print('Confusion Matrix:', confusion_matrix(y_test, y_pred))

Accuracy Score: 0.9245283018867925
Confusion Matrix: [[ 78   2  21]
 [  5  36   7]
 [ 17  20 768]]


KNN

In [104]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(x_train_tfidf, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [105]:
y_pred_knn = classifier.predict(x_test_tfidf)

In [107]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print('Accuracy Score:\n', accuracy_score(y_test, y_pred_knn))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred_knn))
print('Classification Report:\n', classification_report(y_test, y_pred_knn))

Accuracy Score:
 0.4371069182389937
Confusion Matrix:
 [[ 29  70   2]
 [  0  48   0]
 [  0 465 340]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.29      0.45       101
           1       0.08      1.00      0.15        48
           2       0.99      0.42      0.59       805

    accuracy                           0.44       954
   macro avg       0.69      0.57      0.40       954
weighted avg       0.95      0.44      0.56       954



Logistic Regression

In [108]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

In [109]:
model.fit(x_train_tfidf, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [110]:
from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = model.predict(x_test_tfidf)

print('Accuracy Score:', accuracy_score(y_test, y_pred))
print('Confusion Matrix:', confusion_matrix(y_test, y_pred))

Accuracy Score: 0.8930817610062893
Confusion Matrix: [[ 49   0  52]
 [  1   0  47]
 [  2   0 803]]


AdaBoost

In [112]:
from sklearn.ensemble import AdaBoostClassifier

Model = AdaBoostClassifier()

model.fit(x_train_tfidf, y_train)




LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [113]:
from sklearn import metrics

print("Accuracy:",metrics.accuracy_score(y_test,y_pred))

Accuracy: 0.8930817610062893
