### Step 1: with the task of Hotel Reviews sentiment analysis with NLP by importing the necessary Python libraries and import the dataset and view the top 5 rows of the dataset:

In [1]:
#Importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import svm
from sklearn.metrics import f1_score

In [2]:
data = pd.read_csv("tripadvisor_hotel_reviews.csv")
data.head()

Unnamed: 0,Review,Rating
0,nice hotel expensive parking got good deal sta...,4
1,ok nothing special charge diamond member hilto...,2
2,nice rooms not 4* experience hotel monaco seat...,3
3,"unique, great stay, wonderful time hotel monac...",5
4,"great stay great stay, went seahawk game aweso...",5


### Step 2: Now we will clean our data. We will filter our data based on ratings, where we have ratings less than 3 we will keep that in the ‘negative review’ variable, whereas we have 5 ratings we will keep that in the ‘five_review’ variable.

### Cleaning data

In [3]:
data['Rating'].value_counts()

5    9054
4    6039
3    2184
2    1793
1    1421
Name: Rating, dtype: int64

In [5]:
negative_review = data[data['Rating'] < 3]
negative_review = negative_review.reset_index(drop = True)

five_review = data.loc[(data['Rating'] == 5)]
five_review = five_review.reset_index(drop = True)

positive_review = five_review.loc[:len(negative_review)]
len(positive_review)

3215

In [6]:
data_all = pd.concat([positive_review, negative_review], axis = 0)
data_all.sample(5)

Unnamed: 0,Review,Rating
1497,3 star lobby 2 star hotel stayed start end eur...,2
3104,"beware, chic born interesting hotel property, ...",1
2933,superb resort spent week secrets january 5-14t...,5
2675,hotel 1929 worst boutique hotel world write re...,1
1794,hotel gets 3 stars hell 4 5 star venue depress...,1


### Step 3: We will create a sentiment column where sentiment which are equal to 5 will have positive sentiment otherwise negative, we will assign negative sentiment.

### Create a sentiments column

In [7]:
data_all['Sentiment'] = np.where(data_all['Rating'] == 5, 'Positive', 'Negative')

data_all = data_all.sample(frac = 1)
data_all = data_all.reset_index(drop = True)
data_all.head()

Unnamed: 0,Review,Rating,Sentiment
0,"melia caribe wonderful, just returned wonderfu...",5,Positive
1,"hated checked early, travel extensively time d...",1,Negative
2,dirty congested checked 10:00 p.m. wait rooms ...,1,Negative
3,lame located far wonderful places florence hot...,2,Negative
4,single beds terrible stayed hotel work trainin...,1,Negative


### Step 4: Now, we will split our dataset into train and test.

### Split data into train and test

In [8]:
x_train, x_test, y_train, y_test = train_test_split(data_all.Review, data_all.Sentiment)

In [9]:
cv = CountVectorizer()
x_train_vec = cv.fit_transform(x_train)
x_text_vec = cv.transform(x_test)

### Step 5: Modeling
    
### Modeling    

In [13]:
clf_svm = svm.SVC(kernel = 'linear')
clf_svm.fit(x_train_vec, y_train)

SVC(kernel='linear')

### Step 6: we will now test the accuracy of our model.
    
### Test accuracy    

In [14]:
clf_svm.score(x_text_vec, y_test)

0.9390547263681592

In [15]:
f1_score(y_test, clf_svm.predict(x_text_vec), average= None)

array([0.93950617, 0.93859649])

### Step 7: Now, we will test our model on real Reviews.
    
### Testing model on Reviews    

In [16]:
review_testing = ["I loved this place, this is beautiful and i can't wait to come again"]
review_vec = cv.transform(review_testing)
clf_svm.predict(review_vec)

array(['Positive'], dtype=object)

In [20]:
review_testing = ["I don't loved this place, i well never come back to this place again"]
review_vec = cv.transform(review_testing)
clf_svm.predict(review_vec)

array(['Positive'], dtype=object)