In [167]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textblob import TextBlob

In [168]:
data = pd.read_csv('Evaluation-dataset.csv')
data

Unnamed: 0,"Tires where delivered to the garage of my choice,the garage notified me when they had been delivered. A day and time was arranged with the garage and I went and had them fitted,a Hassel free experience.",garage service positive,ease of booking positive,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,"Easy Tyre Selection Process, Competitive Prici...",garage service positive,value for money positive,,,,,,,,,,,,
1,Very easy to use and good value for money.,value for money positive,,,,,,,,,,,,,
2,Really easy and convenient to arrange,ease of booking positive,,,,,,,,,,,,,
3,It was so easy to select tyre sizes and arrang...,location positive,value for money positive,ease of booking positive,,,,,,,,,,,
4,service was excellent. Only slight downside wa...,length of fitting positive,ease of booking positive,ease of booking negative,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10126,"I ordered the wrong tyres, however [REDACTED] ...",refund positive,delivery punctuality positive,refund timescale positive,,,,,,,,,,,
10127,"Good experience, first time I have used [REDAC...",length of fitting positive,,,,,,,,,,,,,
10128,"I ordered the tyre I needed on line, booked a ...",location positive,delivery punctuality positive,length of fitting positive,value for money positive,,,,,,,,,,
10129,Excellent service from point of order to fitti...,,,,,,,,,,,,,,


In [169]:
data.rename(columns={data.columns[0]: 'review'}, inplace=True)
data.columns

Index(['review', 'garage service positive', 'ease of booking positive',
       'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7',
       'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12',
       'Unnamed: 13', 'Unnamed: 14'],
      dtype='object')

In [170]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [171]:
stop_words = set(stopwords.words('english'))

In [172]:
def preprocess(text):
    words = word_tokenize(text)
    words = [word for word in words if word.lower() not in stop_words]
    return words

In [173]:
data['review'] = data['review'].astype(str)
data['processed_text'] = data['review'].apply(preprocess)
print(data['processed_text'].head())


0    [Easy, Tyre, Selection, Process, ,, Competitiv...
1                   [easy, use, good, value, money, .]
2                  [Really, easy, convenient, arrange]
3    [easy, select, tyre, sizes, arrange, local, fi...
4    [service, excellent, ., slight, downside, know...
Name: processed_text, dtype: object


In [174]:
def get_sentiment(text):
    analysis = TextBlob(text)
    # Determine the sentiment
    if analysis.sentiment.polarity > 0:
        return 'positive'
    elif analysis.sentiment.polarity == 0:
        return 'neutral'
    else:
        return 'negative'


In [175]:
data['sentiment'] = data['review'].apply(get_sentiment)

In [176]:
print(data[['review', 'sentiment']].head())

                                              review sentiment
0  Easy Tyre Selection Process, Competitive Prici...  positive
1         Very easy to use and good value for money.  positive
2              Really easy and convenient to arrange  positive
3  It was so easy to select tyre sizes and arrang...  positive
4  service was excellent. Only slight downside wa...  positive


In [177]:
subthemes = {
    'garage service': ['garage', 'service'],
    'wait time': ['wait', 'delay', 'time'],
    'incorrect tyres': ['incorrect', 'wrong', 'tyre', 'tyres']
}

In [178]:
def identify_subthemes(text):
    themes = {}
    for theme, keywords in subthemes.items():
        for keyword in keywords:
            if keyword in text:
                themes[theme] = get_sentiment(text)
    return themes

In [179]:
data['subthemes'] = data['review'].apply(lambda x: identify_subthemes(x.lower()))
data.head()

Unnamed: 0,review,garage service positive,ease of booking positive,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,processed_text,sentiment,subthemes
0,"Easy Tyre Selection Process, Competitive Prici...",garage service positive,value for money positive,,,,,,,,,,,,,"[Easy, Tyre, Selection, Process, ,, Competitiv...",positive,"{'garage service': 'positive', 'incorrect tyre..."
1,Very easy to use and good value for money.,value for money positive,,,,,,,,,,,,,,"[easy, use, good, value, money, .]",positive,{}
2,Really easy and convenient to arrange,ease of booking positive,,,,,,,,,,,,,,"[Really, easy, convenient, arrange]",positive,{}
3,It was so easy to select tyre sizes and arrang...,location positive,value for money positive,ease of booking positive,,,,,,,,,,,,"[easy, select, tyre, sizes, arrange, local, fi...",positive,{'incorrect tyres': 'positive'}
4,service was excellent. Only slight downside wa...,length of fitting positive,ease of booking positive,ease of booking negative,,,,,,,,,,,,"[service, excellent, ., slight, downside, know...",positive,"{'garage service': 'positive', 'wait time': 'p..."


In [180]:
columns_to_drop = ['Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 
                   'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 
                   'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14']

data.drop(columns=columns_to_drop, inplace=True)

In [181]:
data.head()

Unnamed: 0,review,garage service positive,ease of booking positive,processed_text,sentiment,subthemes
0,"Easy Tyre Selection Process, Competitive Prici...",garage service positive,value for money positive,"[Easy, Tyre, Selection, Process, ,, Competitiv...",positive,"{'garage service': 'positive', 'incorrect tyre..."
1,Very easy to use and good value for money.,value for money positive,,"[easy, use, good, value, money, .]",positive,{}
2,Really easy and convenient to arrange,ease of booking positive,,"[Really, easy, convenient, arrange]",positive,{}
3,It was so easy to select tyre sizes and arrang...,location positive,value for money positive,"[easy, select, tyre, sizes, arrange, local, fi...",positive,{'incorrect tyres': 'positive'}
4,service was excellent. Only slight downside wa...,length of fitting positive,ease of booking positive,"[service, excellent, ., slight, downside, know...",positive,"{'garage service': 'positive', 'wait time': 'p..."


In [182]:
data['sentiment'].unique()

array(['positive', 'neutral', 'negative'], dtype=object)

In [183]:
data.to_csv('final_review.csv', index=False)

--> shivam borse