In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob

In [2]:
import pandas as pd


In [4]:
# Load the raw reviews; later cells read the review text from the 'Reviews' column.
reviews_df = pd.read_csv('reviews_data.csv')

In [5]:
# Fetch the NLTK data packages this notebook relies on.
# (nltk is already imported in the first cell; the import is repeated here.)
import nltk
nltk.download('punkt')  # tokenizer data (unzipped under tokenizers/ per the log below)
nltk.download('stopwords')  # stop-word corpus read by stopwords.words('english')
nltk.download('wordnet')  # presumably the data behind WordNetLemmatizer -- TODO confirm
nltk.download('averaged_perceptron_tagger')  # tagger data (unzipped under taggers/ per the log below)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [6]:
from functools import lru_cache


@lru_cache(maxsize=None)
def _preprocess_resources():
    """Build the English stop-word set and the lemmatizer once, then reuse them."""
    return frozenset(stopwords.words('english')), WordNetLemmatizer()


def preprocess(text):
    """Tokenize a review into lowercase, lemmatized content words.

    Parameters
    ----------
    text : str
        Raw review text. Any non-string value (for example a missing value
        coming from an empty CSV cell) is treated as an empty review.

    Returns
    -------
    list of str
        Lowercased, lemmatized tokens. Only alphanumeric tokens that are not
        English stop words are kept, in their original order.
    """
    # A missing review would otherwise fail inside word_tokenize, which
    # expects a string; an empty token list lets the pipeline carry on.
    if not isinstance(text, str):
        return []

    # Loading the stop-word corpus and creating the lemmatizer on every call
    # is wasted work when this runs once per review, so they are cached.
    stop_words, lemmatizer = _preprocess_resources()

    clean_tokens = []
    for word in word_tokenize(text):
        lowered = word.lower()
        # isalnum() discards punctuation and tokens with mixed symbols.
        if word.isalnum() and lowered not in stop_words:
            clean_tokens.append(lemmatizer.lemmatize(lowered))

    return clean_tokens

In [7]:
from nltk.tag import pos_tag

In [8]:
def extract_subtheme_sentiments(review, pos_tags):
    """Group consecutive content words into subthemes and label each one's sentiment.

    Parameters
    ----------
    review : str
        The original review text. It is accepted for the caller's convenience
        but is not read by this function.
    pos_tags : iterable of (word, tag) pairs
        Tagged tokens. Runs of consecutive tokens whose tag starts with
        NN, JJ, RB or VB are joined with single spaces into one subtheme;
        any other tag ends the current run.

    Returns
    -------
    dict
        Maps each subtheme string to "positive", "negative" or "neutral",
        chosen from the sign of TextBlob's polarity for that string.
    """
    content_prefixes = ('NN', 'JJ', 'RB', 'VB')  # noun, adjective, adverb, verb

    phrases = []
    run = []
    for token, tag in pos_tags:
        if tag.startswith(content_prefixes):
            run.append(token)
            continue
        # Any other tag closes the run that was being collected, if any.
        if run:
            phrases.append(" ".join(run))
            run = []
    # Flush a run that reaches the end of the token list.
    if run:
        phrases.append(" ".join(run))

    def label_for(polarity):
        if polarity > 0:
            return "positive"
        if polarity < 0:
            return "negative"
        return "neutral"

    return {
        phrase: label_for(TextBlob(phrase).sentiment.polarity)
        for phrase in phrases
    }

In [9]:
# Run the full pipeline (preprocess -> POS tagging -> subtheme sentiment) on one sample review.

revie = 'Tires where delivered to the garage of my choice,the garage notified me when they had been delivered. A day and time was arranged with the garage and I went and had them fitted,a Hassel free experience.'
# Despite its name, `word` holds the list of cleaned tokens returned by preprocess.
word = preprocess(revie)
# pos_tag is applied to the already-cleaned tokens rather than to the raw sentence.
postag = pos_tag(word)
# Last expression of the cell: displays the dict of subtheme -> sentiment label.
extract_subtheme_sentiments(revie, postag)

{'tire delivered garage choice garage notified delivered day time arranged garage went fitted hassel free experience': 'positive'}

In [10]:
# Start from an empty two-column frame that will hold one row per subtheme and its sentiment label.
subtheme_sentiments_df = pd.DataFrame(columns=["subtheme", "sentiment"])

In [None]:
# Stage 1: clean tokens for every review.
reviews_df['Words'] = reviews_df['Reviews'].apply(preprocess)
# Stage 2: part-of-speech tags for the cleaned tokens.
reviews_df['Pos_tags'] = reviews_df['Words'].apply(pos_tag)
# Stage 3: one {subtheme: sentiment} dict per review.
reviews_df['Subtheme_sentiments'] = [
    extract_subtheme_sentiments(review, tags)
    for review, tags in zip(reviews_df['Reviews'], reviews_df['Pos_tags'])
]

# Flatten the per-review dicts into one row per (subtheme, sentiment) pair.
# The records are collected first and the frame is built in a single call:
# DataFrame.append no longer exists in pandas 2.x, and growing a frame row
# by row copies it on every iteration. Rebuilding the frame from scratch
# also means re-running this cell does not duplicate rows.
subtheme_records = [
    {"subtheme": subtheme, "sentiment": sentiment}
    for sentiments in reviews_df['Subtheme_sentiments']
    for subtheme, sentiment in sentiments.items()
]
subtheme_sentiments_df = pd.DataFrame(subtheme_records, columns=["subtheme", "sentiment"])

In [12]:
# Display the flattened subtheme/sentiment table (one row per subtheme).
subtheme_sentiments_df

Unnamed: 0,subtheme,sentiment
0,tire delivered garage choice garage notified d...,positive
1,easy tyre selection process competitive pricin...,positive
2,easy use good value money,positive
3,really easy convenient arrange,positive
4,easy select tyre size arrange local fitting pr...,positive
...,...,...
16113,ordered tyre needed line booked specified time...,neutral
16114,use redacted good price tyre quick search,positive
16115,excellent service point order fitting complain...,positive
16116,seamless well managed end,positive
