start of code

In [51]:
import numpy as np
import os
import matplotlib as mpl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import spacy
import re
from string import punctuation
from textblob import TextBlob
import math
from spacy.lang.en import English




# Default size for every matplotlib figure created in this notebook: 8 x 8 inches.
plt.rcParams['figure.figsize'] = (8, 8)

In [53]:
# Load the small English spaCy model once; get_aspect_and_term_from_sentence
# parses every sentence with this module-level `nlp`.
nlp = spacy.load("en_core_web_sm")

In [None]:

def clean_sentences(sentences):
  """Delete every ASCII punctuation character from each sentence.

  Takes an iterable of strings and returns a new list, in the same order,
  in which all characters of ``string.punctuation`` have been removed.
  Whitespace and every other character are left untouched.
  """
  punctuation_pattern = re.compile("[" + re.escape(punctuation) + "]")
  return [punctuation_pattern.sub("", text) for text in sentences]

In [None]:

def form_sentences(doc):
  """Split review text into a list of whitespace-stripped sentences.

  A blank English pipeline is created and given a rule-based sentencizer
  configured with the boundary characters listed below. Besides single
  punctuation marks from many scripts, the list contains the newline and
  runs of two to five dots, intended to break on informal "...." separators.

  Args:
    doc: Review text as one string.

  Returns:
    list of str: Text of each detected sentence, stripped of surrounding
    whitespace, in document order.
  """
  default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
            '।', '॥', '၊', '။', '።', '፧', '፨', '᙮', '᜵', '᜶', '᠃', '᠉', '᥄',
            '᥅', '᪨', '᪩', '᪪', '᪫', '᭚', '᭛', '᭞', '᭟', '᰻', '᰼', '᱾', '᱿',
            '‼', '‽', '⁇', '⁈', '⁉', '⸮', '⸼', '꓿', '꘎', '꘏', '꛳', '꛷', '꡶',
            '꡷', '꣎', '꣏', '꤯', '꧈', '꧉', '꩝', '꩞', '꩟', '꫰', '꫱', '꯫', '﹒',
            '﹖', '﹗', '！', '．', '？', '𐩖', '𐩗', '𑁇', '𑁈', '𑂾', '𑂿', '𑃀',
            '𑃁', '𑅁', '𑅂', '𑅃', '𑇅', '𑇆', '𑇍', '𑇞', '𑇟', '𑈸', '𑈹', '𑈻', '𑈼',
            '𑊩', '𑑋', '𑑌', '𑗂', '𑗃', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐',
            '𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗', '𑙁', '𑙂', '𑜼', '𑜽', '𑜾', '𑩂',
            '𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈',
            '｡', '。', '\n', '....', '..', '.....', '...']
  # Named differently from the module-level `nlp` so the full model is not shadowed.
  sentence_nlp = English()
  config_ = {"punct_chars": default_punct_chars}
  if int(spacy.__version__.split(".")[0]) >= 3:
    # spaCy v3+ adds components by their registered name; handing add_pipe a
    # component object (the v2 convention) raises an error there.
    sentence_nlp.add_pipe("sentencizer", config=config_)
  else:
    # spaCy v2: build the component object first, then add it.
    sentence_nlp.add_pipe(sentence_nlp.create_pipe("sentencizer", config=config_))
  parsed = sentence_nlp(doc)
  return [sent.text.strip() for sent in parsed.sents]

In [None]:
def get_sentiment(polarity, subjectivity):
  """Map a (polarity, subjectivity) pair to 'Positive', 'Negative' or 'Neutral'.

  Rules, highest precedence first:
    1. polarity + subjectivity >= 1.0  -> 'Positive'
    2. polarity + subjectivity <= 0.2  -> 'Negative'
    3. polarity >= 0.2                 -> 'Positive'
    4. polarity <= -0.1                -> 'Negative'
    5. anything else                   -> 'Neutral'

  Rules 1 and 2 look at the combined score and override the polarity-only
  rules 3 and 4.
  """
  combined = polarity + subjectivity
  upper_cut = 0.5 * 2
  lower_cut = 0.1 * 2

  if combined >= upper_cut:
    return 'Positive'
  if combined <= lower_cut:
    return 'Negative'
  if polarity >= 0.2:
    return 'Positive'
  if polarity <= -0.1:
    return 'Negative'
  return 'Neutral'

In [None]:

def get_aspect_and_term_from_sentence(sentences):
  """Extract aspect / description pairs from cleaned sentences.

  Every sentence is parsed with the module-level spaCy pipeline ``nlp``.
  While walking the tokens in order, a noun that is a nominal subject
  becomes the current aspect. Each adjective met while an aspect is set
  yields one entry whose description is: negation words attached to the
  sentence root (if the root was already seen), then the adjective's adverb
  children, then the adjective itself.

  Args:
    sentences: List of cleaned sentence strings. A single string is also
      accepted and treated as one sentence.

  Returns:
    list of dict: ``{'aspect': ..., 'description': ...}`` entries in the
    order they were found.
  """
  if isinstance(sentences, str):
    # Iterating a bare string would parse it one character at a time.
    sentences = [sentences]

  aspects = []
  for sentence in sentences:
    doc = nlp(sentence)

    target = ''
    negative = ''
    for token in doc:
      if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
        target = token.text

      if token.dep_ == 'ROOT':
        # Negations hanging off the root (e.g. "not") are prefixed to every
        # adjective that follows in this sentence.
        for child in token.children:
          if child.dep_ == 'neg':
            negative += child.text + ' '

      if token.pos_ == 'ADJ':
        # Adverb modifiers such as "very" go directly in front of the adjective.
        prepend = ''.join(child.text + ' ' for child in token.children
                          if child.pos_ == 'ADV')
        descriptive_term = negative + prepend + token.text
        if target and descriptive_term:
          aspects.append({'aspect': target, 'description': descriptive_term})
  return aspects

In [None]:
def get_score_from_aspect(aspects):
  """Score each aspect's description with TextBlob.

  ``aspects`` is a list of dicts with 'aspect' and 'description' keys. The
  result maps each aspect text to a ``(label, score)`` tuple, where ``score``
  is the TextBlob sentiment of the description and ``label`` is what
  ``get_sentiment`` returns for its first two fields. When the same aspect
  occurs more than once, the last occurrence wins.
  """
  scored = {}
  for entry in aspects:
    blob_score = TextBlob(entry['description']).sentiment
    polarity, subjectivity = blob_score[0], blob_score[1]
    scored[entry['aspect']] = (get_sentiment(polarity, subjectivity), blob_score)
  return scored

In [None]:
def generate_aspect_sentiment_from_review(review):
  """Run the whole aspect-sentiment pipeline on one review.

  The review is split into sentences, punctuation is stripped from them,
  aspect / description pairs are extracted and the pairs are scored.

  Returns a list: empty when ``review`` has length zero, otherwise a single
  element holding the dict produced by ``get_score_from_aspect``.
  """
  if len(review) == 0:
    return []

  raw_sentences = form_sentences(review)
  tidy_sentences = clean_sentences(raw_sentences)
  aspect_pairs = get_aspect_and_term_from_sentence(tidy_sentences)
  return [get_score_from_aspect(aspect_pairs)]
        
    

End of code

In [None]:
# Colab only: mount Google Drive at /content/drive so files stored there can
# be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Read the raw review CSV from the mounted Drive folder, drop every row that
# has at least one missing value, then preview the first rows.
reviews_df = (
    pd.read_csv("/content/drive/Shareddrives/projects_data/Sentiment-Analysis/Datasets/raw/FK_216754.csv")
    .dropna()
)
reviews_df.head()

In [None]:
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Never truncate cell text. None means "no limit"; the former -1 sentinel is
# deprecated since pandas 1.0 and rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)


In [None]:
# print(form_sentences('I am truly satisfied with the performance of the phone\nFirst its budget phone so dont expect much from this phone.... coz its doesnt have feautures like finger print and ctype quick charge and camera quality is also okay but not bad.... If u really looking for small screen phones its the best one with snapdragon 439(12nm) processor with better performance than mtk and exynos... so i recommend this who really likes small screen lovers and who doesnt like extra added features'))

In [None]:
# sentences = form_sentences('The phone is good. I am truly satisfied with the performance of the phone!!!!!!!!!!!!!...\nFirst its budget phone so dont expect much from this phone.... coz its doesnt have feautures like finger print and ctype quick charge and camera quality is also okay but not bad.... If u really looking for small screen phones its the best one with snapdragon 439(12nm) processor with better performance than mtk and exynos... so i recommend this who really likes small screen lovers and who doesnt like extra added features')
# sentences = clean_sentences(sentences)

sample tests

In [None]:
# Try the full pipeline on a long, informal review that contains repeated
# punctuation, an embedded newline and several dot runs.
generate_aspect_sentiment_from_review(
    'The phone is good. I am truly satisfied with the performance of the phone!!!!!!!!!!!!!...\nFirst its budget phone so dont expect much from this phone.... coz its doesnt have feautures like finger print and ctype quick charge and camera quality is also okay but not bad.... If u really looking for small screen phones its the best one with snapdragon 439(12nm) processor with better performance than mtk and exynos... so i recommend this who really likes small screen lovers and who doesnt like extra added features'
)

In [None]:
# Display the review text stored under index label 7.
reviews_df.loc[7, 'review']

In [None]:
# Run the pipeline on the review stored under index label 1.
generate_aspect_sentiment_from_review(reviews_df.loc[1, 'review'])

In [None]:
# get_aspect_and_term_from_sentence expects a list of sentences, so the
# single review string is wrapped in a list rather than passed bare (a bare
# string would be iterated character by character).
aspects = get_aspect_and_term_from_sentence(['Camera is good battery is good  fingure unlock sensor is very good and fast it is very thick due to large battery this phone is not for slim phone lovers but phone is very good and price are also good , good job samsung'])

# for aspect in aspects:
#   aspect['sentiment'] = TextBlob(aspect['description']).sentiment
aspects

In [None]:
# Display the review text stored under index label 9.
reviews_df.loc[9, 'review']

In [None]:
# Hand-written sample of four short sentences, each about a different aspect.
text = 'The camera is bad . The sensor is best . Phone price is high. Delivery was very fast'

In [None]:
# Run the full pipeline on the hand-written sample held in `text`.
generate_aspect_sentiment_from_review(text)