In [1]:
%load_ext autoreload
%autoreload 2

#### Trying out Spacy and TextBlob

In [2]:
import numpy as np
import pandas as pd
import os
import re

import spacy
from tqdm import tqdm

### Experiment with car dataset

In [5]:
data = pd.read_csv("datasets/car_reviews_dataset/Scrapped_Car_Reviews_Toyota.csv", engine='python', index_col=False)

In [8]:
data.head()

Unnamed: 0.1,Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,review
0,0,on 02/02/17 19:53 PM (PST),Ricardo,1997 Toyota Previa Minivan LE 3dr Minivan,"great vehicle, Toyota best design ever. thank you","there is no way back, enjoy what you have .",5.0,"great vehicle, Toyota best design ever. thank ..."
1,1,on 12/17/16 16:40 PM (PST),matt,1997 Toyota Previa Minivan LE All-Trac 3dr Min...,"my 4th previa, best van ever made!",1st 95 went over 300k before being totalled b...,5.0,"my 4th previa, best van ever made! 1st 95 went..."
2,2,on 04/14/10 07:43 AM (PDT),Joel G,1997 Toyota Previa Minivan LE 3dr Minivan,Mom's Taxi Babies Ride,Sold 86 Toyota Van 285K miles to be replaced ...,5.0,Mom's Taxi Babies Ride Sold 86 Toyota Van 285K...
3,3,on 11/12/08 17:31 PM (PST),Dennis,1997 Toyota Previa Minivan LE All-Trac 3dr Min...,My Favorite Van Ever,"I have owned lots of vans, and the Previa is ...",4.875,My Favorite Van Ever I have owned lots of vans...
4,4,on 04/14/08 22:47 PM (PDT),Alf Skrastins,1997 Toyota Previa Minivan LE All-Trac 3dr Min...,Best Minivan ever,My 1997 AWD Previa is the third one that I ha...,5.0,Best Minivan ever My 1997 AWD Previa is the th...


In [7]:
# combine review title and body for text corpus
data["review"] = data["Review_Title"] + data["Review"]

In [15]:
# !python3 -m spacy download en_core_web_lg

In [18]:
spacy_nlp = spacy.load('en_core_web_lg')
                       # , parse=True, tag=True, entity=True)

In [19]:
doc = spacy_nlp(data["review"][0])

In [22]:
data["review"][0]

'great vehicle, Toyota best design ever. thank you there is no way back, enjoy what you have .'

In [20]:
spacy.displacy.render(doc, style='dep', jupyter=True)

### Basic experiment with spacy and textblob

In [4]:
import spacy
from textblob import TextBlob

In [11]:
# !python -m spacy download en_core_web_sm 
# !pip install textblob, spacy

In [12]:
sp = spacy.load("en_core_web_sm")

In [5]:
sentences = [
    'This chocolate truffle cake is really tasty',
  'This party is amazing!',
  'My mom is the best!',
  'App response is very slow!'
  'The trip to India was very enjoyable'
]

In [6]:
extracted_aspects = []

In [13]:
sp(sentences[0])

This chocolate truffle cake is really tasty

In [15]:
for sent in sentences:
    important = sp(sent)
    descriptive_item = ''
    target = ''
    for token in important:
        if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
            target = token.text
        if token.pos_ == 'ADJ':
            added_terms = ''
            for mini_token in token.children:
                if mini_token.pos_ != 'ADV':
                    continue
                added_terms += mini_token.text + ' '
            descriptive_item = added_terms + token.text
extracted_aspects.append({"aspect": target, "description": descriptive_item})
print(f"aspects: {extracted_aspects}")

aspects: [{'aspect': 'cake', 'description': 'really tasty'}, {'aspect': 'party', 'description': 'amazing'}, {'aspect': 'mom', 'description': 'best'}, {'aspect': 'response', 'description': 'very enjoyable'}, {'aspect': 'response', 'description': 'very enjoyable'}]


In [17]:
for aspect in extracted_aspects:
    aspect['sentiment'] = TextBlob(aspect["description"]).sentiment
print(f"sentiments: {extracted_aspects}")

sentiments: [{'aspect': 'cake', 'description': 'really tasty', 'sentiment': Sentiment(polarity=0.2, subjectivity=0.2)}, {'aspect': 'party', 'description': 'amazing', 'sentiment': Sentiment(polarity=0.6000000000000001, subjectivity=0.9)}, {'aspect': 'mom', 'description': 'best', 'sentiment': Sentiment(polarity=1.0, subjectivity=0.3)}, {'aspect': 'response', 'description': 'very enjoyable', 'sentiment': Sentiment(polarity=0.65, subjectivity=0.78)}, {'aspect': 'response', 'description': 'very enjoyable', 'sentiment': Sentiment(polarity=0.65, subjectivity=0.78)}]
