#### Experiment with airline reviews (tweets) - Aspect Extraction [Source](https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment?resource=download)

In [1]:
import spacy
import random
import re
from collections import defaultdict
import pandas as pd

In [2]:
# df = pd.read_csv("datasets/airline_reviews_tweets/Tweets.csv")
df = pd.read_csv("datasets/Restaurant_reviews/Restaurants_Train_v2.csv")

In [3]:
df.columns

Index(['id', 'Sentence', 'Aspect Term', 'polarity', 'from', 'to'], dtype='object')

#### Combine all aspects of a sentence in one column

In [4]:
df1 = df[["Sentence", "Aspect Term"]].rename(columns={"Sentence":"text", "Aspect Term":"aspects"})

In [5]:
df1.head()

Unnamed: 0,text,aspects
0,But the staff was so horrible to us.,staff
1,"To be completely fair, the only redeeming fact...",food
2,"The food is uniformly exceptional, with a very...",food
3,"The food is uniformly exceptional, with a very...",kitchen
4,"The food is uniformly exceptional, with a very...",menu


In [6]:
data = defaultdict(list)
for row in df1.itertuples():
    data[row.text].append(row.aspects)

In [7]:
sample = dict(random.sample(data.items(), 5))
sample

{'In summer-eat outside on a terrace (another great feature of Suan)!!!': ['terrace'],
 'If you want a casual neighborhood bistro that has great food and excellent service, this is the place.': ['food',
  'service'],
 "One would think we'd get an apology or complimentary drinks - instead, we got a snobby waiter wouldn't even take our order for 15 minutes and gave us lip when we asked him to do so.": ['waiter',
  'drinks'],
 'The atmosphere is unheralded, the service impecible, and the food magnificant.': ['atmosphere',
  'service',
  'food'],
 'Still, any quibbles about the bill were off-set by the pour-your-own measures of liquers which were courtesey of the house...': ['bill',
  'measures of liquers']}

In [79]:
doc = spacy_model(clean_review('If you want a casual neighborhood bistro that has great food and excellent service, this is the place.'))
[(x, x.pos_) for x in doc]
print([chunk.text for chunk in doc.noun_chunks])

['you', 'a casual neighborhood', 'that', 'great food', 'excellent service', 'this', 'the place']


#### Cleaning the reviews

In [29]:
def clean_review(text):
    text = re.sub(r'[.,\/#!$%\^&\*;:{}=_`~()]', '', text)
    return text

#### Spacy
"NOUN" - Noun,
"PROPN" - Proper Noun,
"PRON" - Pronouns,
"DET" - Articles (a, an, the)
"ADP" - adposition

#### Rule
1. Noun followed by adjective or vice-versa

In [75]:
spacy_model = spacy.load("en_core_web_sm")
for k, v in sample.items():
    doc = spacy_model(clean_review(k))
    filtered_words = [x for x in doc if x.pos_ in ("NOUN", "ADJ")]
    prev = filtered_words[0]
    pairs = []
    for words in filtered_words[1:]:
        if prev.pos_ == words.pos_:
            prev = words 
        else:
            pairs.append((prev, words))
    print(pairs, doc)



[(terrace, great)] In summer-eat outside on a terrace another great feature of Suan
[(casual, neighborhood), (casual, bistro), (great, food), (excellent, service), (excellent, place)] If you want a casual neighborhood bistro that has great food and excellent service this is the place
[(apology, complimentary), (drinks, snobby)] One would think we'd get an apology or complimentary drinks - instead we got a snobby waiter wouldn't even take our order for 15 minutes and gave us lip when we asked him to do so
[(atmosphere, unheralded), (service, impecible)] The atmosphere is unheralded the service impecible and the food magnificant
[(pour, own)] Still any quibbles about the bill were off-set by the pour-your-own measures of liquers which were courtesey of the house


In [74]:
pairs = []
doc = spacy_model(clean_review('In summer-eat outside on a terrace another great feature of Suan of features'))
filtered_words = [x for x in doc if x.pos_ in ("NOUN", "ADJ")]
print(filtered_words)
prev = filtered_words[0]
for words in filtered_words[1:]:
    print(words)
    if prev.pos_ == words.pos_:
        prev = words 
    else:
        pairs.append((prev, words))
        
pairs

[summer, terrace, great, feature, features]
terrace
great
feature
features


[(terrace, great)]

In [74]:
for k, v in sample.items():
    print(v)

['foods', 'MSG cooking']
['tuna tartare', 'sake', 'mushroom ravioli', 'pinot noir', 'chocolate sampler', 'dessert wine']
['filet mignon dish']
['sea bass']
['lines', 'crowds']


#### Test aspectnlp

In [None]:
from aspectnlp.aspect_detector import aspectDetector
from aspectnlp.aspect_sentiment import AspectSentimentScorer
from aspectnlp.vae_topic_model import VAETopic
from aspectnlp.w2v import fasttext_emb

In [None]:
as