In [6]:
import pandas as pd
import spacy
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
import random

In [2]:
# Load the review data; only the CSV columns at positions 1, 3 and 4 are kept.
df = pd.read_csv(
    '../data/combined-training.csv',
    usecols=[1, 3, 4],
)

In [3]:
# DataFrame display settings: lift every truncation limit so long review
# bodies are rendered in full. (The original set 'display.max_colwidth'
# twice; the redundant duplicate call has been dropped.)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# CSS properties for th (header) elements in the rendered dataframe
th_props = [
    ('font-size', '11px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
    ('color', '#6d6d6d'),
    ('background-color', '#f7f7f9'),
]

# CSS properties for td (data cell) elements in the rendered dataframe
td_props = [
    ('font-size', '11px'),
    ('text-align', 'left'),
]

# Table styles: one selector/props entry per element type
styles = [
    dict(selector="th", props=th_props),
    dict(selector="td", props=td_props),
]

# Last expression of the cell, so the styled table is what gets displayed.
df.style.set_table_styles(styles)

Unnamed: 0,Rating,Aspects,Body
0,1,"Customers Service, Quality",Not intuitive. Bad customer satisfaction services.
1,1,"Customers Service, Price","I just want to upload an image as an attachment to an invoice. I also want the app to read the invoice and track mileage. Ohh, that's only on the cheaper self employed version that is severely stunted. In the 3 years I've been using qb, it's almost doubled in price and almost nothing has improved. Finally, when I request phone help from the phone app, why do I get a rep who knows nothing about the phone app? Where the request comes from isn't logged on the system so the right person can call!"
2,5,General,Wow this is a fantastic program I am a new small business owner. I have learned aso much just by using this app!
3,4,Dashboard,Good apart from the top of the dashboard on the home page being greyed out.
4,5,General,Very good
5,5,General,Easy to use and my Accountant recommended it highly.
6,5,General,Slick app designed to make your business life better.
7,1,"Banking, Customer Service","Very disappointed, missing 6 months of transactions from my bank, tried to fix it as I was instructed to but couldn't, you can't get someone to speak to, no customer service. Big joke, I don't recommend it to anyone"
8,5,General,Good app at new start...
9,5,General,Great


In [None]:
# Load the small English spaCy pipeline.
nlp = spacy.load('en_core_web_sm')

# Store a lower-cased copy of every review body as a real column.
# Attribute-style assignment (``df.review = ...``) only sets a Python attribute
# on the DataFrame object and does not create a column, so item assignment is
# required here.
df['review'] = df['Body'].str.lower()

# Show which components the loaded pipeline contains.
print(nlp.pipe_names)

In [None]:
class EntityMatcher(object):
    """Pipeline component that marks exact phrase matches as entities.

    Args:
        nlp: language object; its tokenizer (``make_doc``) turns each term into
            a pattern and its vocab backs the ``PhraseMatcher``.
        terms: iterable of strings to match.
        label: label under which all terms are registered with the matcher.

    Calling the component on a ``doc`` adds one entity span per accepted match
    and returns the same ``doc``.
    """
    name = "entity_matcher"

    def __init__(self, nlp, terms, label):
        patterns = [nlp.make_doc(text) for text in terms]
        self.matcher = PhraseMatcher(nlp.vocab)
        # NOTE(review): (key, on_match, *patterns) is the spaCy v2 calling
        # convention used throughout this notebook - confirm against the
        # installed spaCy version before upgrading.
        self.matcher.add(label, None, *patterns)

    def __call__(self, doc):
        """Add non-overlapping phrase matches to ``doc.ents`` and return ``doc``.

        Entities already on the doc are kept. A match is skipped when it shares
        a token with an existing entity or with a previously accepted match,
        because assigning overlapping spans to ``doc.ents`` raises an error.
        Among competing matches the longest wins, ties going to the earliest.
        """
        # Token indices already covered by an entity.
        occupied = set()
        for ent in doc.ents:
            occupied.update(range(ent.start, ent.end))

        # Longest match first, then left-most.
        candidates = sorted(self.matcher(doc), key=lambda m: (m[1] - m[2], m[1]))

        new_spans = []
        for match_id, start, end in candidates:
            tokens = range(start, end)
            if any(i in occupied for i in tokens):
                continue
            occupied.update(tokens)
            new_spans.append(Span(doc, start, end, label=match_id))

        # Assign once instead of rebuilding doc.ents for every single match.
        if new_spans:
            doc.ents = sorted(list(doc.ents) + new_spans, key=lambda s: s.start)
        return doc

In [None]:
# Candidate aspect terms: for every review, take the head word of each noun
# chunk whose head is tagged NOUN and join them into one comma-separated string.
aspect_terms = [
    ', '.join(
        noun_chunk.root.text
        for noun_chunk in parsed.noun_chunks
        if noun_chunk.root.pos_ == 'NOUN'
    )
    for parsed in nlp.pipe(df.Body)
]
df['aspect_terms'] = aspect_terms

In [None]:
# Eyeball 25 randomly chosen rows (a different draw on every run).
df.sample(n=25)

In [4]:
import json
import os

filename = "../data/train_ner.json"
print(filename)

# JSON text is UTF-8 by specification; be explicit instead of relying on the
# platform's default encoding.
with open(filename, encoding='utf-8') as train_data:
    train = json.load(train_data)

# Reshape each record into a (text, {'entities': [...]}) pair, with every
# entity converted from a JSON list to a tuple.
TRAIN_DATA_NEW = [
    (record['content'], {'entities': [tuple(entity) for entity in record['entities']]})
    for record in train
]

# Swap only the file extension. The previous str.replace('json', 'txt') would
# also rewrite any other "json" substring in the path (e.g. a directory name).
txt_filename = os.path.splitext(filename)[0] + '.txt'
with open(txt_filename, 'w', encoding='utf-8') as write:
    write.write(str(TRAIN_DATA_NEW))

print('-------------Copy and Paste to spacy training-------------')
print()
print()
print()
# Uncomment to dump the converted data between the two banners:
# print(TRAIN_DATA_NEW)
print()
print()
print()
print('--------------------------End-----------------------------')

../data/train_ner.json
-------------Copy and Paste to spacy training-------------






--------------------------End-----------------------------


In [7]:

def train_spacy(data, iterations):
    """Train a blank English spaCy pipeline that contains only an NER component.

    Args:
        data: iterable of ``(text, annotations)`` pairs. ``annotations`` is a
            dict; each item under its ``'entities'`` key is indexable and
            element ``[2]`` of it is registered as an entity label.
        iterations: number of passes over the training pairs.

    Returns:
        The trained language object.

    The per-iteration loss dictionary is printed after every pass.

    NOTE(review): ``create_pipe`` / ``add_pipe(component)`` / ``begin_training``
    / ``update(texts, annotations)`` look like the spaCy v2 training API -
    confirm the installed version before upgrading spaCy.
    """
    # Work on a private copy: random.shuffle below reorders in place and must
    # not scramble the caller's list. This also lets tuples/iterables be passed.
    TRAIN_DATA = list(data)
    nlp = spacy.blank('en')  # create blank Language class

    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    else:
        # Keep `ner` bound even if the pipeline already has the component.
        ner = nlp.get_pipe('ner')

    # add labels (tolerate annotations without an 'entities' entry)
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities') or ():
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.begin_training()
        for itn in range(iterations):
            print("Statring iteration " + str(itn))
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update(
                    [text],  # batch of texts
                    [annotations],  # batch of annotations
                    drop=0.2,  # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
                    losses=losses)
            print(losses)
    return nlp


# Train the aspect NER model: 20 passes over the converted training pairs.
prdnlp = train_spacy(data=TRAIN_DATA_NEW, iterations=20)

Statring iteration 0
{'ner': 882.685852520622}
Statring iteration 1
{'ner': 458.3969667188678}
Statring iteration 2
{'ner': 359.2912551546349}
Statring iteration 3
{'ner': 332.1649724656092}
Statring iteration 4
{'ner': 270.95001494869643}
Statring iteration 5
{'ner': 227.51994460382622}
Statring iteration 6
{'ner': 184.18267689551305}
Statring iteration 7
{'ner': 141.2670921990605}
Statring iteration 8
{'ner': 160.44511661445057}
Statring iteration 9
{'ner': 174.5750615769363}
Statring iteration 10
{'ner': 130.9754826045678}
Statring iteration 11
{'ner': 135.09076390943753}
Statring iteration 12
{'ner': 111.33204204574298}
Statring iteration 13
{'ner': 176.55219752628594}
Statring iteration 14
{'ner': 138.64888209660822}
Statring iteration 15
{'ner': 97.62781945217961}
Statring iteration 16
{'ner': 101.28022304123697}
Statring iteration 17
{'ner': 98.56724670390211}
Statring iteration 18
{'ner': 79.72137708234567}
Statring iteration 19
{'ner': 106.55351981226346}


In [8]:
# Persist the trained pipeline to the "qbo_aspect" directory (relative path).
prdnlp.to_disk(path="qbo_aspect")

In [9]:
# Smoke-test the trained model on a handful of hand-written reviews and print
# every entity it finds together with the aspect label assigned to it.
test_text = [
    "Cancelling your subscription seems impossible. I've emailed, called, used their help menu. It shouldn't be this hard. Still trying to cancel.",
    "My App has stopped working on my Samsung s9 for 2 days in a row. You should have sent an email to inform subscribers if your app is going to be experiencing a downtime.",
    "Cannot open customer invoices. Please correct ASAP!",
    "bugs on iOS13 please update ASAP!",
]
for text in test_text:
    print('-----')
    doc = prdnlp(text)
    for found in doc.ents:
        keyword_part = 'keyword=' + found.text
        aspect_part = 'aspect=' + found.label_
        print(keyword_part, aspect_part)

-----
keyword=subscription aspect=Subscription
-----
keyword=App aspect=App
-----
keyword=invoices aspect=Estimates
-----
keyword=bugs aspect=Quality


In [10]:
# Tag every review with the distinct aspect labels predicted by the trained
# model. The strings are collected first and written back with one column
# assignment: the previous per-row ``df.loc[index, ...]`` writes inside
# ``iterrows`` were slow and would overwrite several rows at once if an index
# label were duplicated.
predicted_aspects = []
for body in df['Body']:
    aspect_terms = []
    doc = prdnlp(body)
    for ent in doc.ents:
        # keep first-seen order, drop repeated labels
        if ent.label_ not in aspect_terms:
            aspect_terms.append(ent.label_)
    predicted_aspects.append(", ".join(aspect_terms))

df['aspect_terms'] = predicted_aspects

In [11]:
# Spot-check 25 random rows: annotated "Aspects" next to predicted "aspect_terms".
df.sample(n=25)

Unnamed: 0,Rating,Aspects,Body,aspect_terms
88,5,General,Easy and user friendly app,"Updates, App"
297,2,price,Already with $40 was high and now not sure when they changed to $50 per month. And now even more higher. Not even options,
254,5,"payment,experience",App is great but don’t use the pay option. It took a week for the money to hit my account.,
25,2,"Quality, Customers Service","It keeps logging out of my bank, and also deleted a lot of what I logged from last year.. now I dont have the correct information that I need... not very happy with this quickbooks app...",Quality
387,1,"customers service,quality,price",This application made only to collect money from us and every time they increase their monthly payment \nAlways have errors and problems and now big issues that i can not open the application on my phone and no one reply to our email or calls \nThey are totally worst customer service ever,"Quality, Customer Service"
205,1,"experience,integration,quality,taxes",Transferring company file from online QuickBooks to TurboTax is only was possible by transferring the company file to desktop QuickBooks and from there to TurboTax. To do this transition you will need an old computer with windows 7 and IE11 32 Bit. Totally ridiculous and Intuit looks does not care about this issue and left their customers alone with the issue. It’s a shame.,Quality
127,1,"Customer Service, Subscriptions","I am not quite sure about QuickBooks customer service. I have been billed over $400 a month for the last 5 months. I I have yet to have my QuickBooks reconciled in any type of way. It is so messed up. I've gone through four different accountants. On top of that, there was a bug, I did get reimbursed for the payroll aspect of it, but have not yet been able to utilize the actual program I am paying for. It definitely does not show up on this app either. I am so frustrated.","Customer Service, Quality, Payroll"
373,1,"invoicing,updates,comaptibility","Since the latest update, fonts are too large and you can’t read the amounts of your invoices anymore. Sent them many screenshot examples, still can’t use the app anymore. Also, sometimes you can’t even “Accept” an invoice on the app as well. Useless for me to use now. What a shame, because I used it daily. iPhone 11 Pro with latest iOS v13.2.2","Invoicing, Dashboard, Quality"
218,1,"quality,design,experience,navigation,updates","DO NOT UPDATE! They really need to go back to the previous version of this app. Every time they make an update it seems to be useless tweaks in navigation, font size and anything else that adds no value or functionality. They really need to just work on bugs and leave the layout alone. \n\nWith the latest update, they literally removed Every feature I used to use- dashboard widgets, P&L, balance sheet, bank account updating-all gone. It’s now absolutely useless.","App, Experience, Quality, Dashboard"
12,5,"General, Experience",Kinda tricky at first but easily learned,Experience
