# MSCA 32018 Natural Language Processing and Cognitive Computing
## Final Project - Targeted (Entity) Sentiment Identification


Shijia Huang

-----

In [1]:
# Dependency install, disabled by default. On a fresh environment, uncomment
# the line below (%pip installs into the running kernel's environment):
# %pip install -r requirements.txt

In [2]:
# Import basic libraries
import time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 

%matplotlib inline

In [3]:
import warnings

# Silence every warning so library chatter does not clutter the cell outputs.
warnings.filterwarnings('ignore')

# pandas display limits used for all DataFrame previews in this notebook.
for _option, _value in (
    ('display.max_rows', 100),
    ('display.max_columns', None),
    ('display.max_colwidth', 500),
):
    pd.set_option(_option, _value)

In [4]:
# Import NLP libraries
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from pprint import pprint
import string
from rake_nltk import Rake

import eli5
import pickle

import spacy
from spacy import displacy
from spacy.util import minibatch, compounding
# Ask spaCy to run on a GPU; presumably it stays on CPU when none is usable
# (return value is ignored here) — confirm against the spaCy docs.
spacy.prefer_gpu()
# Print the installed spaCy version so it is recorded in the cell output.
print(spacy.__version__)

import gensim
from gensim import corpora, models
from gensim.utils import simple_preprocess
from gensim.models.ldamulticore import LdaMulticore
from gensim.models import CoherenceModel

import pyLDAvis
# Newer pyLDAvis releases (3.3+) renamed the gensim adapter module from
# `pyLDAvis.gensim` to `pyLDAvis.gensim_models`. Try the new name first and
# fall back to the old one, so the cell runs on either version without
# hand-editing the import. The alias `gensimvis` is the same in both cases.
try:
    import pyLDAvis.gensim_models as gensimvis
except ImportError:
    import pyLDAvis.gensim as gensimvis
# Render pyLDAvis visualisations inline in the notebook.
pyLDAvis.enable_notebook()

2023-05-20 17:24:08.373386: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


3.5.2


In [5]:
# Import sklearn libraries
import sklearn
import imblearn
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression, SGDClassifier

In [6]:
import multiprocessing as mp
import os

# mp.cpu_count() reports every CPU in the machine, which can exceed what this
# process is actually allowed to use (CPU affinity masks, container limits).
# Prefer the scheduler affinity set where the platform exposes it, and fall
# back to the machine-wide count elsewhere (e.g. macOS, Windows).
try:
    num_processors = len(os.sched_getaffinity(0))
except AttributeError:
    num_processors = mp.cpu_count()
print(f'Available CPUs: {num_processors}')

Available CPUs: 12


## Read Data

In [7]:
%%time

# Load the news sentiment table (parquet) from the gs:// data location.
path = "gs://nlp-final-project-data/data/"
news_parquet_uri = f"{path}news_sentiment.parquet"
df_news = pd.read_parquet(news_parquet_uri, engine='pyarrow')
# Last expression: show (rows, columns) as the cell output.
df_news.shape

CPU times: user 2min 37s, sys: 1min 8s, total: 3min 46s
Wall time: 5min 11s


(154283, 11)

In [8]:
# Preview the first five rows of the news sentiment table.
df_news.head(n=5)

Unnamed: 0,id,date,cleaned title,cleaned text,title_tokens,title_lemmatized,text_tokens,text_lemmatized,title_keywords,text_keywords,sentiment
0,1,2020-02-27,Children With Autism Saw Their Learning and Social Skills Boosted After Playing With This AI Robot,"Children With Autism Saw Their Learning and Social Skills Boosted After Playing With This AI Robot admin Latest posts by admin see all Mansplaining in conferences: How can we get him to forestall February 27, 2020 Coronavirus Could Explode in the U.S. Overnight Like it Did in Italy February 27, 2020 Levi Strauss marks the next phase in corporate paid leave policies February 27, 2020 Scientists who designed an artificially clever robotic that helped youngsters with autism spice up their ...","[children, autism, saw, learning, social, skills, boosted, playing, ai, robot, children, autism, saw, learning, social, skills, boosted, playing, ai, robot, children, autism, saw, learning, social, skills, boosted, playing, ai_robot]","[child, autism, see, learn, social, skill, boost, play, robot, child, autism, see, learn, social, skill, boost, play, robot, child, autism, see, learn, social, skill, boost, play, ai_robot]","[children, autism, saw, learning, social, skills, boosted, playing, ai, robot, admin, latest, posts, admin, see, mansplaining, conferences, get, forestall, february, coronavirus, could, explode, overnight, like, italy, february, levi, strauss, marks, next, phase, corporate, paid, leave, policies, february, scientists, designed, artificially, clever, robotic, helped, youngsters, autism, spice, studying, social, talents, hope, era, may, future, help, others, developmental, dysfunction, learn, ...","[child, autism, see, learn, social, skill, boost, play, robot, late, post, admin, see, mansplaining, conference, get, explode, overnight, mark, next, phase, corporate, pay, leave, policy, scientist, design, artificially, clever, robotic, help, youngster, autism, spice, study, social, talent, era, future, help, other, developmental, dysfunction, learn, notice, youngster, gentle, average, autism, take, domestic, s, refer, socially, assistive, robotic, 
name, kiwi, month, accord, commentary, way...","[social, skill, see, play, learn, child, boost, autism, robot, ai_robot]","[robotic, youngster, kid, child, kiwi, market, autism, learn, crew, talent]",5
1,2,2021-03-26,"Forget ML, AI and Industry 4.0 – obsolescence should be your focus","Forget ML, AI and Industry 4.0 obsolescence should be your focus The world entered a new era of accelerated transformation in the last eighteen months that will continue to evolve and press forward for years to come. Most businesses are playing catchup trying to make sense of a new timeline where the ten years that had been set aside for careful planning and implementation of what was coming up next no longer exists. The next is happening now and, regardless of your industry or seniority, t...","[forget, ml, ai, industry, obsolescence, focus, forget, ml, ai, industry, obsolescence, focus, forget, ml, ai, industry, obsolescence, focus]","[forget, ml, ai, industry, obsolescence, focus, forget, ml, ai, industry, obsolescence, focus, forget, ml, ai, industry, obsolescence, focus]","[forget, ml, ai, industry, obsolescence, focus, world, entered, new, era, accelerated, transformation, last, eighteen, months, continue, evolve, press, forward, years, come, businesses, playing, catchup, trying, make, sense, new, timeline, ten, years, set, aside, careful, planning, implementation, coming, next, longer, exists, next, happening, regardless, industry, seniority, status, quo, shifted, better, face, back, invited, attend, pompous, meeting, london, brazilian, embassy, along, selec...","[forget, ai, industry, obsolescence, focus, world, enter, new, era, accelerate, transformation, last, month, continue, evolve, press, forward, year, come, business, play, catchup, try, make, sense, new, timeline, year, set, aside, careful, planning, implementation, come, next, long, exist, next, happen, regardless, industry, seniority, status, quo, shift, well, face, back, invite, attend, pompous, meeting, brazilian, embassy, select, lead, name, oil, energy, industry, get, update, go, happen...","[obsolescence, ml, industry, forget, focus, ai]","[electronic, come, card, industry, repair, new, system, require, test, 
business]",4
2,3,2021-03-10,Strategy Analytics: 71% of Smartphones Sold Globally in 2021 will be AI Powered,"Strategy Analytics: 71 of Smartphones Sold Globally in 2021 will be AI Powered BOSTONBUSINESS WIREStrategy Analytics in a newly published report, Smartphones: Global Artificial Intelligence Technologies Forecast to 2025, finds that ondevice Artificial Intelligence AI is being rapidly implemented by smartphone vendors. AI is used in various functions inside smartphones such as intelligent power optimization, imaging, virtual assistants, and to enhance device performance. The report highlights...","[strategy, analytics, smartphones, sold, globally, ai, powered, strategy, analytics, smartphones_sold, globally, ai, powered, strategy, analytics, smartphones_sold, globally, ai_powered]","[strategy, analytic, smartphone, sell, globally, ai, powered, strategy, analytic, smartphones_sold, globally, ai, powered, strategy, analytic, smartphones_sold, globally, ai_powere]","[strategy, analytics, smartphones, sold, globally, ai, powered, bostonbusiness, wirestrategy, analytics, newly, published, report, smartphones, global, artificial, intelligence, technologies, forecast, finds, ondevice, artificial, intelligence, ai, rapidly, implemented, smartphone, vendors, ai, used, various, functions, inside, smartphones, intelligent, power, optimization, imaging, virtual, assistants, enhance, device, performance, report, highlights, fact, ai, become, important, technology...","[strategy, analytic, smartphone, sell, globally, ai, power, bostonbusiness, wirestrategy, analytic, newly, publish, report, smartphone, global, artificial, intelligence, technology, forecast, find, ondevice, artificial, intelligence, ai, rapidly, implement, vendor, ai, use, various, function, smartphone, intelligent, power, optimization, image, virtual, assistant, enhance, device, performance, report, highlight, fact, ai, become, important, technology, modern, smartphone, push, add, ondevice...","[strategy, globally, 
analytic, smartphones_sold, powered, ai, smartphone, sell, ai_powere]","[ai, smartphone, strategy, analytic, ondevice, power, technology, report, well, become]",5
3,4,2020-10-20,Olympus to Support Endoscopic AI Diagnosis Education for Doctors in India and to Launch AI Diagnostic Support Application,"Olympus to Support Endoscopic AI Diagnosis Education for Doctors in India and to Launch AI Diagnostic Support Application TOKYO, Oct 20, 2020 ACN Newswire Olympus Corporation took part in a groundbreaking project as a business promoter, in cooperation with the Ministry of Internal Affairs and Communications MIC, entitled, Survey Study for International Expansion of AI Diagnosis Support System Using UltraHigh Magnifying Endoscopes in India. The project aims to develop advanced endoscopy di...","[olympus, support, endoscopic, ai, diagnosis, education, doctors, india, launch, ai, diagnostic, support, application, olympus, support_endoscopic, ai, diagnosis, education, doctors, india, launch, ai, diagnostic, support, application, olympus, support_endoscopic, ai_diagnosis, education, doctors, india, launch, ai, diagnostic, support, application]","[olympu, support, endoscopic, ai, diagnosis, education, doctor, india, launch, ai, diagnostic, support, application, ai, diagnosis, education, doctor, india, launch, ai, diagnostic, support, application, education, doctor, india, launch, ai, diagnostic, support, application]","[olympus, support, endoscopic, ai, diagnosis, education, doctors, india, launch, ai, diagnostic, support, application, tokyo, oct, acn, newswire, olympus, corporation, took, part, groundbreaking, project, business, promoter, cooperation, ministry, internal, affairs, communications, mic, entitled, survey, study, international, expansion, ai, diagnosis, support, system, using, ultrahigh, magnifying, endoscopes, india, project, aims, develop, advanced, endoscopy, diagnostics, india, relatively,...","[olympu, support, ai, diagnosis, education, doctor, launch, ai, diagnostic, support, application, corporation, take, part, groundbreaking, project, business, promoter, cooperation, affair, communication, entitle, survey, study, 
international, expansion, ai, diagnosis, support, system, use, ultrahigh, magnifying, endoscope, project, aim, develop, advanced, endoscopy, diagnostic, relatively, endoscopist, collaboration, cybernet, establish, ai, diagnostic, support, system, major, medical, insti...","[ai, support, launch, india, education, doctor, diagnostic, application, diagnosis, olympu]","[ai, support, diagnostic, train, system, endoscope, doctor, project, use, diagnosis]",3
4,5,2020-04-17,Cr Bard Inc Has Returned 48.9% Since SmarTrend Recommendation (BCR),"Cr Bard Inc Has Returned 48.9 Since SmarTrend Recommendation BCR SmarTrend identified an Uptrend for Cr Bard Inc :BCR on December 23rd, 2016 at 222.45. In approximately 40 months, Cr Bard Inc has returned 48.91 as of todays recent price of 331.24.In the past 52 weeks, Cr Bard Inc share prices have been bracketed by a low of 0.00 and a high of 0.00 and are now at 331.24, 100 above that low price. In the last five trading sessions, the 50day moving average MA has remained constant while the...","[cr, bard, inc, returned, since, smartrend, recommendation, bcr, cr_bard, inc, returned_since, smartrend_recommendation, bcr, cr_bard_inc, returned_since, smartrend_recommendation, bcr]","[cr, bard, inc, return, smartrend, recommendation, bcr, cr_bard, inc, returned_since, smartrend_recommendation, bcr, cr_bard_inc, returned_since, smartrend_recommendation, bcr]","[cr, bard, inc, returned, since, smartrend, recommendation, bcr, smartrend, identified, uptrend, cr, bard, inc, bcr, december, rd, approximately, months, cr, bard, inc, returned, todays, recent, price, past, weeks, cr, bard, inc, share, prices, bracketed, low, high, low, price, last, five, trading, sessions, day, moving, average, remained, constant, day, remained, constant, bard, inc, designs, manufactures, packages, distributes, sells, medical, surgical, diagnostic, patient, care, devices, ...","[return, smartrend, recommendation, bcr, smartrend, identify, uptrend, rd, approximately, month, return, recent, price, past, week, share, price, bracket, low, high, low, price, last, trading, session, day, move, average, remain, constant, day, remain, constant, design, manufacture, package, distribute, sell, medical, surgical, diagnostic, patient, care, device, sell, broad, range, product, worldwide, hospital, individual, healthcare, professional, extend, care, facility, alternate, site, fa...","[bcr, smartrend_recommendation, 
returned_since, inc, smartrend, return, recommendation, cr_bard_inc, cr_bard, cr]","[market, share, approach, trend, resistance, return, alert, new, price, realtime]",2


In [9]:
%%time

# Load the two entity tables (CSV) from the gs:// result location.
path = "gs://nlp-final-project-data/result/"
AI_entities = pd.read_csv(f"{path}AI_entities.csv")
ORG_entities = pd.read_csv(f"{path}ORG_entities.csv")

CPU times: user 44.1 ms, sys: 91.2 ms, total: 135 ms
Wall time: 696 ms


In [10]:
# Display the entity table read from AI_entities.csv.
AI_entities

Unnamed: 0,Entities,Labels,count
0,ChatGPT,ORG,136885
1,ML,ORG,24115
2,Machine Learning,ORG,17099
3,Bard,ORG,14528
4,GPT3,ORG,8427
5,GPT,ORG,7565
6,AIgenerated,ORG,6998
7,Bing,PERSON,8556
8,Vectorspace AI,PERSON,1784
9,Ernie Bot,PERSON,1705


In [11]:
# Display the entity table read from ORG_entities.csv.
ORG_entities

Unnamed: 0,Entities,Labels,count
0,Google,ORG,80971
1,Microsoft,ORG,67475
2,OpenAI,ORG,32015
3,IBM,ORG,25276
4,Facebook,ORG,19353
5,NVIDIA,ORG,19219
6,Amazon,ORG,17679
7,Googles,ORG,14072
8,Apple,ORG,12477
9,Twitter,ORG,11906


## ORG Targeted Sentiment Analysis

## AI Targeted Sentiment Analysis