# Sentiment Analysis of Tech News Articles

In [1]:
import pandas as pd
import nltk

# NLTK resources fetched up front in a single download call.
nltk_resources = [
    "names",
    "stopwords",
    "averaged_perceptron_tagger",
    "vader_lexicon",
    "punkt",
]
nltk.download(nltk_resources)

[nltk_data] Downloading package names to
[nltk_data]     /Users/matthewvarona/nltk_data...
[nltk_data]   Unzipping corpora/names.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/matthewvarona/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/matthewvarona/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/matthewvarona/nltk_data...
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/matthewvarona/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [12]:
# None of the CSV files is read with a header row, so pandas labels the
# columns by position (0, 1, ...).
headerless = {"header": None}
train_set = pd.read_csv("training.csv", **headerless)
devt_set = pd.read_csv("development.csv", **headerless)
# Only the test split is decoded with the 'unicode_escape' codec.
test_set = pd.read_csv("test.csv", encoding="unicode_escape", **headerless)

In [14]:
# Row counts, printed in the order: training, development, test.
for split in (train_set, devt_set, test_set):
    print(len(split))

2346
1500
774


In [16]:
# Sentiment scorer from NLTK; presumably backed by the "vader_lexicon"
# resource downloaded in the first cell (confirm against the NLTK docs).
from nltk.sentiment import SentimentIntensityAnalyzer
# One shared analyzer instance; later cells call sia.polarity_scores(...) on it.
sia = SentimentIntensityAnalyzer()

In [37]:
# Spot-check the raw scores for one training text (row 164, text column 1).
sample_text = train_set.loc[164, 1]
sia.polarity_scores(sample_text)

{'neg': 0.0, 'neu': 0.493, 'pos': 0.507, 'compound': 0.7351}

In [42]:
# Checks the sentiment of a single string of text, ranking it positive, negative, or neutral.
def check_sentiment(string_to_analyze, analyzer=None):
    """Label one piece of text as "negative", "neutral", or "positive".

    The label is whichever of the analyzer's ``neg``, ``neu`` and ``pos``
    scores is largest; the ``compound`` score is not consulted. On an exact
    tie the earlier label in the order negative, neutral, positive wins.

    Args:
        string_to_analyze: The text to score.
        analyzer: Optional object exposing ``polarity_scores(text)`` that
            returns a mapping with ``neg``, ``neu`` and ``pos`` keys. When
            omitted, the notebook-level ``sia`` instance is used, so existing
            one-argument calls behave exactly as before.

    Returns:
        One of the strings "negative", "neutral" or "positive".
    """
    if analyzer is None:
        # Fall back to the shared analyzer created earlier in the notebook.
        analyzer = sia
    scores = analyzer.polarity_scores(string_to_analyze)
    labels_by_key = (("neg", "negative"), ("neu", "neutral"), ("pos", "positive"))
    # max() returns the first maximal item, so ties resolve in tuple order.
    _, best_label = max(labels_by_key, key=lambda pair: scores[pair[0]])
    return best_label

In [44]:
# Same sample text as above (row 164, text column 1), now reduced to a label.
check_sentiment(train_set.loc[164, 1])

'positive'

In [62]:
# Label every text in the training corpus (column 1) with the analyzer's guess.
train_list = [check_sentiment(text) for text in train_set[1]]

In [65]:
# Grade the analyzer's guesses against the labels stored in column 0.
# Every text starts out worth one point. A wrong guess costs the distance
# between the guessed and the true label on the scale below, so mixing up
# "negative" and "positive" (distance 1) is penalised more heavily than
# mixing up either of them with "neutral" (distance 0.5).
score_dict = {"negative":-0.5, "neutral":0, "positive":0.5}
total_score = len(train_list)
for guess, truth in zip(train_list, train_set[0]):
    penalty = abs(score_dict[guess] - score_dict[truth])
    total_score -= penalty

In [66]:
# Normalise by the number of graded texts so the result is a per-text average.
mean_score = total_score / len(train_list)
print(mean_score)

0.8589087809036658


In [74]:
# Reduce each training text (column 1) to the tokens whose POS tag is listed
# below; every kept token is followed by a single space.
pos_tags_to_keep = ["NN","NNP","NNS","NNPS"]
filtered_list = []
for entry in train_set[1]:
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(entry))
    new_phrase = "".join(
        token + " " for token, tag in tagged_tokens if tag in pos_tags_to_keep
    )
    print(new_phrase)
    filtered_list.append(new_phrase)

KONE Nordic Exchange Helsinki 
L T Finland Sweden Latvia Russia Norway 
week Nokia relationship Microsoft Windows Phone operating system foundation Nokia phones 
Latvia Stockmann shopping mall subsidiary Finland Stockmann Plc 
LEED certification system U.S. Green Building Council 
LONDON AFX Fortum maintenance service operations funds CapMan sum 
China Unicom NYSE CHU HKSE SHSE mobile carrier country 
amount terminations Finland employees company 
Measures quarter 
Metsa-Botnia payment dividends repayment capital repurchase shares funds divestment Uruguay operations shares Pohjolan Voima financing facilities 
Metso delivery board line air systems winders 
Mobile data transfer LTE era 
dividend Grimaldi family 
mr Bakman expansion options Tallinn-Helisnki link link ship 
Corporation Press release November Corporation company materials sector industry companies Carbon Disclosure Leadership Index CDLI Nordic Report 
M-Real grounds rumors market months 
Nastola jobs February wood products 

price kroon par value bonds kroons issue 
proposal Board Directors issuance option rights correspond proposal Board Directors Notice General Meeting 
purchase sum EUR US mn 
recruitment relocation Stora Enso research operations Karlstad Sweden 
Remote Radio head module power GHz GHz TDD frequency bands 
report equipment business structure operations history products analysis equipment revenue lines 
restructuring measures production material 
sale margarine business Finland Poland approval competition authorities October 
sale Savcor FACE Cencorp profit loss factors valuation consideration shares exchange rates 
sale September October company 
share capital Alma Media Corporation business ID EUR shares 
shares stock options Trade Register January date shares shareholder rights 
ship cranes MacGREGOR partner plants China vessels Chinese COSCO Peter Dohle Hong Kong Cido Shipping 
ship unloader conveying line storage facilities 
site hectares 
study evaluation activity company 
transactio

Cohen Steers Inc. shares % share capital voting rights 
Cohen Steers Inc. shares % share capital voting rights 
com photo-msn 
Companies report Aladdin CA F-Secure Kaspersky Marshal McAfee Microsoft Panda Proofpoint Sophos Symantec Trend Micro Tumbleweed Websense 
COMPTEL CORPORATION Sami Ervio President CEO Distribution NASDAQ OMX Helsinki Major media Comptel Dynamic OSS solutions service providers services 
Connectivity Services Scan Capture paper invoices format Basware Business Transactions Service customer invoices format 
Consumption % 
Country Finland Sector Estate Target Design-Talo Oy Buyer CapMan Oyj Vendor Ruukki Group Oyj Deal size USD m Type Status 
Country Switzerland Sector Pharmaceuticals Therapeutics Holding AG Buyer Biotie Therapies Corp Deal size USD m Type acquisition Financing Status 
Country Germany Sector Estate Target Caverion GmbH Buyer YIT Oyj Deal size USD m Type acquisition Status 
Cramo service company construction machinery equipment rental services rental

product portfolio harvesters forwarders harvester heads cranes loaders 
Janis Arbidans CEO YIT Celtnieciba company housing estate development market 
Jeder Beta-Tester erh+Ælt kostenlos Monate lang Updates eigener Aussage die M+¦glichkeit Version zu beeinflussen 
Jobs Finland 
Joint procurement factories countries 
Jon Risfelt executive positions Europolitan Ericsson SAS American Express card travel divisions Nyman Schultz CEO Vodafone Sweden CEO Gambro Renal Products CEO 
Kaleva Kustannus Oy business ID 0187274-0 notice August Alma Media shares 
Karppinen consolidation trend market 
price increase paper 
forecasts Stora Enso mill Reisholz Germany 
Kemira shares x20ac .71 
KESKO FOOD LTD PRESS RELEASE AT Kesko Food Ltd TV campaign K-food retailers 
Kesko stores chain operations Nordic Baltic countries Russia Belarus 
Kirsi Rantanen development director HK Ruokatalo poultry business 
Koduextra chain stores Non-Food Center KY Rukax OY Scan-Tukka OY 
Koff market share volume market % Karh

Swiss bank Credit Suisse International investor manufacturing plant Astana administration 
Talvivaara Mining Company PLC Talvivaara Mining LSE TALV nickel mining business production line metals recovery plant June 
Talvivaara Mining Company Plc Talvivaara Mining Company base metals producer focus nickel zinc technology bioheapleaching metals ore 
Tampere Science Parks company leases builds office properties facilities businesses 
Target company Tieto Corporation Business Identity Code Date change January Portion shares votes group OP-Pohjola Group Central Cooperative OPK shares % shares rights 
Technopolis customer companies Jyvaskyla 
Tecnomen Espoo Finland solutions telecomms operators service providers 
Tecnomen solution prepaid billing rating voice video calls data traffic kind content services networks 
Tecnomen system range services billing rating voice calls data traffic kind content services networks 
TeliaSonera s subsidiary mobile operator EMT Estonia world s identification s

study proof activity humans safety profile dose ranges studies 
subject project company Software consultancy services services Data services Computer support consultancy services Internet services etc 
player majority owner UCell mid-2007 
talks Food Ingredients Divisions group service functions company 
technology park St. Pulkovo airport 
technology businesses quarter Nokia 
Tecnomen Convergent Charging solution functionality prepaid billing rating voice calls video calls data traffic type content services networks 
Tekla installation Microsoft requirements Windows certification Company 
number rights 
restructuring costs EUR EUR mn December 
service concept technology services Raute customers life cycle investments 
size complex m2 project stages 
value deliveries EUR65m 
value order Aspo transportation subsidiary ESL Shipping Oy EUR USD m 
train territory days Vostochny port 
transaction building lot m factory buildings sq m. autumn Scanfil production operations Oulu facilities par

EQ Bank Olvi sales EUR quarter operating profit EUR mn 
CapMan Nordic buyout mezzanine technology life science estate funds capital 
mn interest income 
Exel board directors dividend share meeting April 
Financing project China 
component supplier Componenta Corporation OMX Helsinki CTH1V Monday June pricing cycle increase material prices 
construction company YIT HEL YTY1V facility engineering facility systems Caverion EUR73m USD90 .3 m 
fibers plastics producer Suominen Corporation OMX Helsinki SUY1V Wednesday October operating loss EUR0 .8 m sales EUR55 .2 m quarter 
Neste Oil list list 
silicon wafer technology company Okmetic Oyj OMX Helsinki OKM1V Wednesday September EUR6m sensor wafer business 
silicon wafers maker Okmetic Oyj September total mln mln sensor wafer business 
sports equipment company Amer Sports workers Salomon division France 
Fiskars supply marine energy equipment solutions services company Wartsila Corporation 
Fiskars R companies world manufacturer stainless st

business February way production design services entity Etteplan Operations 
business transfer effect January connection employees Elisa Daxtum employees 
center Kapuli district Mantsala Hanko-Mantsala-Porvoo road rail link Lahti Jarvenpaa 
changes readership 
closing transaction place today 
closing transaction place January 
capital funds EUR 
company sales mln mln operating profit mln mln 
commission evidence meetings cartel Brussels restaurant November companies price increases 
Committee remuneration remuneration EUR Chairman EUR Deputy Chairman EUR Board members 
companies UPM Fray Bentos pulp mill Forestal Oriental company cultivation 
company EUR2 .8 bn assets shareholders insurers % Varma % state pension fund VER % 
company Rabochy i Kolkhoznitsa A Worker Farmer Moscow system 
company deployment Danish network % population 
company end ammonia plant Billingham maintenance period Ince facility 
company dividend 
company sales year levels 
company sales whole EUR ,000 mn 
compan

share subscriptions stock options 
units Cargotec state art manufacturing facility San Antonio Texas USA operations 
share transactions part company strategy assets part core business 
agreements company customer acquisitions India TVS Electronics contract facility Jun 
time 
order output electricity Italy Wartsila equipment MW 
rating A.M. Best CIS countries Central Eastern Europe 
wood clock cover 
Tikkurila majority stake Kolorit Paints autumn 
slide show product releases Fiskars 
value contract EUR 
Trading name Velta UK Uponor brand company Uponor partner systems building sector UK 
Tulikivi soapstone fireplaces stone products utility ceramics 
TVO MW Olkiluoto plant TWh operating ratio reactors % company standards 
Tyrv+Æinen opinion airline time 
law Parliament grants power plants 
agreement Larox employees engineering documentation delivery projects product maintenance Finland Etteplan January 
agreement Benefon range TWIG GPS navigation phone devices tracking location technolo

Norske Skog staff levels people plans production tons Stora Enso staff people production tons 
filtrates clarity filter cakes meet transport moisture limits TMLs ore grades 
March electronics contract manufacturer Scanfil Oyj HEL SCF1V today plan Scanfil EMS Group Ojala-Yhtyma Oy snag shareholders rival deal 
part strategy Biohit diagnostics business company 
Benefon claims press company handset business 
split number K shares number A shares 
reproduction distribution 
consequence Works Council petition reorganisation 
negotiations reduction persons place redundancy termination contracts 
Elisa CEO Sami Seppanen EMT coverage Estonia information consumers 
SRV Group construction concern operations Finland countries Russia 
case effect Finland 
Initial value contract Euros VAT 
premature dates volume investment procedure 
Mr Clausen option Nordea Citadele Bank 
Nokia Capcom Resident Evil Degeneration N-Gage year 
Nokia Qualcomm views patent situation 
Kallasvuo vice chairman Board 
Lain

reference grade instruments price complexity use 
deal Stockmann Auto Oy Ab sales after-sales services Volkswagen Audi Helsinki Espoo Vantaa 
difference fact companies Gulf Finland 
dollar pound yen 
engines vessel gas LNG emissions 
markets Raisio capita food consumption 
employer health personnel supports pays part cost treatments 
equipment Bollore Africa Logistics Societe d'Exploitation Terminal Vridi SETV Abidjan Ivory Coast delivery March 
hearing Investigation April May 
food concern Atria meat processing companies Woro Kommerts Vastse-Kuuste Lihatoostus Official www.investinestonia.com 
floor area Yliopistonrinne project m building area sq A % facilities 
floor area Yliopistonrinne project m sq ft building area sq A % facilities 
measures negotiations employees February operations country 
NTSB investigators distance tests July trains accident 
number employees group administration employees Sievi units 
pine oil project upgrade chemical recovery plant UPM pulp mill Kymi Finlan

chain costs vendors equipment manufacturer Precor prices 
BioTie licensing partner Somaxon Pharmaceuticals phase II-III study patients gambling pilot phase II study addiction cessation 
news ICIS news www.icis.com Click trial ICIS news 
cap jacket 
administrators need job cuts insurer months 
products salt content 
need staff cuts man-years 
product manager Lassi Hietanen power plant waste Helsinki idea production power plants gas 
Scanfil demand telecommunications network products quarter situation rest year 
Finnair strike losses excess EUR reservations passenger re-routing 
HELSINKI Thomson Financial Shares Cargotec Huhtamaki reports 
airline cancellation flights closure airspace process traffic company loss EUR20m costs passengers accommodation 
announcement weeks licensing agreement companies disagreement royalty payments Texas Instrument chips technology 
Finland OP-Pohjola staff union group insurance sales tasks company sale insurance account wages 
Ruukki Group EUR project 
Sam

Exports goods % imports % 
developer manufacturer phone chargers Salcomp Plc OMX Helsinki SAL1V Wednesday November sales estimate 
investment group Norvestia Oyj profit mln mln mln mln 
Group EBIT half EUR13 .6 US m EUR22 .5 period 
market share cent cent year 
company profit quarter EUR period 
company profit quarter EUR1 .4 m EUR1 .5 m period 
sales EUR 
food trade sales EUR320 .1 m decline % 
January-November group sales EUR mn drop % period 
July-September Konecranes sales EUR mn EUR mn July-September 
sales volume Coca-Cola market share % % 
September market percent liters 
countries development operations reorganisation items takeover process earnings 
countries sales % Russia % terms euros % terms currency 
countries sales % 
Nokia U.S. shares percent GMT 
Nokia shares euros Friday percent start year part introduction models 
Operating profit EUR mn EUR mn 
Operating profit EUR mn 
Operating profit EUR mn EUR mn quarter 
Operating profit quarter EUR33m EUR39m year 
Operating pro