In [1]:
import pandas as pd
import sqlite3
import re
import matplotlib.pyplot as plt

import stanza

In [2]:
# Load up to 100 articles from the local SQLite database, skipping any whose
# text mentions 'Automated Insights'.
# NOTE(review): `SELECT *` relies on the table's column order matching the
# column names assigned when these rows are turned into a DataFrame -- confirm
# against the schema. There is also no ORDER BY, so which 100 rows come back
# is up to SQLite.
query = """SELECT * FROM article
            WHERE text NOT LIKE '%Automated Insights%'
            LIMIT 100;
        """

con = sqlite3.connect('data.db')
try:
    cur = con.cursor()
    # Materialise every row before the connection is closed.
    articles = list(cur.execute(query))
finally:
    # Always release the database handle, even when the query raises.
    con.close()

In [3]:
# Word-count bounds (both exclusive) an article must fall within to be kept.
MIN_WORDS = 50
MAX_WORDS = 750

df = pd.DataFrame(articles, columns=['id', 'title', 'body', 'href', 'date']).set_index('id')

# Whitespace-delimited word count per article. The vectorised `.str` accessor
# yields NaN for a missing body instead of raising, and NaN rows are dropped
# by the size filter below.
df['size'] = df['body'].str.split().str.len()

# `infer_datetime_format` is deprecated in pandas 2.x (format inference is now
# the default behaviour), so the argument is no longer passed.
df['date'] = pd.to_datetime(df['date'])

mask = (df['size'] > MIN_WORDS) & (df['size'] < MAX_WORDS)
# Take an explicit copy so columns added later do not trigger
# SettingWithCopyWarning on a filtered view.
df = df.loc[mask].copy()

In [4]:
# Shared English stanza pipeline: tokenisation followed by sentiment.
# Built once here because loading the models is the slow part.
nlp = stanza.Pipeline(
    lang='en',
    processors='tokenize,sentiment',
)

2022-07-24 16:07:52 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |
| sentiment | sstplus  |

2022-07-24 16:07:52 INFO: Use device: cpu
2022-07-24 16:07:52 INFO: Loading: tokenize
2022-07-24 16:07:52 INFO: Loading: sentiment
2022-07-24 16:07:53 INFO: Done loading processors!


In [5]:
def extract_sentiment(text):
    """Return the mean per-sentence sentiment of ``text``, shifted down by 1.

    ``text`` is run through the module-level ``nlp`` pipeline; the
    ``sentiment`` value of every resulting sentence is averaged and 1 is
    subtracted from the mean.

    NOTE(review): presumably the sentiment labels are 0/1/2
    (negative/neutral/positive), which would make the result fall in
    [-1, 1] with 0 as neutral -- confirm against the stanza documentation.

    Args:
        text: Raw document text to score.

    Returns:
        float: mean sentence sentiment minus 1, or NaN when the pipeline
        produces no sentences (for example, for empty text).
    """
    sentences = nlp(text).sentences
    if not sentences:
        # Nothing to average; NaN marks the score as missing rather than
        # raising ZeroDivisionError or pretending the text is neutral.
        return float('nan')
    # A single sentence is just the one-element case of the mean, so no
    # special branch is needed and the return type is always float.
    return sum(sentence.sentiment for sentence in sentences) / len(sentences) - 1

In [71]:
# Spot-check: draw one random article, print its title, then each sentence's
# raw sentiment label followed by the sentence text.
sample = df.sample()
title = sample['title'].item()
body = sample['body'].item()
print(title)

doc = nlp(body)
for sent in doc.sentences:
    print(sent.sentiment, sent.text, sep='\n')

# Final expression: the aggregate score is shown as the cell's output.
extract_sentiment(body)

A Giant Pension Trimmed Its Apple Stake. Here’s What It Bought.
1
The second-largest U.S. public pension by assets made substantial investment changes in the second quarter.
1
The California State Teachers’
1
Retirement System, Calstrs, substantially raised its investments in (ticker: UBER), Abbvie (ABBV), and Slack Technologies stock (WORK) from April through June.
1
In contrast to those large purchases, the pension also trimmed its holdings in stock (AAPL).
1
Calstrs disclosed the trades in a form it filed with the Securities and Exchange Commission.
1
Calstrs, which managed $226.9 billion in assets as of March 31, declined to comment on the trades.
1
It said in May that its funded ratio, comparing assets on hand against liabilities, improved to 66% as of June 30, 2019, from 64% the year before.
1
In other words, the pension had 66 cents for every dollar of benefits to be paid.
1
Calstrs bought 770,757 more Uber shares in the second quarter, raising its investment to 2.4 million shar

-0.05714285714285716