In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from app.models import Session, Article, Headline, Agency
from sqlalchemy import func

In [2]:
# Instantiate Session (imported from app.models); this object issues the
# queries in the cells below.
# NOTE(review): the session is never closed in the visible cells.
session = Session()

In [3]:
# Load every Article that has at least three associated Headline rows.
articles = (
    session.query(Article)
    .join(Headline)
    .group_by(Article.id)
    .having(func.count(Headline.id) >= 3)
    .all()
)

In [4]:
# Number of articles returned by the query above.
len(articles)

In [5]:
# Peek at the headlines attached to the first article in query order.
articles[0].headlines

In [6]:
# Reorder so the articles with the most headlines come first.
# sorted() already returns a new list, so no extra list() wrapper is needed.
articles = sorted(articles, key=lambda item: len(item.headlines), reverse=True)

In [7]:
# After the descending sort, index 0 is the article with the most headlines.
articles[0].headlines

In [8]:
# Select the first article of the (sorted) list for a closer look.
article = articles[0]

In [9]:
# One row per headline of the selected article: its headcompound value and
# the time it was last accessed.
df = pd.DataFrame(
    [
        {'sentiment': headline.headcompound, 'time': headline.last_accessed}
        for headline in article.headlines
    ]
)

In [10]:
# Display the sentiment/time table built for the selected article.
df

In [11]:
# Line chart of the selected article's headline sentiment against time.
sns.lineplot(data=df, x='time', y='sentiment')

In [12]:
# Build one DataFrame per article (title / sentiment / time of each headline)
# and keep only the articles whose headline sentiment takes more than one
# distinct value.
#
# The loop deliberately uses its own names (`candidate`, `history`) so it does
# not overwrite the notebook-level `article` and `df` created in earlier
# cells; re-running those earlier display/plot cells stays consistent.
changes = []
for candidate in articles:
    history = pd.DataFrame(
        [
            {'title': h.title, 'sentiment': h.headcompound, 'time': h.last_accessed}
            for h in candidate.headlines
        ]
    )
    # nunique() ignores missing values, so an article counts as "changed"
    # only when at least two distinct non-null sentiment values exist.
    if history.sentiment.nunique() > 1:
        changes.append(history)

In [13]:
import matplotlib.dates as mdates

# Plot the first article whose sentiment changed; sns.lineplot returns the
# Axes it drew on (the current Axes, since none is passed in).
ax = sns.lineplot(data=changes[0], x='time', y='sentiment')
# Show only the clock time on the x axis and tilt the labels for legibility.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
plt.xticks(rotation=45)
plt.show()

In [29]:
# Short alias for the same session object; later cells use both names.
s = session

In [52]:
# Fetch (comp, last_accessed, agency bias) for every headline, reaching the
# agency through the headline's article.
data = (
    s.query(Headline.comp, Headline.last_accessed, Agency._bias)
    .join(Headline.article)
    .join(Article.agency)
    .all()
)
df = pd.DataFrame(data, columns=['Sentiment', 'Last Accessed', 'Bias'])
df['Date'] = pd.to_datetime(df['Last Accessed'])
# PSI: the headline's sentiment multiplied by its agency's bias value.
df['PSI'] = df['Sentiment'] * df['Bias']

# Bucket by calendar day, take mean/median sentiment and mean PSI, and drop
# days that produced missing values.
agg = (
    df.set_index('Date')
    .groupby(pd.Grouper(freq='D'))
    .agg({'Sentiment': ['mean', 'median'], 'PSI': 'mean'})
    .dropna()
    .reset_index()
)
# Flatten the two-level column labels into names such as 'Sentiment mean';
# 'Date' has an empty second level, which strip() tidies up.
agg.columns = [' '.join(levels).strip() for levels in agg.columns]
agg

In [60]:
# 2x2 overview of the daily aggregates in `agg`: three time series (mean
# sentiment, median sentiment, mean PSI) and one scatter of mean sentiment
# against mean PSI.
fig, ax = plt.subplots(2, 2, figsize=(10, 8))
sns.lineplot(x='Date', y='Sentiment mean', data=agg, ax=ax[0, 0], label='Mean Sentiment')
sns.lineplot(x='Date', y='Sentiment median', data=agg, ax=ax[0, 1], label='Median Sentiment')
sns.lineplot(x='Date', y='PSI mean', data=agg, ax=ax[1, 0], label='Mean Partisan Sentiment Index')
sns.scatterplot(x='PSI mean', y='Sentiment mean', data=agg, ax=ax[1, 1], label='Mean Sentiment vs PSI')

# Only the first three panels (row-major order) have a date x axis; the
# bottom-right scatter panel is left as is.
for panel in ax.flat[:3]:
    panel.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    # Keep every other tick to reduce clutter.
    panel.set_xticks(panel.get_xticks()[::2])
    # Rotate via tick_params instead of set_xticklabels(get_xticklabels()):
    # the latter freezes the label strings, replacing the DateFormatter set
    # above, and gives blank labels on Matplotlib versions that only fill in
    # label text at draw time.
    panel.tick_params(axis='x', labelrotation=45)

ax[1, 1].set_title('Mean Sentiment vs PSI')
plt.tight_layout()

In [85]:
# Ids of articles that have more than three Headline rows.
subq = (
    session.query(Headline.article_id)
    .join(Article)
    .group_by(Headline.article_id)
    .having(func.count(Headline.id) > 3)
    .subquery()
)
# Every headline belonging to those articles, ordered by last_accessed,
# together with the article URL and the agency name.
data = (
    s.query(Headline.last_accessed, Headline.comp, Headline.title, Article.url, Agency.name)
    .join(Headline.article)
    .join(Article.agency)
    .filter(Article.id.in_(subq))
    .order_by(Headline.last_accessed)
    .all()
)
df = pd.DataFrame(data, columns=['Date', 'Sentiment', 'Title', 'URL', 'Agency'])
df

In [86]:
# Distinct article URLs present in df (array of unique values of the 'URL' column).
urls = df['URL'].unique()
urls

In [94]:
# Keep the URLs whose headline sentiment actually moves: more than one
# distinct value and a standard deviation above 0.1.
#
# Each URL is tested against its OWN rows. The previous version indexed
# `urls[6]` inside the loop, so every URL was judged by the seventh article's
# data and the result was either all URLs or none.
good_urls = []
for url in urls:
    sentiments = df.loc[df['URL'] == url, 'Sentiment']
    if len(sentiments.unique()) > 1 and sentiments.std() > 0.1:
        good_urls.append(url)

In [95]:
# Number of URLs collected in good_urls.
len(good_urls)

In [96]:
# Scatter of sentiment over time for the article at position 6 of `urls`.
df.loc[df['URL'] == urls[6]].plot.scatter(x='Date', y='Sentiment')

In [98]:
# Headline titles recorded for the article at position 6 of `urls`.
df.loc[df['URL'] == urls[6], 'Title']

In [2]:
# Spot check: score a single headline string with Afinn.
# NOTE(review): this cell's execution count restarts at 2, so it appears to
# have been run in a separate kernel session from the cells above — confirm.
from afinn import Afinn
afinn = Afinn()
# The string is passed exactly as written, including the padding spaces and
# the typographic quotes.
afinn.score(" China’s coast guard has triggered ‘panic’ in Taiwan. That’s only part of Beijing’s plan ")