# Sentiment Analysis Notebook

Generate the sentiment polarity column and a new CSV file for one day of news articles.

In [None]:
import pandas
import textblob

### One Day Sample

In [None]:
# Load the one-day sample of news articles into a DataFrame.
articles_data = pandas.read_csv(filepath_or_buffer='one_day_sample.csv')

In [None]:
def get_avg_polarity(text):
    """Calculate the average sentiment polarity of a text.

    The text is split into sentences with TextBlob and the polarity of each
    sentence is averaged.

    Args:
        text (str): The text whose polarity should be calculated. Other
            values are converted with ``str()``; missing values (``None``
            or NaN) are treated as having no text.
    Returns:
        float: The average polarity across all sentences, or None if the
            text is missing or contains no sentences.
    """
    # pandas represents a missing description as NaN; passing it through
    # str() would score the literal text "nan" as if it were a real
    # description, so report missing text as "no polarity" instead.
    if pandas.api.types.is_scalar(text) and pandas.isna(text):
        return None

    polarities = [
        sentence.sentiment.polarity
        for sentence in textblob.TextBlob(str(text)).sentences
    ]

    if not polarities:
        return None

    return sum(polarities) / len(polarities)

In [None]:
# Score every article description and store the result as a new column.
description_polarities = articles_data['description'].apply(get_avg_polarity)
articles_data['descriptionPolarity'] = description_polarities

In [None]:
# Histogram of the description polarity scores.
articles_data['descriptionPolarity'].plot(kind='hist')

Save results with sentiment

In [None]:
# Write only the source, title and polarity columns to the output CSV.
articles_data.to_csv(
    'articles_with_sentiment.csv',
    columns=['source', 'title', 'descriptionPolarity'],
)

In [None]:
# Display the articles table, including the descriptionPolarity column.
articles_data

Get the average description polarity of articles by source

In [None]:
# Mean description polarity per source, as a two-column table sorted from
# the most negative source to the most positive one.
# reset_index() already returns a new DataFrame, so no explicit copy is
# needed before it.
sources_ranked = (
    articles_data
    .groupby('source')['descriptionPolarity']
    .mean()
    .reset_index()
    .sort_values('descriptionPolarity')
)

In [None]:
# Persist the ranked per-source averages.
sources_ranked.to_csv(path_or_buf='sources_ranked.csv')

In [None]:
# Display the per-source mean polarity table, sorted ascending by polarity.
sources_ranked

Some exploratory data analysis (EDA)

In [None]:
# Mean description polarity for each source.
articles_data.groupby('source')['descriptionPolarity'].agg('mean')

In [None]:
# Most negative description polarity for each source.
articles_data.groupby('source')['descriptionPolarity'].agg('min')

In [None]:
# Most positive description polarity for each source.
articles_data.groupby('source')['descriptionPolarity'].agg('max')

### March Extremes

In [None]:
# Load the March article descriptions into a DataFrame.
march_data = pandas.read_csv(filepath_or_buffer='march_descriptions.csv')

In [None]:
# Score every March description and store the result as a new column.
march_polarities = march_data['description'].apply(get_avg_polarity)
march_data['descriptionPolarity'] = march_polarities

In [None]:
# Lowest description polarity observed on each day of March.
min_vals = (
    march_data
    .groupby('dayOfMarch')['descriptionPolarity']
    .agg('min')
    .reset_index()
)

In [None]:
# Highest description polarity observed on each day of March.
max_vals = (
    march_data
    .groupby('dayOfMarch')['descriptionPolarity']
    .agg('max')
    .reset_index()
)

In [None]:
# Combine the daily minimum and maximum into one row per day; the shared
# descriptionPolarity column is disambiguated with Min/Max suffixes.
joined_vals = min_vals.merge(
    max_vals,
    on='dayOfMarch',
    suffixes=('Min', 'Max'),
)

In [None]:
# Plot the daily minimum and maximum description polarity as two lines in
# the same colour, with the legend hidden.
ax = joined_vals.plot.line(
    x='dayOfMarch',
    y=['descriptionPolarityMin', 'descriptionPolarityMax'],
    # The plotted data is the March sample (dayOfMarch), so the labels say
    # March rather than June. NOTE(review): the year is kept from the
    # original label -- confirm it is correct.
    title='Range of Sentiment (March 2019)',
    color='#007AFF',
    legend=False,
    figsize=(10, 5),
)

ax.set_xlabel('Day of March 2019')
ax.set_ylabel('Sentiment Polarity')