In [None]:
# Sentiment analysis of cryptocurrency news headlines: polarity (negative / neutral / positive) and subjectivity

In [None]:
# Install dependencies (uncomment the line below if textblob is not installed yet)
# !pip install textblob

In [None]:
# Imports
import pandas as pd
import numpy as np
from textblob import TextBlob
import matplotlib.pyplot as plt
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn in this notebook.
plt.style.use('fivethirtyeight')

In [None]:
# Load data
# In Google Colab, open an upload dialog so 'crypto-news.csv' can be placed in
# the working directory. Outside Colab the google.colab package is unavailable,
# so skip the upload and expect the CSV to already sit next to the notebook.
try:
  from google.colab import files
except ImportError:
  files = None

if files is not None:
  # Keep the returned {filename: bytes} mapping in a variable: leaving the call
  # as the cell's last expression would echo the raw file contents as output.
  uploaded = files.upload()

In [None]:
# Read the semicolon-separated headlines file.
df = pd.read_csv('crypto-news.csv', delimiter=';')
# Index the rows by the parsed 'Date' values (day-first format). The original
# 'Date' column is kept untouched because later cells group by it.
df = df.set_index(pd.to_datetime(df['Date'].values, dayfirst=True))
# Only the last expression of a cell is rendered, so the intermediate sanity
# checks have to be printed explicitly to be visible.
print(df.shape)
print(df.index)
df.head()

In [None]:
# Define function for calculating text polarity
def polarity(text):
  """Return the polarity score TextBlob assigns to ``text``."""
  blob = TextBlob(text)
  return blob.sentiment.polarity

# Score every headline and store the result in a new 'Polarity' column
df['Polarity'] = df['Headline'].map(polarity)

In [None]:
# Define function for labeling polarity scores
def sentiment(score):
  """Map a polarity score to a sentiment label.

  Returns 'Negative' for scores below zero, 'Neutral' for a score of exactly
  zero, and 'Positive' for every other value.
  """
  if score < 0:
    return 'Negative'
  return 'Neutral' if score == 0 else 'Positive'

# Label each headline from its polarity score, then preview the frame
df['Sentiment'] = df['Polarity'].map(sentiment)
df.head()

In [None]:
# Bar chart of how many headlines fall into each sentiment class over the
# whole time frame covered by the dataset
sentiment_counts = df['Sentiment'].value_counts()
ax = sentiment_counts.plot.bar()
ax.set_title('Sentiment Histogram')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Counts')
plt.show()

In [None]:
# Plot the summed polarity for each day - a much better view of the trend than
# the overall sentiment counts.
# Select 'Polarity' before aggregating so the text columns are not summed too.
# Because this is a SUM, values may fall outside the [-1, 1] polarity range.
polaritySumPerDay = df.groupby('Date')['Polarity'].sum()

# The group keys come from the unparsed 'Date' column (presumably still text),
# which orders them lexicographically. Parse them with the same day-first
# setting used for the frame index and draw the points in chronological order.
# The grouped Series itself keeps its original index.
sum_dates = pd.to_datetime(polaritySumPerDay.index, dayfirst=True)
sum_order = sum_dates.argsort()

plt.figure(figsize=(12.33, 4.5))
plt.title('Sentiment Sum over Time')
plt.plot(sum_dates[sum_order], polaritySumPerDay.values[sum_order])
plt.xlabel('Date')
plt.ylabel('Polarity sum')
plt.show()
# The sudden decrease in sentiment is presumably related mainly to the Xinjiang region blackout.

In [None]:
# Show the polarity sum per day
polaritySumPerDay
# Because polarities are summed, a day's total can easily go beyond 1, especially on a news-rich day.

In [None]:
# Number of scored headlines (non-null polarity values) for each date
polarity_count = df.groupby('Date')['Polarity'].count()
polarity_count

In [None]:
# Average polarity per day: the daily polarity sum divided by the daily headline count
polarity_avg = polaritySumPerDay.div(polarity_count)
polarity_avg

In [None]:
# Display the average polarity per day
# The index of polarity_avg holds the unparsed 'Date' keys (presumably text),
# which are ordered lexicographically. Parse them day-first and plot in
# chronological order so the line follows time.
avg_dates = pd.to_datetime(polarity_avg.index, dayfirst=True)
avg_order = avg_dates.argsort()

plt.figure(figsize=(12.33, 4.5))
plt.title('Average Sentiment over Time')
plt.plot(avg_dates[avg_order], polarity_avg.values[avg_order])
plt.xlabel('Date')
plt.ylabel('Average polarity')
plt.show()

In [None]:
# Define function calculating subjectivity.
# Subjectivity is a real number between 0 and 1, where 0 is very objective and 1 is very subjective
def subjectivity(text):
  """Return the subjectivity score TextBlob assigns to ``text``."""
  blob = TextBlob(text)
  return blob.sentiment.subjectivity

# Score every headline for subjectivity, then preview the frame
df['Subjectivity'] = df['Headline'].map(subjectivity)
df.head()

In [None]:
# Visualise subjectivity against polarity on a scatter diagram
plt.figure(figsize=(8, 6))
# Pass the whole columns in a single call. The frame is indexed by datetime, so
# looking rows up with integer keys (df['Polarity'][i]) relies on a deprecated
# positional fallback, and drawing one scatter artist per row is needlessly slow.
plt.scatter(df['Polarity'], df['Subjectivity'], color='Purple')
plt.title('Polarity/Subjectivity Analysis')
plt.xlabel('polarity')
plt.ylabel('subjectivity (objective => subjective)')
plt.show()