# Setup

*   Installation of third-party libraries
*   Imports
*   Download of third-party resources for NLTK




In [0]:
!pip install git+https://github.com/stefangindl/readin.git

In [0]:
import os
from collections import defaultdict
from statistics import mean

from nltk.tokenize import TweetTokenizer
from textblob import TextBlob
import nltk
import pandas as pd

import readin


# Download the 'brown' and 'punkt' NLTK resources, in that order.
for resource in ('brown', 'punkt'):
  nltk.download(resource)

Setting the credentials - the real ones remain hidden.

In [0]:
# Twitter API credentials, passed as keyword arguments to readin.from_twitter.
# Each value is read from an environment variable named after its key
# (TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET, TWITTER_CONSUMER_KEY,
# TWITTER_CONSUMER_SECRET) so that real secrets never have to be written into
# the notebook. The placeholder 'abc123' is used when a variable is not set.
creds = {
  key: os.environ.get('TWITTER_' + key.upper(), 'abc123')
  for key in ('access_token',
              'access_token_secret',
              'consumer_key',
              'consumer_secret')
}

# Data Acquisition

Crawling the Twitter timeline of the profile "stefan_gindl".

In [0]:
# Fetch the timeline of the profile 'stefan_gindl' through readin, passing the
# credential dict as keyword arguments. Later cells read df['text'], so the
# result is presumably a pandas DataFrame with a 'text' column — TODO confirm.
df = readin.from_twitter('stefan_gindl', **creds)

# Analyze the Tweets


1.   Extract mentions
2.   Determine Tweet sentiment



In [0]:
# Single tokenizer instance shared by the token-extraction helpers
# (get_mentions and get_topics) in this notebook.
tokenizer = TweetTokenizer()

def get_mentions(text):
  """Return the @-mentions found in `text`, in order of appearance.

  The text is split with the module-level `tokenizer`; every token that
  starts with '@' and has at least one further character is kept. A bare
  '@' (as in "see you @ noon") is not a mention and is skipped.
  """
  return [
    token
    for token in tokenizer.tokenize(text)
    if len(token) > 1 and token.startswith('@')
  ]


def get_sentiment(text):
  """Return the polarity that TextBlob reports for `text`."""
  blob = TextBlob(text)
  return blob.polarity

# Derive the per-Tweet sentiment and mention columns from the Tweet text.
tweet_texts = df['text']
df['sentiment'] = tweet_texts.apply(get_sentiment)
df['mentions'] = tweet_texts.apply(get_mentions)

Get the average sentiment of the Tweets in which each mention occurs:

In [0]:
# Collect, for every mention, the sentiment of each Tweet it appears in.
sentiments_by_mention = defaultdict(list)
for row_mentions, row_sentiment in zip(df['mentions'], df['sentiment']):
  for mention in row_mentions:
    sentiments_by_mention[mention].append(row_sentiment)

# Collapse each list of sentiments into its arithmetic mean.
cnts = {
  mention: mean(values)
  for mention, values in sentiments_by_mention.items()
}

# Visualization

In [0]:
# Bar chart of the mean sentiment per mention, sorted ascending. The title and
# axis labels let the figure stand on its own; the trailing ';' keeps the
# return value of ax.set() out of the cell output.
ser_cnts = pd.Series(cnts)
ax = ser_cnts.sort_values().plot.bar(title='Average Tweet sentiment per mention')
ax.set(xlabel='Mention', ylabel='Mean polarity');

# Make a Twitter-sentiment network

In [0]:
import networkx as nx

In [0]:
def get_sentiment_color(value):
  """Map a sentiment value to a colour name.

  Values below zero give 'red', values above zero give 'green', and
  anything else (zero, or a value that is neither below nor above zero)
  gives 'grey'.
  """
  if value < 0:
    color = 'red'
  elif value > 0:
    color = 'green'
  else:
    color = 'grey'
  return color

G = nx.Graph()

# Connect the central 'Stefan Gindl' node to every mention and store the
# sentiment colour on the edge itself. The colour list passed to nx.draw is
# then read back in G.edges() order, so colours and edges stay aligned by
# construction instead of relying on dict-iteration order matching the
# graph's edge order.
for token, sentiment in cnts.items():
  G.add_edge('Stefan Gindl', token, color=get_sentiment_color(sentiment))

colors = [color for _, _, color in G.edges(data='color')]
nx.draw(G, edge_color=colors, with_labels=True)

# Appendix

Extract hashtags and noun phrases from Tweets.


In [0]:
def get_topics(text):
  """Return the hashtags found in `text`, in order of appearance.

  The text is split with the module-level `tokenizer`; every token that
  starts with '#' and has at least one further character is kept. A bare
  '#' (as in "# 1") is not a hashtag and is skipped.
  """
  return [
    token
    for token in tokenizer.tokenize(text)
    if len(token) > 1 and token.startswith('#')
  ]

def get_noun_phrases(text):
  """Return the noun phrases that TextBlob extracts from `text`."""
  blob = TextBlob(text)
  return blob.noun_phrases


# Derive the per-Tweet hashtag and noun-phrase columns from the Tweet text.
tweet_texts = df['text']
df['topics'] = tweet_texts.apply(get_topics)
df['noun_phrases'] = tweet_texts.apply(get_noun_phrases)