# Grabbing Tweets by search terms
**By:** _Mike Scheibel_ 

Collecting tweets that mention the Social Dilemma documentary's Twitter account (`@SocialDilemma_`), excluding retweets.

In [1]:
import datetime
import tweepy
from pprint import pprint

# I've put my API keys in a .py file called Mike_API_Keys.py
from Mike_API_Keys import api_key, api_key_secret, access_token, access_token_secret

In [2]:
# Authenticate against the Twitter API through Tweepy:
#   1. create the OAuth handler from the app's key and secret,
#   2. attach the user access token and its secret,
#   3. wrap the handler in an API client.
auth = tweepy.OAuthHandler(
    api_key,
    api_key_secret,
)
auth.set_access_token(
    access_token,
    access_token_secret,
)
# wait_on_rate_limit=True asks Tweepy to wait out rate limits rather than
# raise an error partway through a long collection run.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
)

In [3]:
# Query: tweets mentioning @SocialDilemma_, with retweets filtered out.
search_words = '@SocialDilemma_ -filter:retweets'

# Cursor handles pagination over the search endpoint; .items() with no
# argument places no explicit cap on how many results are iterated.
# tweet_mode='extended' is what makes `full_text` available on each result.
# NOTE(review): Tweepy 4 renamed API.search to API.search_tweets - confirm
# the installed Tweepy version before re-running this cell.
tweets_all = tweepy.Cursor(
    api.search,
    tweet_mode='extended',
    q=search_words,
    lang='en',
).items()

# One (text, creation time, author screen name) tuple per tweet, collected
# into a list.
tweets = [
    (status.full_text, status.created_at, status.user.screen_name)
    for status in tweets_all
]
    

Writing Social Dilemma mentions to CSV file

In [6]:
import csv

# Write one header row followed by one row per collected tweet tuple.
# newline='' is required by the csv module: the writer emits its own line
# terminators, and without it text-mode newline translation produces
# '\r\r\n' row endings (blank lines between rows) on Windows.
with open('SocialDilemma_tweets.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["full_text", "created_at", "user.screen_name"])
    writer.writerows(tweets)

### Tokenize and Analyse Text

In [3]:
import nltk
import numpy as np

from string import punctuation
from collections import Counter

from pprint import pprint # get some prettier printing of objects

from nltk.corpus import stopwords

# English stop words from NLTK, stored as a set so the membership test used
# when filtering tokens is a hash lookup rather than a list scan.
english_stopwords = stopwords.words('english')
sw = set(english_stopwords)

In [4]:
# Read the saved CSV back in as one raw text string (header row and every
# column included). The context manager closes the file handle as soon as
# the read completes instead of leaving it open.
with open("SocialDilemma_tweets.csv", encoding='utf-8') as f:
    sd = f.read()

# Tokenize: lowercase once, split on whitespace, then keep only tokens that
# are purely alphabetic and are not English stop words. Because of the
# isalpha() check, any token with attached punctuation or digits
# (e.g. "movie.", "2020") is dropped entirely rather than cleaned.
sd_clean = [w for w in sd.lower().split() if w.isalpha() and w not in sw]

In [6]:
# Descriptive statistics over the cleaned token list.
total_tokens = len(sd_clean)
unique_tokens = len(set(sd_clean))

if total_tokens:
    # Lexical diversity = share of tokens that are distinct.
    lex_diversity = unique_tokens / total_tokens
    avg_token_len = np.mean([len(w) for w in sd_clean])
else:
    # No tokens (e.g. the search returned nothing): report zeros instead of
    # raising ZeroDivisionError / taking the mean of an empty list.
    lex_diversity = 0.0
    avg_token_len = 0.0

# The ten most frequent tokens with their counts.
top_10 = Counter(sd_clean).most_common(10)

results = {'tokens':total_tokens,
            'unique_tokens':unique_tokens,
            'avg_token_length':avg_token_len,
            'lexical_diversity':lex_diversity,
            'top_10':top_10}

# Bare expression so the notebook displays the dictionary as the cell output.
results

{'tokens': 3635,
 'unique_tokens': 1611,
 'avg_token_length': 6.108940852819807,
 'lexical_diversity': 0.44319119669876206,
 'top_10': [('social', 63),
  ('watch', 62),
  ('media', 44),
  ('people', 44),
  ('like', 43),
  ('us', 37),
  ('watched', 36),
  ('documentary', 29),
  ('watching', 29),
  ('see', 28)]}