In [1]:
# load libraries
import random

import nltk
from nltk.corpus import stopwords

In [2]:
# Keep only the purely alphabetic tokens of the State of the Union corpus;
# str.isalpha() rejects punctuation tokens (and any token containing digits).
words = [
    token
    for token in nltk.corpus.state_union.words()
    if token.isalpha()
]

In [3]:
# Load the English stopword list through the `stopwords` reader imported above.
english_stopwords = stopwords.words("english")

In [5]:
# Drop stopwords. The stopword list is lowercase, so each word is lowercased
# for the comparison only; the words that are kept retain their original case.
# A set gives constant-time membership tests instead of scanning the whole
# stopword list once per corpus token.
stopword_set = set(english_stopwords)
words = [w for w in words if w.lower() not in stopword_set]

### Word Tokenization and Creating Frequency Distributions

In [7]:
# Small sample string for the tokenization step below. With that step commented
# out, `text` is not used by anything else in this cell.
text = """
For some quick analysis, creating a corpus could be overkill.
If all you need is a word list,
there are simpler ways to achieve that goal."""


# tokenizing strings, filtering punctuations
# (left disabled: enabling it would replace `words` with the sample's tokens)
# words = [w for w in nltk.word_tokenize(text) if w.isalpha()]
# creating frequency distribution from whatever `words` currently holds
freq = nltk.FreqDist(words)


['For',
 'some',
 'quick',
 'analysis',
 ',',
 'creating',
 'a',
 'corpus',
 'could',
 'be']

### visualizing information

In [6]:
# Three most frequent words as (word, count) pairs, highest count first.
freq.most_common(3)

[('must', 1568), ('people', 1291), ('world', 1128)]

In [7]:
# Print the three most frequent words as an aligned table, words above counts.
freq.tabulate(3)

  must people  world 
  1568   1291   1128 


In [8]:
# Lookups are case-sensitive: this counts only the exact spelling 'America'.
freq['America']

1076

In [9]:
# The all-lowercase spelling was never counted, so the lookup yields 0.
freq['america']

0

In [10]:
# The all-caps spelling is tallied under its own key, separate from 'America'.
freq['AMERICA']

3

In [11]:
# Case-insensitive counts: fold every word to lowercase before tallying.
lower_fd = nltk.FreqDist(word.lower() for word in words)

### Extracting Concordance and Collocations

In [12]:
# Build the Text from the unfiltered corpus tokens, so punctuation tokens are
# part of the concordance context. This rebinds `text`, which an earlier cell
# used for a sample string.
# The query is matched case-insensitively: "america" finds "America" in the output.
text = nltk.Text(nltk.corpus.state_union.words())
text.concordance("america", lines=4)

Displaying 4 of 1079 matches:
 would want us to do . That is what America will do . So much blood has already
ay , the entire world is looking to America for enlightened leadership to peace
beyond any shadow of a doubt , that America will continue the fight for freedom
 to make complete victory certain , America will never become a party to any pl


In [14]:
# concordance_list() returns the matches instead of printing them; each entry's
# .line attribute holds the formatted context line that is printed here.
concordance_list = text.concordance_list("america", lines=3)
for entry in concordance_list:
    print(entry.line)

 would want us to do . That is what America will do . So much blood has already
ay , the entire world is looking to America for enlightened leadership to peace
beyond any shadow of a doubt , that America will continue the fight for freedom


In [17]:
# Text.vocab() on a small hand-made text: tokenize three lines from the Zen of
# Python, wrap the tokens in a Text, and read its frequency distribution.
# (The third line previously read "imple"; it is meant to be "Simple".)
d_words = nltk.word_tokenize("""
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
""")
d_text = nltk.Text(d_words)
fd = d_text.vocab()     # same as FreqDist
# Print the three most frequent tokens as a table.
fd.tabulate(3)


    is better   than 
     3      3      3 


In [18]:
# Collocations: rebuild `words` from the corpus, dropping only non-alphabetic
# tokens (stopwords are kept this time), then collect three-word sequences.
words = list(filter(str.isalpha, nltk.corpus.state_union.words()))
finder = nltk.collocations.TrigramCollocationFinder.from_words(words)

In [20]:
# ngram_fd is the finder's frequency distribution over trigrams;
# show the two most frequent (trigram, count) pairs.
finder.ngram_fd.most_common(2)

[(('the', 'United', 'States'), 294), (('the', 'American', 'people'), 185)]

In [21]:
# Print the two most frequent trigrams as an aligned table.
finder.ngram_fd.tabulate(2)

  ('the', 'United', 'States') ('the', 'American', 'people') 
                          294                           185 


### Sentiment Analyzer

In [22]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [23]:
# Create the analyzer once and score a single sentence. polarity_scores returns
# a dict with 'neg', 'neu', 'pos' and 'compound' entries (see the output).
sia = SentimentIntensityAnalyzer()
sia.polarity_scores("Wow, NLTK is really powerful!")

{'neg': 0.0, 'neu': 0.295, 'pos': 0.705, 'compound': 0.8012}

In [24]:
# Load the sample tweets as raw strings, rewriting "://" to "//" in each one
# (URLs therefore show up as "http//..." when the tweets are printed).
# NOTE(review): presumably this keeps the ":/" inside URL schemes from being
# scored as an emoticon by the sentiment analyzer — confirm.
tweets = [t.replace("://",  "//") for t in nltk.corpus.twitter_samples.strings()]

In [27]:
from random import shuffle



def is_postive(tweet: str) -> bool:
    """Tell whether a tweet scores as positive.

    The tweet is scored with the notebook-level ``sia`` analyzer and the
    'compound' entry of the resulting scores is inspected.

    Args:
        tweet: Text of the tweet to score.

    Returns:
        True if the compound score is strictly greater than 0; a score of
        exactly 0 counts as not positive.
    """
    scores = sia.polarity_scores(tweet)
    compound = scores['compound']
    return compound > 0

# Shuffle with a dedicated, fixed-seed generator so that Restart & Run All
# prints the same ten tweets every time; the module-level random state is left
# untouched. `tweets` is still reordered in place, so re-running this cell
# alone shuffles the already-shuffled list again.
SAMPLE_SEED = 42
random.Random(SAMPLE_SEED).shuffle(tweets)
for tweet in tweets[:10]:
    # Columns: fixed "> 0" label, is_postive() result, tweet text.
    print("> 0", is_postive(tweet), tweet)

> 0 True @sweettoothmarti should keep you going for a few mins lol :-)))))
> 0 False @Red_Doom @Twymaan you lied to me :(
You said you could t drink :(
> 0 True @IanBell1916 The aim is to make Scots vote for labour, to keep tories out. How original
> 0 True Ed Miliband's refusal to admit that Labour overspent could cost him dear - Spectator Blogs http//t.co/2HuDWGfqjD
> 0 True RT @HouseOfTraitors: #bbcbias Number of times Party/Leader mentioned tonight on @BBCNews 

LAB  14
CON  9
LIB    7
SNP  5
UKIP  0
> 0 False RT @StewartHosieSNP: @theSNP want to lock the Tories out of power. Why is Miliband threatening to allow Cameron back into Downing Street?  …
> 0 True @blairmcdougall @tullythedhog EdM was quite clear. HE would be happier with a Tory PM.
> 0 False RT @NursieDear25: Not a fan of Miliband, but the idea of him being so bitter over losing the Scottish vote that he enables another Tory gov…
> 0 False Farage is a bit of an anti-climax
> 0 True RT @Tommy_Colc: Financial Times come ou