#### Sample module to generate summary from text corpus data

In [1]:
import re
import json
import pandas as pd
from textblob import TextBlob
from textblob import Blobber
from textblob.taggers import NLTKTagger
from textblob.tokenizers import SentenceTokenizer
from textblob.parsers import PatternParser
from textblob.np_extractors import ConllExtractor
from textblob.sentiments import NaiveBayesAnalyzer

In [2]:
# Shared blob factory: tb(<str>) (used below as tb(query)) builds the text
# object that Summary wraps, using the components configured here.
tb = Blobber(pos_tagger=NLTKTagger(),  # part-of-speech tagger
            tokenizer=SentenceTokenizer(),  # .tokens come out as whole sentences, not words
            parser=PatternParser(),  # produces the word/TAG/CHUNK/PNP strings split by generate_parse_data
            np_extractor=ConllExtractor(),  # noun-phrase extractor (not used by Summary in this notebook)
            analyzer=NaiveBayesAnalyzer())  # sentiment as (classification, p_pos, p_neg)

In [3]:
from textblob import Word
from collections import OrderedDict

In [4]:
class Summary:
    """Collect simple linguistic summaries of a TextBlob-like text.

    ``text`` must expose the members used by the methods below: ``split()``,
    ``ngrams()``, ``parse()``, ``sentences``, ``word_counts`` and ``tokens``.
    The items returned by ``text.split()`` must in turn provide ``synsets``
    and ``spellcheck()`` for the synset / spelling helpers to work.
    """

    def __init__(self, text):
        """Store ``text`` and cache its whitespace-separated words."""
        self.text = text
        self.words = text.split()

    def _unique_words(self):
        """Return ``self.words`` without duplicates, first occurrence first."""
        return list(OrderedDict.fromkeys(self.words))

    def generate_ngrams(self, n=4):
        """Return the ``n``-grams of the text (``n`` defaults to 4)."""
        return self.text.ngrams(n)

    def generate_parse_data(self):
        """Return ``[word, tags]`` pairs taken from the parser output.

        Each whitespace-separated token of ``text.parse()`` is split once at
        its first ``/``, so ``'be/VB/B-VP/O'`` becomes
        ``['be', 'VB/B-VP/O']``.  A token without ``/`` yields a one-element
        list.
        """
        # Split on any whitespace rather than a literal ' ': parser output
        # may put each sentence on its own line, and a plain space split
        # would glue the last token of one sentence to the first of the
        # next.  It also makes an empty parse produce [] instead of [['']].
        tokens = self.text.parse().split()
        return [token.split('/', 1) for token in tokens]

    def generate_sentiment_data(self, all_sentences=False):
        """Return sentence/sentiment data for the text.

        Args:
            all_sentences: When false (the default, matching the historic
                behaviour) only the first sentence is reported.  When true,
                every sentence is reported.

        Returns:
            By default ``[sentence, sentiment]`` for the first sentence, or
            ``None`` if the text has no sentences.  With
            ``all_sentences=True`` a list of ``[sentence, sentiment]`` pairs,
            one per sentence (empty if there are none).
        """
        sentences = self.text.sentences
        if all_sentences:
            return [[sentence, sentence.sentiment] for sentence in sentences]
        # Historic contract: report only the first sentence.
        for sentence in sentences:
            return [sentence, sentence.sentiment]
        return None

    def generate_wordcount(self):
        """Return the text's ``word_counts`` mapping (word -> occurrences)."""
        return self.text.word_counts

    def generate_tokens(self):
        """Return the text's ``tokens`` as produced by its tokenizer."""
        return self.text.tokens

    def generate_synsets(self):
        """Return ``(word, word.synsets)`` for each distinct word, in order."""
        return [(word, word.synsets) for word in self._unique_words()]

    def generate_spell_suggestion(self):
        """Return ``(word, word.spellcheck())`` for each distinct word, in order."""
        return [(word, word.spellcheck()) for word in self._unique_words()]
        

In [5]:
# Sample input; its misspelled words ('woild', 'wondergul', 'labrary') are what
# generate_spell_suggestion() proposes corrections for further down.
query = 'That woild be wondergul to have you on this labrary'
q = tb(query)  # build the blob with the Blobber configured above

In [6]:
# Wrap the blob; the cells below call the Summary helpers on this instance.
s = Summary(q)

In [7]:
s.generate_wordcount()

defaultdict(int,
            {'that': 1,
             'woild': 1,
             'be': 1,
             'wondergul': 1,
             'to': 1,
             'have': 1,
             'you': 1,
             'on': 1,
             'this': 1,
             'labrary': 1})

In [8]:
s.generate_ngrams(n=3)

[WordList(['That', 'woild', 'be']),
 WordList(['woild', 'be', 'wondergul']),
 WordList(['be', 'wondergul', 'to']),
 WordList(['wondergul', 'to', 'have']),
 WordList(['to', 'have', 'you']),
 WordList(['have', 'you', 'on']),
 WordList(['you', 'on', 'this']),
 WordList(['on', 'this', 'labrary'])]

In [9]:
s.generate_parse_data()

[['That', 'DT/B-NP/O'],
 ['woild', 'NN/I-NP/O'],
 ['be', 'VB/B-VP/O'],
 ['wondergul', 'NN/B-NP/O'],
 ['to', 'TO/B-PP/O'],
 ['have', 'VBP/B-VP/O'],
 ['you', 'PRP/B-NP/O'],
 ['on', 'IN/B-PP/B-PNP'],
 ['this', 'DT/B-NP/I-PNP'],
 ['labrary', 'NN/I-NP/I-PNP']]

In [10]:
s.generate_sentiment_data()  # yields [sentence, sentiment] for the first sentence only

[Sentence("That woild be wondergul to have you on this labrary"),
 Sentiment(classification='neg', p_pos=0.46325421468243416, p_neg=0.5367457853175658)]

In [11]:
s.generate_spell_suggestion()

[('That',
  [('That', 0.8001534821257275),
   ('What', 0.1925561169022191),
   ('Hat', 0.006714843000575558),
   ('Chat', 0.0005755579714779049)]),
 ('woild',
  [('would', 0.831063829787234),
   ('world', 0.15404255319148935),
   ('wild', 0.014893617021276596)]),
 ('be', [('be', 1.0)]),
 ('wondergul', [('wonderful', 1.0)]),
 ('to', [('to', 1.0)]),
 ('have', [('have', 1.0)]),
 ('you', [('you', 1.0)]),
 ('on', [('on', 1.0)]),
 ('this', [('this', 1.0)]),
 ('labrary', [('library', 1.0)])]

In [12]:
s.generate_synsets()

[('That', []),
 ('woild', []),
 ('be',
  [Synset('beryllium.n.01'),
   Synset('be.v.01'),
   Synset('be.v.02'),
   Synset('be.v.03'),
   Synset('exist.v.01'),
   Synset('be.v.05'),
   Synset('equal.v.01'),
   Synset('constitute.v.01'),
   Synset('be.v.08'),
   Synset('embody.v.02'),
   Synset('be.v.10'),
   Synset('be.v.11'),
   Synset('be.v.12'),
   Synset('cost.v.01')]),
 ('wondergul', []),
 ('to', []),
 ('have',
  [Synset('rich_person.n.01'),
   Synset('have.v.01'),
   Synset('have.v.02'),
   Synset('experience.v.03'),
   Synset('own.v.01'),
   Synset('get.v.03'),
   Synset('consume.v.02'),
   Synset('have.v.07'),
   Synset('hold.v.03'),
   Synset('have.v.09'),
   Synset('have.v.10'),
   Synset('have.v.11'),
   Synset('have.v.12'),
   Synset('induce.v.02'),
   Synset('accept.v.02'),
   Synset('receive.v.01'),
   Synset('suffer.v.02'),
   Synset('have.v.17'),
   Synset('give_birth.v.01'),
   Synset('take.v.35')]),
 ('you', []),
 ('on',
  [Synset('on.a.01'),
   Synset('on.a.02'),
   S

In [13]:
s.generate_tokens()  # tb uses SentenceTokenizer, so each token is a whole sentence

WordList(['That woild be wondergul to have you on this labrary'])