In [1]:
import nltk

In [2]:
from nltk.tokenize import sent_tokenize, word_tokenize, PunktSentenceTokenizer

In [3]:
example_text="Hello Mr. Smith, how are you doing today? The weather is great and Python is awesome. The sky is blue"

In [4]:
print(sent_tokenize(example_text))

['Hello Mr. Smith, how are you doing today?', 'The weather is great and Python is awesome.', 'The sky is blue']


In [5]:
words=word_tokenize(example_text)

In [6]:
for i in word_tokenize(example_text):
    print(i)

Hello
Mr.
Smith
,
how
are
you
doing
today
?
The
weather
is
great
and
Python
is
awesome
.
The
sky
is
blue


In [7]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [8]:
stop_words=list(stopwords.words("english"))
stop_words.append("The")

In [9]:
print(words)

['Hello', 'Mr.', 'Smith', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'blue']


In [10]:
x=[]
for w in words:
    if w not in stop_words:
        x.append(w)

In [11]:
y=[]
y=[w for w in words if w not in stop_words]

In [12]:
y

['Hello',
 'Mr.',
 'Smith',
 ',',
 'today',
 '?',
 'weather',
 'great',
 'Python',
 'awesome',
 '.',
 'sky',
 'blue']

In [13]:
from nltk.stem import PorterStemmer

In [14]:
ps=PorterStemmer()
example_words=["python","pythoner","pythoning","pythoned","pythonly"]

In [15]:
for w in example_words:
    print(ps.stem(w))

python
python
python
python
pythonli


In [16]:
new_text="It is very important to be pythonly while you are pythoning with python. All pythoners have pythoned poorly atleast once"

In [17]:
words =word_tokenize(new_text)

In [18]:
for w in words:
    print(ps.stem(w))

It
is
veri
import
to
be
pythonli
while
you
are
python
with
python
.
all
python
have
python
poorli
atleast
onc


In [19]:
import nltk
from nltk.corpus import state_union

In [20]:
train_text=state_union.raw("2005-GWBush.txt")
sample_text=state_union.raw("2006-GWBush.txt")

In [21]:
custom_sent_tokenizer=PunktSentenceTokenizer(train_text)
tokenized=custom_sent_tokenizer.tokenize(sample_text)

In [22]:
def process_content():
    try:
        for i in tokenized:
            words=nltk.word_tokenize(i)
            tagged=nltk.pos_tag(words)
            print(tagged)
    except Exception as e:
        print(str(e))

# Chunking

In [24]:
# POS tag list:

# CC	coordinating conjunction
# CD	cardinal digit
# DT	determiner
# EX	existential there (like: "there is" ... think of it like "there exists")
# FW	foreign word
# IN	preposition/subordinating conjunction
# JJ	adjective	'big'
# JJR	adjective, comparative	'bigger'
# JJS	adjective, superlative	'biggest'
# LS	list marker	1)
# MD	modal	could, will
# NN	noun, singular 'desk'
# NNS	noun plural	'desks'
# NNP	proper noun, singular	'Harrison'
# NNPS	proper noun, plural	'Americans'
# PDT	predeterminer	'all the kids'
# POS	possessive ending	parent\'s
# PRP	personal pronoun	I, he, she
# PRP$	possessive pronoun	my, his, hers
# RB	adverb	very, silently, 
# RBR	adverb, comparative	better
# RBS	adverb, superlative	best
# RP	particle	give up
# TO	to	go 'to' the store.
# UH	interjection	errrrrrrrm
# VB	verb, base form	take
# VBD	verb, past tense	took
# VBG	verb, gerund/present participle	taking
# VBN	verb, past participle	taken
# VBP	verb, sing. present, non-3d	take
# VBZ	verb, 3rd person sing. present	takes
# WDT	wh-determiner	which
# WP	wh-pronoun	who, what
# WP$	possessive wh-pronoun	whose
# WRB	wh-abverb	where, when

In [25]:
# Modifiers:

# {1,3} = for digits, u expect 1-3 counts of digits, or "places"
# + = match 1 or more
# ? = match 0 or 1 repetitions.
# * = match 0 or MORE repetitions
# $ = matches at the end of string
# ^ = matches start of a string
# | = matches either/or. Example x|y = will match either x or y
# [] = range, or "variance"
# {x} = expect to see this amount of the preceding code.
# {x,y} = expect to see this x-y amounts of the precedng code

# Identifiers:

# \d = any number
# \D = anything but a number
# \s = space
# \S = anything but a space
# \w = any letter
# \W = anything but a letter
# . = any character, except for a new line
# \b = space around whole words
# \. = period. must use backslash, because . normally means any character.


In [26]:
def process_content1():
    try:
        for i in tokenized:
            words=nltk.word_tokenize(i)
            tagged=nltk.pos_tag(words)
            chunkGram =r"""Chunk: {<RB.?>*<VB.?>*<NNP><NN>?}"""
            chunkParser=nltk.RegexpParser(chunkGram)
            chunked=chunkParser.parse(tagged)
            chunked.draw()
    except Exception as e:
        print(str(e))

# Chinking

In [28]:
def process_content2():
    try:
        for i in tokenized[5:]:
            print(i)
            words=nltk.word_tokenize(i)
            tagged=nltk.pos_tag(words)
            chunkGram=r"""Chunk: {<.*>+} 
                        }<VB.?|IN|DT>+{ """
            chunkParser=nltk.RegexpParser(chunkGram)
            chunked=chunkParser.parse(tagged)
#             chunked.draw()
    except Exception as e:
        print(str(e))

In [30]:
sentences=nltk.sent_tokenize(train_text)

In [31]:
sentences[1]

'And tonight that is a privilege we share with newly-elected leaders of Afghanistan, the Palestinian Territories, Ukraine, and a free and sovereign Iraq.'

In [32]:
tokenized[1]

'Mr. Speaker, Vice President Cheney, members of Congress, members of the Supreme Court and diplomatic corps, distinguished guests, and fellow citizens: Today our nation lost a beloved, graceful, courageous woman who called America to its founding ideals and carried on a noble dream.'

# Name Entity Recognition

In [33]:
def process_content3():
    try:
        for i in tokenized:
            words=nltk.word_tokenize(i)
            tagged=nltk.pos_tag(words)
            namedEnt=nltk.ne_chunk(tagged)
            print(namedEnt)
    except Exception as e:
        print(str(e))

# Lemmatizing

#### Most of the times better stemming

In [35]:
from nltk.stem import WordNetLemmatizer

In [36]:
lemmatizer=WordNetLemmatizer()
print(lemmatizer.lemmatize('cats'))
print(lemmatizer.lemmatize('cacti'))
print(lemmatizer.lemmatize('geese'))
print(lemmatizer.lemmatize('rocks'))
print(lemmatizer.lemmatize('python'))
print(lemmatizer.lemmatize('better',pos="a"))
print(lemmatizer.lemmatize('best',pos="a"))
print(lemmatizer.lemmatize('run'))
print(lemmatizer.lemmatize('better','v'))



cat
cactus
goose
rock
python
good
best
run
better


# NLTK Corpora

In [37]:
import sys
print(sys.path)

['', '/Users/paras', '/anaconda3/lib/python37.zip', '/anaconda3/lib/python3.7', '/anaconda3/lib/python3.7/lib-dynload', '/anaconda3/lib/python3.7/site-packages', '/anaconda3/lib/python3.7/site-packages/aeosa', '/anaconda3/lib/python3.7/site-packages/IPython/extensions', '/Users/paras/.ipython']


In [38]:
print(nltk.__file__)

/anaconda3/lib/python3.7/site-packages/nltk/__init__.py


In [39]:
from nltk.corpus import gutenberg

In [40]:
sample=gutenberg.raw("bible-kjv.txt")

In [41]:
tok=sent_tokenize(sample)

In [42]:
print(tok[5:15])

['1:5 And God called the light Day, and the darkness he called Night.', 'And the evening and the morning were the first day.', '1:6 And God said, Let there be a firmament in the midst of the waters,\nand let it divide the waters from the waters.', '1:7 And God made the firmament, and divided the waters which were\nunder the firmament from the waters which were above the firmament:\nand it was so.', '1:8 And God called the firmament Heaven.', 'And the evening and the\nmorning were the second day.', '1:9 And God said, Let the waters under the heaven be gathered together\nunto one place, and let the dry land appear: and it was so.', '1:10 And God called the dry land Earth; and the gathering together of\nthe waters called he Seas: and God saw that it was good.', '1:11 And God said, Let the earth bring forth grass, the herb yielding\nseed, and the fruit tree yielding fruit after his kind, whose seed is\nin itself, upon the earth: and it was so.', '1:12 And the earth brought forth grass, and

# WordNet

In [43]:
from nltk.corpus import wordnet

In [44]:
syns=wordnet.synsets("program")
syns

[Synset('plan.n.01'),
 Synset('program.n.02'),
 Synset('broadcast.n.02'),
 Synset('platform.n.02'),
 Synset('program.n.05'),
 Synset('course_of_study.n.01'),
 Synset('program.n.07'),
 Synset('program.n.08'),
 Synset('program.v.01'),
 Synset('program.v.02')]

In [45]:
#synset
print(syns[0].name())

plan.n.01


In [46]:
# just the word
print(syns[0].lemmas()[0].name())

plan


In [47]:
#definition
print(syns[0].definition())

a series of steps to be carried out or goals to be accomplished


In [48]:
#examples
syns[0].examples()

['they drew up a six-step plan', 'they discussed plans for a new bond issue']

In [49]:
synonyms =[]
antonyms=[]

In [50]:
for syn in wordnet.synsets("good"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
            print(l.antonyms()[0])
            antonyms.append(l.antonyms()[0].name())

Lemma('evil.n.03.evil')
Lemma('evil.n.03.evilness')
Lemma('bad.n.01.bad')
Lemma('bad.n.01.badness')
Lemma('bad.a.01.bad')
Lemma('evil.a.01.evil')
Lemma('ill.r.01.ill')


In [51]:
synonyms

['good',
 'good',
 'goodness',
 'good',
 'goodness',
 'commodity',
 'trade_good',
 'good',
 'good',
 'full',
 'good',
 'good',
 'estimable',
 'good',
 'honorable',
 'respectable',
 'beneficial',
 'good',
 'good',
 'good',
 'just',
 'upright',
 'adept',
 'expert',
 'good',
 'practiced',
 'proficient',
 'skillful',
 'skilful',
 'good',
 'dear',
 'good',
 'near',
 'dependable',
 'good',
 'safe',
 'secure',
 'good',
 'right',
 'ripe',
 'good',
 'well',
 'effective',
 'good',
 'in_effect',
 'in_force',
 'good',
 'good',
 'serious',
 'good',
 'sound',
 'good',
 'salutary',
 'good',
 'honest',
 'good',
 'undecomposed',
 'unspoiled',
 'unspoilt',
 'good',
 'well',
 'good',
 'thoroughly',
 'soundly',
 'good']

In [52]:
antonyms

['evil', 'evilness', 'bad', 'badness', 'bad', 'evil', 'ill']

In [53]:
w1=wordnet.synset("ship.n.01")
w2=wordnet.synset("boat.n.01")

In [54]:
print(w1.wup_similarity(w2))

0.9090909090909091


In [55]:
w1=wordnet.synset("ship.n.01")
w2=wordnet.synset("car.n.01")

In [56]:
print(w1.wup_similarity(w2))

0.6956521739130435


# Text Classification

In [57]:
import random
from nltk.corpus import movie_reviews

In [58]:
documents=[(list(movie_reviews.words(fileid)),category)
          for category in movie_reviews.categories()
          for fileid in movie_reviews.fileids(category)]

In [60]:
# another way
documents=[]
for category in movie_reviews.categories():
#     print(category)
    for fileid in movie_reviews.fileids(category):
#         print(category)
#         print(movie_reviews.words(fileid),category)
#         print(fileid)
        documents.append((list(movie_reviews.words(fileid)),category))

In [61]:
random.shuffle(documents)

In [62]:
all_words=[]
for w in movie_reviews.words():
    all_words.append(w.lower())

all_words=nltk.FreqDist(all_words)
# print(all_words.most_common(15))

In [63]:
print(all_words["stupid"])

253


In [64]:
word_features=list(all_words.keys())[:3000]

In [185]:
def find_features(document):
    print(document)
    words=word_tokenize(document)
    print(words)
    return
    features={}
    for w in word_features:
        features[w]=(w in document)
    return features

In [66]:
# featuresets=[]
featuresets1=[(find_features(rev),category) for (rev,category) in documents]
# featuresets=[]
featuresets2=[find_features(rev) for (rev,_) in documents]

In [70]:
for rev,category in documents[:1]:
    print(str(rev)+": "+str(category))

['note', ':', 'some', 'may', 'consider', 'portions', 'of', 'the', 'following', 'text', 'to', 'be', 'spoilers', '.', 'be', 'forewarned', '.', 'during', 'the', 'three', 'years', 'since', 'the', 'release', 'of', 'the', 'groundbreaking', 'success', 'pulp', 'fiction', ',', 'the', 'cinematic', 'output', 'from', 'its', 'creator', ',', 'quentin', 'tarantino', ',', 'has', 'been', 'surprisingly', 'low', '.', 'oh', ',', 'he', "'", 's', 'been', 'busy', '--', 'doing', 'the', 'talk', 'show', 'circuit', ',', 'taking', 'small', 'roles', 'in', 'various', 'films', ',', 'overseeing', 'the', 'production', 'of', 'his', 'screenplay', 'from', 'dusk', 'till', 'dawn', ',', 'making', 'cameo', 'appearances', 'on', 'television', 'shows', ',', 'providing', 'a', 'vignette', 'for', 'the', 'ill', '-', 'fated', 'anthology', 'four', 'rooms', '--', 'everything', ',', 'it', 'seems', ',', 'except', 'direct', 'another', 'feature', '-', 'length', 'film', '.', 'it', "'", 's', 'been', 'the', 'long', 'intermission', 'between',

In [120]:
train=featuresets1[:1900]
test=featuresets1[1900:]
classifier=nltk.NaiveBayesClassifier.train(train)
print("Naive Bayes Algo accuracy:", (nltk.classify.accuracy(classifier,test)))
classifier.show_most_informative_features(15)

Naive Bayes Algo accuracy: 0.79
Most Informative Features
                   sucks = True              neg : pos    =      9.8 : 1.0
                  annual = True              pos : neg    =      9.6 : 1.0
               unlikable = True              neg : pos    =      9.0 : 1.0
                 frances = True              pos : neg    =      9.0 : 1.0
           unimaginative = True              neg : pos    =      8.4 : 1.0
             silverstone = True              neg : pos    =      7.7 : 1.0
               atrocious = True              neg : pos    =      7.0 : 1.0
                  shoddy = True              neg : pos    =      7.0 : 1.0
                 idiotic = True              neg : pos    =      7.0 : 1.0
              schumacher = True              neg : pos    =      6.6 : 1.0
                  turkey = True              neg : pos    =      6.6 : 1.0
                 cunning = True              pos : neg    =      6.3 : 1.0
                 singers = True           

# Pickle

In [72]:
import pickle

In [73]:
save_classifier=open("naivebayes.pickle","wb")
pickle.dump(classifier,save_classifier)
save_classifier.close()

In [74]:
classifier_f=open("naivebayes.pickle","rb")
classifier=pickle.load(classifier_f)
classifier_f.close()

# Using Scikit Learn

In [75]:
from nltk.classify.scikitlearn import SklearnClassifier

In [96]:
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC

In [81]:
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(train)
print(nltk.classify.accuracy(MNB_classifier,test))

0.83


In [87]:
# Gaussian_classifier=SklearnClassifier(GaussianNB())
# Gaussian_classifier.train(train.toarray())
# print(nltk.classify.accuracy(Gaussian_classifier,test))

In [86]:
BernoulliNB_classifier=SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(train)
print(nltk.classify.accuracy(BernoulliNB_classifier,test))

0.78

In [90]:
Logistic_classifier = SklearnClassifier(LogisticRegression())
Logistic_classifier.train(train)
print(nltk.classify.accuracy(Logistic_classifier,test))



0.79


In [92]:
SGD_classifier = SklearnClassifier(SGDClassifier())
SGD_classifier.train(train)
print(nltk.classify.accuracy(SGD_classifier,test))

0.79


In [94]:
svc=SklearnClassifier(SVC())
svc.train(train)
print(nltk.classify.accuracy(svc,test))



0.79

In [98]:
lsvc=SklearnClassifier(LinearSVC())
lsvc.train(train)
print(nltk.classify.accuracy(lsvc,test))



0.75

In [99]:
nusvc=SklearnClassifier(NuSVC())
nusvc.train(train)
print(nltk.classify.accuracy(nusvc,test))



0.82

# Each classifier and vote on each one of them 

In [100]:
from nltk.classify import ClassifierI
from statistics import mode

In [135]:
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers=classifiers
        
    def classify(self, features):
        votes=[]
        for c in self._classifiers:
            v=c.classify(features)
            votes.append(v)
        return mode(votes)
        
    def confidence(self,features):
        votes=[]
        for c in self._classifiers:
            v=c.classify(features)
            votes.append(v)
        choice_votes=votes.count(mode(votes))
        conf = float(choice_votes)/len(votes)
        return conf
        

In [136]:
voted_classifier=VoteClassifier(
                                MNB_classifier, 
                                BernoulliNB_classifier,
                                Logistic_classifier,
                                lsvc,
                                svc, 
                                SGD_classifier, 
                                nusvc)

In [137]:
nltk.classify.accuracy(voted_classifier,test)

0.81

In [125]:
print("Classification:", voted_classifier.classify(test[0][0]), "Confidence %", voted_classifier.confidence(test[0][0]))
print("Classification:", voted_classifier.classify(test[1][0]), "Confidence %", voted_classifier.confidence(test[1][0]))
print("Classification:", voted_classifier.classify(test[2][0]), "Confidence %", voted_classifier.confidence(test[2][0]))
print("Classification:", voted_classifier.classify(test[3][0]), "Confidence %", voted_classifier.confidence(test[3][0]))
print("Classification:", voted_classifier.classify(test[4][0]), "Confidence %", voted_classifier.confidence(test[4][0]))

Classification: neg Confidence % 0.7142857142857143
Classification: neg Confidence % 1.0
Classification: neg Confidence % 1.0
Classification: pos Confidence % 0.5714285714285714
Classification: neg Confidence % 1.0


In [140]:
print(test[0][0])



# Sentiment Analysis

In [200]:
import io

In [201]:
short_pos=io.open("/Users/paras/Desktop/Sentiment/positive.txt","r",encoding='latin-1').read()
short_neg=io.open("/Users/paras/Desktop/Sentiment/negative.txt","r",encoding='latin-1').read()

In [202]:
documents=[]
for r in short_pos.split('\n'):
    documents.append((r,"pos"))
for r in short_neg.split('\n'):
    documents.append((r,"neg"))

In [203]:
short_pos_words=word_tokenize(short_pos)
short_neg_words=word_tokenize(short_neg)

In [204]:
all_words=[]

In [205]:
for w in short_pos_words:
    all_words.append(w.lower())
for w in short_neg_words:
    all_words.append(w.lower())

In [206]:
all_words[:11]

['the',
 'rock',
 'is',
 'destined',
 'to',
 'be',
 'the',
 '21st',
 'century',
 "'s",
 'new']

In [207]:
all_words=nltk.FreqDist(all_words)

In [208]:
all_words

FreqDist({'.': 14010, 'the': 10113, ',': 10037, 'a': 7307, 'and': 6202, 'of': 6063, 'to': 4234, 'is': 3559, "'s": 3537, 'it': 3422, ...})

In [209]:
word_features=list(all_words.keys())[:5000]

In [240]:
word_features[:10]

['the', 'rock', 'is', 'destined', 'to', 'be', '21st', 'century', "'s", 'new']

In [243]:
def find_features(document):
#     print(document)
    words=word_tokenize(document)
#     print(words)
    features={}
    for w in word_features:
        features[w]=(w in words)
    return features

In [244]:
featuresets=[(find_features(rev),category) for (rev,category) in documents]

In [246]:
random.shuffle(featuresets)

In [247]:
train=featuresets[:10000]
test=featuresets[10000:]

In [248]:
len(featuresets)

10664

In [249]:
naive=nltk.NaiveBayesClassifier.train(train)

In [250]:
nltk.classify.accuracy(naive,test)

0.7228915662650602

# Twitter Sentiment Analysis

In [283]:
allowed_word_types=["J"]

In [284]:
all_words=[]

In [285]:
for p in short_pos.split("\n"):
    documents.append((p,"pos"))
    words=word_tokenize(p)
    pos=nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())
        

In [286]:
for n in short_neg.split("\n"):
    documents.append((n,"neg"))
    words=word_tokenize(n)
    pos=nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())

In [288]:
documents[:11]

[('the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal . ',
  'pos'),
 ('the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson\'s expanded vision of j . r . r . tolkien\'s middle-earth . ',
  'pos'),
 ('effective but too-tepid biopic', 'pos'),
 ('if you sometimes like to go to the movies to have fun , wasabi is a good place to start . ',
  'pos'),
 ("emerges as something rare , an issue movie that's so honest and keenly observed that it doesn't feel like one . ",
  'pos'),
 ('the film provides some great insight into the neurotic mindset of all comics -- even those who have reached the absolute top of the game . ',
  'pos'),
 ('offers that rare combination of entertainment and education . ', 'pos'),
 ('perhaps no picture ever made has more literally

In [291]:
save_documents=open("/Users/paras/Desktop/pickledalgos/documents.pickle","wb")
pickle.dump(documents,save_documents)
save_documents.close()

In [292]:
all_words=nltk.FreqDist(all_words)

In [293]:
word_features=list(all_words.keys())[:5000]

In [294]:
save_word_features=open("/Users/paras/Desktop/pickledalgos/word_features5k.pickle","wb")
pickle.dump(word_features,save_word_features)
save_word_features.close()

In [295]:
featuresets=[(find_features(rev),category) for (rev,category) in documents]

In [296]:
random.shuffle(featuresets)

In [297]:
train=featuresets[:10000]
test=featuresets[10000:]

In [298]:
classifier=nltk.NaiveBayesClassifier.train(train)
classifier.show_most_informative_features(15)

Most Informative Features
                  stupid = True              neg : pos    =     27.7 : 1.0
                  boring = True              neg : pos    =     24.8 : 1.0
                mediocre = True              neg : pos    =     22.5 : 1.0
                    dull = True              neg : pos    =     19.7 : 1.0
                 generic = True              neg : pos    =     18.3 : 1.0
              remarkable = True              pos : neg    =     17.6 : 1.0
                   stale = True              neg : pos    =     15.2 : 1.0
                 winning = True              pos : neg    =     14.6 : 1.0
                  forced = True              neg : pos    =     14.1 : 1.0
                    flat = True              neg : pos    =     14.1 : 1.0
               offensive = True              neg : pos    =     14.1 : 1.0
                    rare = True              pos : neg    =     13.4 : 1.0
                mindless = True              neg : pos    =     13.1 : 1.0

In [300]:
nltk.classify.accuracy(classifier,test)

0.7792062067196717

In [299]:
save_classifier=open("/Users/paras/Desktop/pickledalgos/naivebayes5k.pickle","wb")
pickle.dump(classifier,save_classifier)
save_classifier.close()

In [302]:
MNB_classifier=SklearnClassifier(MultinomialNB())
MNB_classifier.train(train)
nltk.classify.accuracy(MNB_classifier,test)

0.7726981277250577

In [303]:
save_classifier=open("/Users/paras/Desktop/pickledalgos/MultinomialNB.pickle","wb")
pickle.dump(MNB_classifier,save_classifier)
save_classifier.close()

In [304]:
Bernoulli_NB=SklearnClassifier(BernoulliNB())
Bernoulli_NB.train(train)
nltk.classify.accuracy(Bernoulli_NB,test)

0.773211079764042

In [305]:
save_classifier=open("/Users/paras/Desktop/pickledalgos/BernoulliNB.pickle","wb")
pickle.dump(Bernoulli_NB,save_classifier)
save_classifier.close()

In [307]:
lg=SklearnClassifier(LogisticRegression())
lg.train(train)
nltk.classify.accuracy(lg,test)



0.7919979481918441

In [308]:
save_classifier=open("/Users/paras/Desktop/pickledalgos/LogisticRegression.pickle","wb")
pickle.dump(lg,save_classifier)
save_classifier.close()