In [1]:
import nltk
# Fetch the two NLTK data packages this notebook reads: the labelled movie
# review corpus and the stopword lists. The second call is the cell's last
# expression, so its return value is what the cell displays.
nltk.download('movie_reviews')
nltk.download('stopwords')

[nltk_data] Downloading package movie_reviews to
[nltk_data]     /home/mamzi/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package stopwords to /home/mamzi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
from nltk.corpus import movie_reviews, stopwords
import string
import random

# Fixed seed so the shuffle (and therefore every cross-validation fold and
# every reported metric further down) is identical on each Restart & Run All.
RANDOM_SEED = 42

# One (token sequence, category) pair per corpus file, for every category
# the corpus reports.
reviews = [
    (movie_reviews.words(file_id), category)
    for category in movie_reviews.categories()
    for file_id in movie_reviews.fileids(category)
]

# The list is built category by category, so it must be shuffled before it is
# split into contiguous folds. A dedicated Random instance keeps the seeding
# local instead of resetting the global `random` state.
random.Random(RANDOM_SEED).shuffle(reviews)

In [3]:
english_stopwords = stopwords.words('english')
english_punctuations = list(string.punctuation)
remove_words = english_stopwords + english_punctuations

# Membership is tested once per corpus token, so look up in a set rather than
# scanning the `remove_words` list each time. The list itself is kept under its
# original name for any cell that still expects it.
remove_words_lookup = frozenset(remove_words)

# Lower-case every corpus token and drop stopwords / punctuation in one pass.
words_bag = [
    token
    for token in map(str.lower, movie_reviews.words())
    if token not in remove_words_lookup
]

In [4]:
from nltk import FreqDist
# Count every remaining token, then keep the 1000 most frequent ones as the
# vocabulary used to build review features.
words_frequency = FreqDist(words_bag)
common_words = words_frequency.most_common(1000)
# `common_words` holds (word, count) pairs; only the word is needed.
feature_words = [entry[0] for entry in common_words]


In [5]:
def get_review_features(review, vocabulary=None):
    """Build a bag-of-words presence feature dict for one review.

    Args:
        review: Iterable of string tokens making up the review.
        vocabulary: Iterable of words to use as feature names. When omitted,
            the notebook-level ``feature_words`` list is used, which keeps
            existing one-argument calls working unchanged.

    Returns:
        dict mapping each vocabulary word to ``True`` if it occurs in the
        review (compared case-insensitively on the review side) and ``False``
        otherwise.
    """
    if vocabulary is None:
        # Resolved at call time so the function can be defined before, or
        # re-used after, the global vocabulary is (re)computed.
        vocabulary = feature_words

    # A set gives constant-time membership checks for each vocabulary word.
    review_words = {word.lower() for word in review}
    return {word: (word in review_words) for word in vocabulary}

# Turn every (tokens, label) pair into a (feature dict, label) pair, keeping
# the order of `reviews`.
review_features = [
    (get_review_features(tokens), sentiment)
    for tokens, sentiment in reviews
]

In [6]:
from sklearn.model_selection import KFold
from nltk import NaiveBayesClassifier
from nltk.metrics.scores import recall, precision, f_measure
from nltk.classify import accuracy

# 5-fold cross-validation: for each split, train a Naive Bayes classifier on
# the training part and score it on the held-out part.
kf = KFold(n_splits=5)
results = []
for train_index, test_index in kf.split(review_features):
    train_set = [review_features[idx] for idx in train_index]
    test_set = [review_features[idx] for idx in test_index]

    classifier = NaiveBayesClassifier.train(train_set)

    # Per-label sets of test-item positions: `ref_sets` is filled from the
    # stored labels, `test_sets` from the classifier's predictions.
    ref_sets = {'pos': set(), 'neg': set()}
    test_sets = {'pos': set(), 'neg': set()}
    for position, (featureset, label) in enumerate(test_set):
        ref_sets[label].add(position)
        predicted = classifier.classify(featureset)
        test_sets[predicted].add(position)

    # One record per fold; the trained classifier is kept alongside its
    # scores so it can be inspected later.
    res = {
        'pos_precision': precision(ref_sets['pos'], test_sets['pos']),
        'neg_precision': precision(ref_sets['neg'], test_sets['neg']),
        'pos_recall': recall(ref_sets['pos'], test_sets['pos']),
        'neg_recall': recall(ref_sets['neg'], test_sets['neg']),
        'pos_f_measure': f_measure(ref_sets['pos'], test_sets['pos']),
        'neg_f_measure': f_measure(ref_sets['neg'], test_sets['neg']),
        'accuracy': accuracy(classifier, test_set),
        'classifier': classifier,
    }
    results.append(res)


In [8]:
# Show the 10 most informative features of each fold's classifier.
# show_most_informative_features() prints the table itself and returns None,
# so it must not be wrapped in print() (that emitted a stray "None" after
# every table).
for res in results:
    classifier = res['classifier']
    classifier.show_most_informative_features(10)

Most Informative Features
              ridiculous = True              neg : pos    =      5.1 : 1.0
                   awful = True              neg : pos    =      4.7 : 1.0
                   waste = True              neg : pos    =      4.6 : 1.0
                   worst = True              neg : pos    =      4.6 : 1.0
               memorable = True              pos : neg    =      4.3 : 1.0
                  stupid = True              neg : pos    =      3.9 : 1.0
                    dull = True              neg : pos    =      3.7 : 1.0
                  subtle = True              pos : neg    =      3.6 : 1.0
                    mess = True              neg : pos    =      3.4 : 1.0
                  boring = True              neg : pos    =      3.3 : 1.0
None
Most Informative Features
                   waste = True              neg : pos    =      5.3 : 1.0
              ridiculous = True              neg : pos    =      4.8 : 1.0
                   awful = True            