# Voting across a number of classification algorithms

Using just one classifier gives accuracy that varies from one training set to another. Running several classifiers and using a mechanism to score and vote across them, as well as seeing which results are supported by several classifiers, will allow us to raise the accuracy and reliability of the results.

### Usual corpus -> feature sets setup.
(Same as in sample_txt_classifiers and nb_txt_classifier)

In [None]:
import nltk
import random
from nltk.corpus import movie_reviews

# Pair every review's token list with the category of the file it came from.
documents = [
    (list(movie_reviews.words(fileid)), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]

# Frequency distribution of the lower-cased words across all reviews
# (positive and negative together).
all_words = nltk.FreqDist(word.lower() for word in movie_reviews.words())
all_words.most_common(15)

# Keep only the most commonly used words as candidate features.
word_features = [word for word, _count in all_words.most_common(3000)]

def find_features(document):
    """Map each distinct word of a review to whether it is a feature word.

    Args:
        document: Iterable of (hashable) word tokens making up one review.

    Returns:
        dict: One key per unique word in ``document``; the value is ``True``
        when that word occurs in the module-level ``word_features`` list and
        ``False`` otherwise.
    """
    # Build the lookup set once per call: testing membership against the
    # ``word_features`` list directly would rescan the list for every word.
    known_words = set(word_features)
    return {word: word in known_words for word in set(document)}

# One (feature dict, category) pair per review in ``documents``.
feature_sets = [
    (find_features(tokens), label)
    for tokens, label in documents
]
feature_sets[0]

In [None]:
# Shuffle in place so the split below picks training and test items at random.
random.shuffle(feature_sets)

# The first 1900 shuffled items are used for training, the remainder for testing.
training_set, testing_set = feature_sets[:1900], feature_sets[1900:]

## Set up and train all the classifiers that we want to use in the voting classifier

In [None]:
from nltk.classify.scikitlearn import SklearnClassifier  ## wrapper for scikitlearn in nltk
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier # Stochastic Gradiant Descent
from sklearn.svm import SVC, LinearSVC, NuSVC

# NLTK's own Naive Bayes classifier. Wrapping LogisticRegression here would
# duplicate ``lr_classifier`` below and count the same model twice when the
# classifiers vote.
simple_NB_classifier = nltk.NaiveBayesClassifier.train(training_set)

# scikit-learn estimators, each wrapped so it accepts NLTK-style
# (feature dict, label) training pairs.
mnb_classifier = SklearnClassifier(MultinomialNB())
mnb_classifier.train(training_set)

bnb_classifier = SklearnClassifier(BernoulliNB())
bnb_classifier.train(training_set)

lr_classifier = SklearnClassifier(LogisticRegression())
lr_classifier.train(training_set)

sgd_classifier = SklearnClassifier(SGDClassifier())
sgd_classifier.train(training_set)

svc_classifier = SklearnClassifier(SVC())
svc_classifier.train(training_set)

lsvc_classifier = SklearnClassifier(LinearSVC())
lsvc_classifier.train(training_set)

nsvc_classifier = SklearnClassifier(NuSVC())
nsvc_classifier.train(training_set)

## Create a voting classifier class

In [None]:
from nltk.classify import ClassifierI
from statistics import mode

class VoteClassifier(ClassifierI):
    """Majority-vote ensemble over a fixed group of classifiers.

    Every wrapped classifier must provide ``classify(features)``; the label
    returned by the most classifiers wins.
    """

    def __init__(self, *classifiers):
        """Store the classifiers whose votes will be combined.

        Args:
            *classifiers: Objects exposing ``classify(features)``.
        """
        self._classifiers = classifiers

    def _tally(self, features):
        """Collect one vote per classifier and pick the winning label.

        Args:
            features: Feature set handed unchanged to every classifier.

        Returns:
            tuple: ``(winning_label, votes)`` where ``votes`` is the list of
            labels in classifier order.

        Raises:
            ValueError: If the ensemble holds no classifiers.
        """
        votes = [c.classify(features) for c in self._classifiers]
        if not votes:
            raise ValueError("VoteClassifier has no classifiers to vote with")
        # max() keeps the first maximal element, so a tie is resolved in
        # favour of the tied label that was voted for earliest. Unlike
        # statistics.mode on Python < 3.8, this never raises on a tie.
        winner = max(votes, key=votes.count)
        return winner, votes

    def classify(self, features):
        """Return the label chosen by the most classifiers.

        Args:
            features: Feature set to classify.

        Returns:
            The winning label; ties go to the tied label voted for first.

        Raises:
            ValueError: If the ensemble holds no classifiers.
        """
        winner, _votes = self._tally(features)
        return winner

    def confidence(self, features):
        """Return the share of classifiers that voted for the winning label.

        Args:
            features: Feature set to classify.

        Returns:
            float: Votes for the winning label divided by the total number
            of votes.

        Raises:
            ValueError: If the ensemble holds no classifiers.
        """
        winner, votes = self._tally(features)
        return votes.count(winner) / len(votes)



## Set up and run the voting classifier

In [None]:
# Combine every trained classifier into a single voting ensemble.
voted_classifier = VoteClassifier(
    simple_NB_classifier,
    mnb_classifier,
    bnb_classifier,
    lr_classifier,
    sgd_classifier,
    svc_classifier,
    lsvc_classifier,
    nsvc_classifier,
)

# Score the ensemble against the held-out test items.
classifier_accuracy = nltk.classify.accuracy(voted_classifier, testing_set)

In [None]:
# Show the accuracy computed for the voting classifier on the test set.
classifier_accuracy

See some examples by classifying some instances with the voted classifier

In [None]:
# Label chosen by the vote for the first test item; index [0][0] selects
# that item's feature dict (each item is a (features, category) pair).
voted_classifier.classify(testing_set[0][0])


In [None]:
# Share of classifiers that agreed on the label for the first test item.
# The feature dict is testing_set[0][0]; writing ``0[0]`` would subscript an
# int and raise TypeError.
voted_classifier.confidence(testing_set[0][0])