## Naive Bayes for Text Classification: Multinomial and Multivariate Bernoulli Event Models

In [1]:
# Sample Data: (review text, label) pairs, where label 1 = positive and 0 = negative.
# Keeping each text next to its label makes it hard for the two lists to drift out of step.
labelled_reviews = [
    ("This was an awesome movie", 1),
    ("Great movie! I Liked it a lot", 1),
    ("Happy Ending! awesome acting by the hero", 1),
    ("loved it! truly great", 1),
    ("bad not upto the mark", 0),
    ("could have been better", 0),
    ("Surely a Disappointing movie", 0),
]

# Split the pairs back into the parallel lists used by the rest of the notebook.
x = [review for review, _ in labelled_reviews]
y = [label for _, label in labelled_reviews]

In [2]:
# Reviews that are never used for fitting; they are only cleaned, vectorized and classified.
# The first one repeats "happy", so its count vector contains a 2 for that term.
x_test = [
    "I was happy & happy and I loved the acting in the movie",
    "The movie I saw was bad",
]

### 1. Cleaning

In [3]:
import clean_text as ct

In [4]:
# Run every raw review (train and test alike) through the same cleaning helper,
# so both sets are normalised identically before vectorization.
x_clean = list(map(ct.getCleanReview, x))
xt_clean = list(map(ct.getCleanReview, x_test))

In [5]:
# Inspect the cleaned training reviews that will be fed to the vectorizer.
print(x_clean)

['awesom movi', 'great movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'sure disappoint movi']


In [6]:
# Inspect the cleaned test reviews that will be vectorized with the fitted vocabulary.
print(xt_clean)

['happi happi love act movi', 'movi saw bad']


### 2. Vectorization

In [7]:
from sklearn.feature_extraction.text import CountVectorizer

In [8]:
# Bag-of-words over unigrams and bigrams; the vocabulary is learned from the training reviews.
cv = CountVectorizer(ngram_range=(1, 2))

# Learn the vocabulary and count terms in one pass, then densify the result
# so that every count is visible when printed.
x_counts = cv.fit_transform(x_clean)
x_vec = x_counts.toarray()
print(x_vec)

[[0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0]]


In [9]:
# (rows, columns) = (number of training reviews, size of the unigram + bigram vocabulary).
print(x_vec.shape)

(7, 33)


In [10]:
# Column index -> term mapping for the count matrix.
# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2, so prefer
# `get_feature_names_out()` and fall back to the old method only on older releases.
if hasattr(cv, "get_feature_names_out"):
    # The new API returns an ndarray; convert it so the printed form stays a plain list of str.
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()
print(feature_names)

['act', 'act hero', 'awesom', 'awesom act', 'awesom movi', 'bad', 'bad upto', 'better', 'could', 'could better', 'disappoint', 'disappoint movi', 'end', 'end awesom', 'great', 'great movi', 'happi', 'happi end', 'hero', 'like', 'like lot', 'lot', 'love', 'love truli', 'mark', 'movi', 'movi like', 'sure', 'sure disappoint', 'truli', 'truli great', 'upto', 'upto mark']


In [11]:
# Vectorization on Test Set
# Only `transform` is called here, so the vocabulary fitted on the training reviews is reused
# and the test matrix gets the same columns as the training matrix.
xt_counts = cv.transform(xt_clean)
xt_vec = xt_counts.toarray()
print(xt_vec)

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]]


In [12]:
# Print the vocabulary again after transforming the test set: `transform` does not refit,
# so the terms (and their column order) are the ones learned from the training reviews.
# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2, so prefer
# `get_feature_names_out()` and fall back to the old method only on older releases.
if hasattr(cv, "get_feature_names_out"):
    # The new API returns an ndarray; convert it so the printed form stays a plain list of str.
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()
print(feature_names)

['act', 'act hero', 'awesom', 'awesom act', 'awesom movi', 'bad', 'bad upto', 'better', 'could', 'could better', 'disappoint', 'disappoint movi', 'end', 'end awesom', 'great', 'great movi', 'happi', 'happi end', 'hero', 'like', 'like lot', 'lot', 'love', 'love truli', 'mark', 'movi', 'movi like', 'sure', 'sure disappoint', 'truli', 'truli great', 'upto', 'upto mark']


In [13]:
# (number of test reviews, vocabulary size); the column count matches x_vec because
# the same fitted vectorizer produced both matrices.
print(xt_vec.shape)

(2, 33)


### 3. Multinomial Naive Bayes

In [14]:
from sklearn.naive_bayes import MultinomialNB, BernoulliNB, GaussianNB

In [15]:
# Multinomial Naive Bayes with default hyperparameters; it models how often each
# term occurs, so repeated terms in a review carry extra weight.
mnb = MultinomialNB()
print(mnb)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)


In [16]:
# Training: fit on the dense term-count matrix and the 0/1 sentiment labels.
mnb.fit(x_vec, y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [17]:
# Predictions: hard class labels for the test reviews (1 = positive, 0 = negative).
mnb.predict(xt_vec)

array([1, 0])

In [18]:
# Per-review class probabilities. Columns follow `mnb.classes_` (sorted labels),
# i.e. column 0 is P(negative) and column 1 is P(positive); each row sums to 1.
mnb.predict_proba(xt_vec)

array([[0.0836554, 0.9163446],
       [0.6055918, 0.3944082]])

In [19]:
# Accuracy on the training data itself (x_vec, y are what the model was fit on),
# so this is not an estimate of performance on unseen reviews.
mnb.score(x_vec, y)

1.0

### 4. Multivariate Bernoulli Event Model Naive Bayes

In [20]:
# Bernoulli Naive Bayes works on binary features: with binarize=0.0 every count
# greater than 0 is mapped to 1, so only the presence or absence of a term matters.
bnb = BernoulliNB(binarize=0.0)

In [21]:
# Show the estimator's configuration.
print(bnb)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)


In [22]:
# Fit on the same count matrix used for the multinomial model; the estimator
# binarizes it internally according to its `binarize` threshold.
bnb.fit(x_vec, y)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [23]:
# Per-review class probabilities. Columns follow `bnb.classes_` (sorted labels),
# i.e. column 0 is P(negative) and column 1 is P(positive).
bnb.predict_proba(xt_vec)

array([[0.09025538, 0.90974462],
       [0.72569589, 0.27430411]])

In [24]:
# Hard class labels for the test reviews (1 = positive, 0 = negative).
bnb.predict(xt_vec)

array([1, 0])

In [25]:
# Accuracy on the training data itself (x_vec, y are what the model was fit on),
# so this is not an estimate of performance on unseen reviews.
bnb.score(x_vec, y)

1.0