# Steps for applying Multinomial Naive Bayes to NLP problems:

### Preprocessing the text data

#### cleaning texts

In [1]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
 
dataset = [["I liked the movie", "positive"],
           ["It’s a good movie. Nice story", "positive"],
           ["Hero’s acting is bad but heroine looks good.\
            Overall nice movie", "positive"],
            ["Nice songs. But sadly boring ending.", "negative"],
            ["sad movie, boring movie", "negative"]]
             
dataset = pd.DataFrame(dataset)
dataset.columns = ["Text", "Reviews"]
 
nltk.download('stopwords')
 
corpus = []
 
for i in range(0, 5):
    text = re.sub('[^a-zA-Z]', '', dataset['Text'][i])
    text = text.lower()
    text = text.split()
    ps = PorterStemmer()
    text = ''.join(text)
    corpus.append(text)

[nltk_data] Downloading package stopwords to C:\Users\Laptop
[nltk_data]     Home\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Feature extraction

#### creating bag of words model

In [2]:
cv = CountVectorizer(max_features = 1500)
 
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

### Splitting the data

#### splitting the data set into training set and test set

In [3]:
from sklearn.model_selection import train_test_split
 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

### Training the MNB model

#### fitting naive bayes to the training set

In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix

classifier = GaussianNB()
classifier.fit(X_train, y_train)

#### predicting test set results

In [5]:
y_pred = classifier.predict(X_test)

### Evaluating the performance of the model

#### making the confusion matrix

In [6]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[0, 0],
       [2, 0]], dtype=int64)