In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load the labelled messages. Because `names=` is supplied, pandas treats the
# first line of the file as data and uses these two names as the columns.
msg = pd.read_csv('naivetext.csv', names=['message', 'label'])
print('The dimensions of the dataset:', msg.shape)

# Encode the sentiment label numerically (pos -> 1, neg -> 0).
# Series.map yields NaN for any value missing from the mapping, so labels are
# normalised first (surrounding whitespace, capitalisation) and anything that
# is still unmapped is reported here rather than propagating NaN into `y`.
label_map = {'pos': 1, 'neg': 0}
msg['labelnum'] = msg.label.astype(str).str.strip().str.lower().map(label_map)
unmapped = msg.loc[msg.labelnum.isna(), 'label']
if not unmapped.empty:
    raise ValueError(
        "Unrecognised label(s) in 'naivetext.csv': "
        f"{sorted(unmapped.astype(str).unique())}; expected one of {sorted(label_map)}"
    )
# With no NaN left the column can be stored as plain integers again.
msg['labelnum'] = msg.labelnum.astype('int64')

# Features are the raw message text; the target is the numeric label.
X = msg.message
y = msg.labelnum
print("Messages (X):", X)
print("Labels (y):", y)

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
print('The total number of Training Data:', len(y_train))
print('The total number of Test Data:', len(y_test))

# Learn the bag-of-words vocabulary from the training messages only, then
# encode the test messages with that same fitted vocabulary.
count_vect = CountVectorizer()
X_train_dtm = count_vect.fit_transform(X_train)
X_test_dtm = count_vect.transform(X_test)

print("\nThe words or Tokens in the text documents:")
# Fetch the learned token names once and reuse them for the DataFrame below.
feature_names = count_vect.get_feature_names_out()
print(feature_names)

# Dense DataFrame view of the training document-term matrix, one column per token.
df = pd.DataFrame(X_train_dtm.toarray(), columns=feature_names)

# Fit a multinomial Naive Bayes classifier on the training term counts.
clf = MultinomialNB()
clf.fit(X_train_dtm, y_train)

# Classify the held-out messages.
predicted = clf.predict(X_test_dtm)

# Report accuracy, confusion matrix, precision and recall on the test split.
# Each metric is computed immediately before it is printed.
test_accuracy = metrics.accuracy_score(y_test, predicted)
print("\nAccuracy of the classifier is:", test_accuracy)

print("\nConfusion matrix:")
test_confusion = metrics.confusion_matrix(y_test, predicted)
print(test_confusion)

test_precision = metrics.precision_score(y_test, predicted)
print("\nThe value of Precision:", test_precision)

test_recall = metrics.recall_score(y_test, predicted)
print("The value of Recall:", test_recall)


The dimensions of the dataset: (18, 2)
Messages (X): 0                      I love this sandwich
1                  This is an amazing place
2        I feel very good about these beers
3                      This is my best work
4                      What an awesome view
5             I do not like this restaurant
6                  I am tired of this stuff
7                    I can't deal with this
8                      He is my sworn enemy
9                       My boss is horrible
10                 This is an awesome place
11    I do not like the taste of this juice
12                          I love to dance
13        I am sick and tired of this place
14                     What a great holiday
15           That is a bad locality to stay
16           We will have good fun tomorrow
17         I went to my enemy's house today
Name: message, dtype: object
Labels (y): 0     1
1     1
2     1
3     1
4     1
5     0
6     0
7     0
8     0
9     0
10    1
11    0
12    1
13    0
14