In the shortened version, the following changes were made:

1. Function and variable names were changed to use lowercase with underscores, following Python's naming conventions.
2. Unnecessary print statements were removed.
3. Code formatting was improved to make it more consistent and readable.
4. The code was organized into logical sections for better readability.

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load and preprocess the dataset.
# Column names are supplied explicitly: the message text and its string label.
msg = pd.read_csv('naivetext.csv', names=['message', 'label'])
X = msg.message
# Encode the string labels as integers for the classifier (pos -> 1, neg -> 0).
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
y = msg.labelnum

# Split the dataset into training and test sets.
# NOTE(review): no random_state is passed, so the split (and therefore the
# metrics printed below) presumably differs from run to run -- confirm
# whether a fixed seed is wanted for reproducible results.
x_train, x_test, y_train, y_test = train_test_split(X, y)
print('The total number of Training Data:', y_train.shape)
print('The total number of Test Data:', y_test.shape)

# Convert text to numerical features.
# The vocabulary is learned from the training messages only; the test
# messages are encoded with that same fitted vocabulary.
count_vect = CountVectorizer()
x_train_dtm = count_vect.fit_transform(x_train)
x_test_dtm = count_vect.transform(x_test)
print('\nThe words or Tokens in the text documents:')
print(count_vect.get_feature_names_out())

# Train the classifier and make predictions on the held-out messages.
clf = MultinomialNB().fit(x_train_dtm, y_train)
predicted = clf.predict(x_test_dtm)

# Evaluate the classifier against the true test labels.
print('\nAccuracy of the classifier:', metrics.accuracy_score(y_test, predicted))
print('\nConfusion matrix:', metrics.confusion_matrix(y_test, predicted))
print('\nThe value of Precision:', metrics.precision_score(y_test, predicted))
print('\nThe value of Recall:', metrics.recall_score(y_test, predicted))

# Make predictions on new data, reusing the vocabulary fitted above.
docs_new = ['I like this place', 'My boss is not my savior']
x_new_counts = count_vect.transform(docs_new)
predicted_new = clf.predict(x_new_counts)

# `category` is already the predicted labelnum (1 = pos, 0 = neg).
# It must not be used as a key into msg.labelnum: that would look up the
# label stored at dataset row 0 or row 1 instead of reporting the prediction.
for doc, category in zip(docs_new, predicted_new):
    print(f'{doc} -> {category}')


The total number of Training Data: (13,)
The total number of Test Data: (5,)

The words or Tokens in the text documents:
['am' 'amazing' 'an' 'and' 'awesome' 'bad' 'best' 'boss' 'can' 'deal' 'do'
 'enemy' 'fun' 'good' 'great' 'have' 'holiday' 'horrible' 'house' 'is'
 'like' 'locality' 'love' 'my' 'not' 'of' 'place' 'restaurant' 'sandwich'
 'sick' 'stay' 'stuff' 'that' 'this' 'tired' 'to' 'today' 'tomorrow'
 'view' 'we' 'went' 'what' 'will' 'with' 'work']

Accuracy of the classifier: 0.8

Confusion matrix: [[2 0]
 [1 2]]

The value of Precision: 1.0

The value of Recall: 0.6666666666666666
I like this place -> 1
My boss is not my savior -> 1
