In [1]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer

In [2]:
from sklearn.naive_bayes import MultinomialNB

In [4]:
# Newsgroup name -> human-readable label shown when predictions are printed.
categories = {
    'comp.os.ms-windows.misc': 'Computers',
    'rec.autos': 'Autos',
    'rec.motorcycles': 'Motorcycles',
    'rec.sport.baseball': 'Baseball',
    'sci.electronics': 'Electronics',
    'sci.med': 'Medical',
    'sci.space': 'Space',
    'talk.politics.misc': 'Politics',
    'talk.religion.misc': 'Religion',
}

# Only the dict *keys* are newsgroup names, so hand them over as an explicit
# list (the documented form) instead of relying on dict iteration order/keys.
# shuffle / random_state are spelled out with the library's default values so
# the document order behind the outputs below is pinned in the notebook itself.
dataset = fetch_20newsgroups(
    subset='train',
    categories=list(categories),
    shuffle=True,
    random_state=42,
)

In [5]:
# List the fields available on the fetched dataset object.
dataset.keys()

dict_keys(['target', 'DESCR', 'description', 'target_names', 'data', 'filenames'])

In [7]:
# Newsgroup names of the loaded subset; a predicted class index is turned back
# into a name by indexing into this list.
dataset.target_names

['comp.os.ms-windows.misc',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'talk.politics.misc',
 'talk.religion.misc']

In [8]:
# Integer class labels used as the training target for the classifier.
dataset.target

array([3, 8, 2, ..., 6, 0, 6], dtype=int64)

In [9]:
# Fit the count vectorizer on the training posts and encode them in one step.
# `vect` is kept because the same fitted vectorizer must encode new sentences
# at prediction time.
vect = CountVectorizer()
x_train = vect.fit_transform(dataset.data)

In [12]:
x_train.shape

(5000, 86621)

In [18]:
# Re-weight the raw term counts with TF-IDF and train the Naive Bayes classifier.
#
# The weighted matrix gets its own name instead of overwriting `x_train`:
# with `x_train = tfidf.fit_transform(x_train)` every re-run of this cell
# would feed already-weighted values back into the transformer, so the fitted
# model would depend on how many times the cell had been executed.
tfidf = TfidfTransformer()
x_train_tfidf = tfidf.fit_transform(x_train)

# `dataset.target` supplies the integer class label for each training post.
clf = MultinomialNB()
clf.fit(x_train_tfidf, dataset.target)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [19]:
# Hand-written sentences used to try out the trained classifier.
# Long literals are split with implicit concatenation; the resulting strings
# are unchanged.
input_data = [
    (
        "That game was played between two teams and one of the player "
        "was holding the bat and hitting the ball all around the ground"
    ),
    "Recently Nasa was working on reusable rockets",
    "Apple is going to launch its new smart watches",
    (
        "New medicines for cancer treatment has been introduced "
        "and will be shared to every hospital soon"
    ),
]

In [20]:
# Encode the new sentences with the already-fitted vectorizer and TF-IDF
# transformer (transform only - nothing is re-fitted on the inputs).
input_terms = tfidf.transform(vect.transform(input_data))

In [21]:
# Predict one class index per input sentence with the trained classifier.
y_pred = clf.predict(input_terms)

In [22]:
# Show the raw predicted class indices (positions in dataset.target_names).
y_pred

array([3, 6, 6, 5], dtype=int64)

In [24]:
# Report each sentence next to its predicted label:
# class index -> newsgroup name -> human-readable label.
for text, class_idx in zip(input_data, y_pred):
    newsgroup = dataset.target_names[class_idx]
    label = categories[newsgroup]
    print("\n Input :", text, "\nPredicted Category :", label)


 Input : That game was played between two teams and one of the player was holding the bat and hitting the ball all around the ground 
Predicted Category : Baseball

 Input : Recently Nasa was working on reusable rockets 
Predicted Category : Space

 Input : Apple is going to launch its new smart watches 
Predicted Category : Space

 Input : New medicines for cancer treatment has been introduced and will be shared to every hospital soon 
Predicted Category : Medical
