In [5]:
!pip install keras
!pip install tensorflow
import tensorflow as tf


import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras import layers
from keras import utils

# Newsgroup categories to load. The length of this list is later used as the
# number of output classes.
categories = [
    'alt.atheism',
    'comp.graphics',
    'comp.os.ms-windows.misc',
    'comp.sys.ibm.pc.hardware',
    'comp.sys.mac.hardware',
    'comp.windows.x',
    'misc.forsale',
    'rec.autos',
    'rec.motorcycles',
    'rec.sport.baseball',
    'rec.sport.hockey',
    'sci.crypt',
    'sci.electronics',
    'sci.med',
    'sci.space',
    'soc.religion.christian',
    'talk.politics.guns',
    'talk.politics.mideast',
    'talk.politics.misc',
    'talk.religion.misc',
]

# Fetch the train split first, then the test split, with identical options:
# headers, footers and quoted replies are stripped from every post.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(
        subset=split_name,
        categories=categories,
        remove=('headers', 'footers', 'quotes'),
    )
    for split_name in ('train', 'test')
)

# Turn the raw posts into TF-IDF features. The vectorizer is fitted on the
# training posts only and then applied unchanged to the test posts, so both
# matrices share the same feature columns.
vectorizer = TfidfVectorizer(stop_words='english', max_features=2000)
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)

# One output class per requested category.
num_classes = len(categories)

# One-hot encode the integer targets of each split (train first, then test)
# to match the categorical cross-entropy loss used at compile time.
y_train, y_test = (
    utils.to_categorical(split.target, num_classes)
    for split in (newsgroups_train, newsgroups_test)
)

# Single-hidden-layer classifier: one 512-unit ReLU layer over the TF-IDF
# features, dropout at rate 0.5, then a softmax over the classes.
model = Sequential([
    layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot labels; accuracy is
# tracked as the only extra metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on a dense copy of the sparse training matrix for 5 epochs, with a
# validation_split of 0.1 so Keras reports validation metrics each epoch.
model.fit(
    X_train.toarray(),
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

# Evaluate the model
# Densify the sparse test matrix once and reuse it for both evaluate() and
# predict(); calling .toarray() at each call site built two full dense copies.
X_test_dense = X_test.toarray()
score = model.evaluate(X_test_dense, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Generate predictions
# predict() returns one row of class probabilities per post; argmax picks the
# most probable class index for each row.
y_pred = model.predict(X_test_dense)
y_pred_classes = np.argmax(y_pred, axis=1)
target_names = newsgroups_test.target_names
# y_test is one-hot encoded, so argmax recovers the integer class ids that
# classification_report expects as ground truth.
print(classification_report(np.argmax(y_test, axis=1), y_pred_classes, target_names=target_names))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 1.4236810207366943
Test accuracy: 0.5860329270362854
                          precision    recall  f1-score   support

             alt.atheism       0.37      0.43      0.39       319
           comp.graphics       0.53      0.63      0.58       389
 comp.os.ms-windows.misc       0.59      0.58      0.59       394
comp.sys.ibm.pc.hardware       0.60      0.52      0.56       392
   comp.sys.mac.hardware       0.64      0.54      0.59       385
          comp.windows.x       0.75      0.64      0.69       395
            misc.forsale       0.67      0.76      0.71       390
               rec.autos       0.39      0.67      0.49       396
         rec.motorcycles       0.66      0.57      0.61       398
      rec.sport.baseball       0.67      0.62      0.65       397
        rec.sport.hockey       0.83      0.73      0.78       399
               sci.crypt       0.77      0.65      0.71       396
         sci.electronics   