<a href="https://colab.research.google.com/github/sneharreddy25/project_2025/blob/main/Project_Text_classification_using_CNN_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Project: Text classification using CNN Implementation

In [2]:
!pip install tensorflow



In [3]:
pip install keras



In [4]:
# importing the necessary libraries
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from keras.preprocessing.sequence import pad_sequences
from keras.datasets import imdb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Setting up the parameters
maximum_features = 5000  # Maximum number of words to consider as features
maximum_length = 100  # Maximum length of input sequences
word_embedding_dims = 50  # Dimension of word embeddings
no_of_filters = 250  # Number of filters in the convolutional layer
kernel_size = 3  # Size of the convolutional filters
hidden_dims = 250  # Number of neurons in the hidden layer
batch_size = 32  # Batch size for training
epochs = 2  # Number of training epochs
threshold = 0.5  # Threshold for binary classification

# Loading the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=maximum_features)

# Padding the sequences to ensure uniform length
x_train = pad_sequences(x_train, maxlen=maximum_length)
x_test = pad_sequences(x_test, maxlen=maximum_length)

# Building the model
model = Sequential()

# Adding the embedding layer to convert input sequences to dense vectors
model.add(Embedding(maximum_features, word_embedding_dims,
                    input_length=maximum_length))

# Adding the 1D convolutional layer with ReLU activation
model.add(Conv1D(no_of_filters, kernel_size, padding='valid',
                 activation='relu', strides=1))

# Adding the global max pooling layer to reduce dimensionality
model.add(GlobalMaxPooling1D())

# Adding the dense hidden layer with ReLU activation
model.add(Dense(hidden_dims, activation='relu'))

# Adding the output layer with sigmoid activation for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compiling the model with binary cross-entropy loss and Adam optimizer
model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])

# Training the model
model.fit(x_train, y_train, batch_size=batch_size,
          epochs=epochs, validation_data=(x_test, y_test))

# Predicting the probabilities for test data
y_pred_prob = model.predict(x_test)

# Converting the probabilities to binary classes based on threshold
y_pred = (y_pred_prob > threshold).astype(int)

# Calculating the evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Printing the evaluation metrics
print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/2




[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 30ms/step - accuracy: 0.7120 - loss: 0.5206 - val_accuracy: 0.8452 - val_loss: 0.3472
Epoch 2/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 29ms/step - accuracy: 0.9018 - loss: 0.2440 - val_accuracy: 0.8588 - val_loss: 0.3271
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step
Accuracy: 0.8588
Precision: 0.8448938788065211
Recall: 0.87896
F1-score: 0.8615903387703889
