<a href="https://colab.research.google.com/github/pavankumarkasula73/Deep-Learning-With-Natural-Language-Processing/blob/main/Use_the_Keras_deep_learning_library_and_split_words_with_(text_to_word_sequence).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Install Keras**

In [1]:
pip install keras



# **Import Required Libraries**

In [5]:
from keras.preprocessing.text import text_to_word_sequence, Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.utils import set_random_seed
from sklearn.model_selection import train_test_split
import numpy as np

# **Load the Dataset**

In [3]:
# Toy corpus: four short sentences about Keras, reused by every later cell.
# Long sentences are wrapped with adjacent string literals, which Python joins
# into a single string, so each list element is still one full sentence.
texts = [
    (
        "Keras is an open-source software library that provides "
        "a Python interface for artificial neural networks."
    ),
    "Keras acts as an interface for the TensorFlow library.",
    "It was developed by François Chollet, a Google engineer.",
    (
        "Keras allows for easy and fast prototyping, "
        "supports both convolutional networks and recurrent networks, "
        "and runs seamlessly on CPU and GPU."
    ),
]

# **Preprocess the Text Data**

In [4]:
# Break each sentence into its list of word tokens (one list per sentence).
word_sequences = list(map(text_to_word_sequence, texts))

# Show the tokens of every sentence, numbering the sentences from 1.
for number, words in enumerate(word_sequences, start=1):
    print(f"Text {number}: {words}")

Text 1: ['keras', 'is', 'an', 'open', 'source', 'software', 'library', 'that', 'provides', 'a', 'python', 'interface', 'for', 'artificial', 'neural', 'networks']
Text 2: ['keras', 'acts', 'as', 'an', 'interface', 'for', 'the', 'tensorflow', 'library']
Text 3: ['it', 'was', 'developed', 'by', 'françois', 'chollet', 'a', 'google', 'engineer']
Text 4: ['keras', 'allows', 'for', 'easy', 'and', 'fast', 'prototyping', 'supports', 'both', 'convolutional', 'networks', 'and', 'recurrent', 'networks', 'and', 'runs', 'seamlessly', 'on', 'cpu', 'and', 'gpu']


# **Tokenize the Text Data**

In [6]:
# Build the vocabulary from the corpus; the fitted tokenizer is reused later
# to size the embedding layer.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Replace every word by its integer index in the fitted vocabulary.
sequences = tokenizer.texts_to_sequences(texts)

# Show the encoded form of every sentence, numbering the sentences from 1.
for number, encoded in enumerate(sequences, start=1):
    print(f"Text {number} sequence: {encoded}")

Text 1 sequence: [2, 9, 5, 10, 11, 12, 6, 13, 14, 7, 15, 8, 3, 16, 17, 4]
Text 2 sequence: [2, 18, 19, 5, 8, 3, 20, 21, 6]
Text 3 sequence: [22, 23, 24, 25, 26, 27, 7, 28, 29]
Text 4 sequence: [2, 30, 3, 31, 1, 32, 33, 34, 35, 36, 4, 1, 37, 4, 1, 38, 39, 40, 41, 1, 42]


# **Pad the Sequences**

In [7]:
# The longest encoded sentence sets the common length for all rows.
max_length = max(map(len, sequences))

# Pad every sequence to max_length so the rows form one rectangular array.
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Print the heading and the padded array on separate lines.
print("Padded sequences:", padded_sequences, sep="\n")

Padded sequences:
[[ 0  0  0  0  0  2  9  5 10 11 12  6 13 14  7 15  8  3 16 17  4]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  2 18 19  5  8  3 20 21  6]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 22 23 24 25 26 27  7 28 29]
 [ 2 30  3 31  1 32 33 34 35 36  4  1 37  4  1 38 39 40 41  1 42]]


# **Define the model**

In [8]:
# Seed the random number generators before any layer is created so that the
# initial weights, the batch shuffling during training, and therefore the
# reported metrics are repeatable from run to run.
SEED = 42
set_random_seed(SEED)

# Define the model: token ids -> 50-dimensional embeddings -> LSTM -> sigmoid output.
model = Sequential()
# The vocabulary size is incremented by one because the tokenizer's word
# indices start at 1, leaving index 0 for the padding value.
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=50, input_length=max_length))
model.add(LSTM(units=50))
# A single sigmoid unit yields one probability per sentence for the binary labels.
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 21, 50)            2150      
                                                                 
 lstm (LSTM)                 (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 22401 (87.50 KB)
Trainable params: 22401 (87.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# **Prepare Data for Training**

In [9]:
# One binary target per sentence, paired by position with the rows of
# padded_sequences when the data is split below.
labels = np.array([1, 0, 1, 0])

# Split rows and labels together into train and test parts; the fixed
# random_state makes the partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    random_state=42,
    test_size=0.2,
)

# **Train the model**

In [10]:
# Train the model
# Keep the object returned by fit(): it is the handle to the recorded per-epoch
# metrics, and binding it stops the cell from echoing an object repr whose
# memory address differs on every run.
# NOTE(review): the held-out test split doubles as the validation data here.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=2,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a7a11ee2fe0>

# **Evaluate the model**

In [11]:
# Score the trained model on the held-out rows; evaluate() yields the loss
# followed by the accuracy metric requested at compile time.
loss, accuracy = model.evaluate(X_test, y_test)

# Report both numbers, one per line.
print(f"Test Loss: {loss}", f"Test Accuracy: {accuracy}", sep="\n")

Test Loss: 1.0169551372528076
Test Accuracy: 0.0
