1. Import Necessary Libraries

In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

2. Create a Custom Dataset:
<span>Here we have created a custom data set which help us to perform the classification</span>

In [14]:
data = {
    'message': [
        "Congratulations, you've won a free ticket to the Bahamas! Call now!",
        "Hey, are we still meeting at the cafe tomorrow?",
        "Get cheap meds online, click here for a discount!",
        "Reminder: Your appointment is scheduled for next Wednesday.",
        "Win a $1000 gift card by completing this survey!",
        "Don't forget to submit the assignment by tonight.",
        "You've been selected for a special prize! Visit our website.",
        "Let's catch up over lunch this weekend.",
        "Limited time offer, buy one get one free!",
        "Your order has been shipped and will arrive by Friday."
    ],
    'label': ['spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham']
}

df = pd.DataFrame(data)


3. PreProcess the Data

 ###### TF-IDF: ``.\Tutorials\5 NLP Using ML\TF-IDF.md``

In [12]:
# Encode labels
encoder = LabelEncoder()
df['label'] = encoder.fit_transform(df['label']) # we have converted the spam or ham value to a numeric values
df.head()

Unnamed: 0,message,label
0,"Congratulations, you've won a free ticket to t...",1
1,"Hey, are we still meeting at the cafe tomorrow?",0
2,"Get cheap meds online, click here for a discount!",1
3,Reminder: Your appointment is scheduled for ne...,0
4,Win a $1000 gift card by completing this survey!,1


In [8]:

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

# Tokenize text and convert to sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad sequences to ensure uniform length
maxlen = 50
X_train_pad = pad_sequences(X_train_seq, padding='post', maxlen=maxlen)
X_test_pad = pad_sequences(X_test_seq, padding='post', maxlen=maxlen)

In [9]:
X_train_pad

array([[10, 11,  4, 12,  5, 13,  2, 14,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0],
       [15,  6, 16,  1, 17, 18,  4,  5, 19, 20, 21,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0],
       [22, 23, 24, 25, 26,  7, 27,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0],
       [28, 29, 30, 31, 32, 33,  3,  1, 34,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0],
       [ 8, 35, 36,  9, 37, 38, 39, 40,  2, 41,  0,  0,  0,  0,  0, 

Step 4: Build the Neural Network Model

Explanation:
Data Preparation: We encode the labels ('spam' and 'ham') into numerical form using LabelEncoder. Text messages are tokenized using Tokenizer from Keras and then padded to ensure uniform length for neural network input.

Model Building:

An Embedding layer converts text sequences into dense vectors of fixed size.
GlobalAveragePooling1D averages over the sequence dimension to flatten the input.
Two Dense layers with relu and sigmoid activations for classification.
Training and Evaluation: The model is trained using binary_crossentropy as the loss function and adam optimizer. Accuracy is evaluated on the test set.

Adjust the model architecture, tokenizer parameters, and training epochs as needed based on performance and specific requirements. This example provides a basic framework for text classification using TensorFlow with your provided data.

In [10]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=64, input_length=maxlen),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()



5. Evaluate the Model

In [5]:
# Predict the labels for the test set
y_pred = model.predict(X_test_vec)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label='spam')
recall = recall_score(y_test, y_pred, pos_label='spam')
f1 = f1_score(y_test, y_pred, pos_label='spam')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0
