In [None]:
# import libraries
try:
  # %tensorflow_version only exists in Colab.
  !pip install tf-nightly
except Exception:
  pass
import tensorflow as tf
import pandas as pd
from tensorflow import keras
!pip install tensorflow-datasets
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import sequence
tfds.disable_progress_bar()

print(tf.__version__)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
2.14.0-dev20230615


In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
!wget https://cdn.freecodecamp.org/project-data/sms/valid-data.tsv


In [None]:
# Local filenames of the two TSV files fetched by the download cell.
# Note: the "test" path points at the file named valid-data.tsv.
train_file_path = "train-data.tsv"
test_file_path = "valid-data.tsv"

In [None]:
# Load the training split: tab-separated, no header row, columns are (type, msg).
# dropna() returns a new DataFrame, so its result has to be kept; calling it
# as a bare statement would leave rows with missing values in place.
train_df = pd.read_csv(
    train_file_path, sep="\t", header=None, names=["type", "msg"]
).dropna()
train_df.head()

Unnamed: 0,type,msg
0,ham,ahhhh...just woken up!had a bad dream about u ...
1,ham,you can never do nothing
2,ham,"now u sound like manky scouse boy steve,like! ..."
3,ham,mum say we wan to go then go... then she can s...
4,ham,never y lei... i v lazy... got wat? dat day ü ...


In [None]:
# Load the held-out split: tab-separated, no header row, columns are (type, msg).
# dropna() returns a new DataFrame, so its result has to be kept; calling it
# as a bare statement would leave rows with missing values in place.
test_df = pd.read_csv(
    test_file_path, sep="\t", header=None, names=["type", "msg"]
).dropna()
test_df.head()

Unnamed: 0,type,msg
0,ham,i am in hospital da. . i will return home in e...
1,ham,"not much, just some textin'. how bout you?"
2,ham,i probably won't eat at all today. i think i'm...
3,ham,don‘t give a flying monkeys wot they think and...
4,ham,who are you seeing?


In [None]:
# Fixed label encoding shared by both splits. Factorizing each split on its own
# numbers the classes by order of first appearance, so the two splits could end
# up with opposite ids (e.g. if one file happened to start with a "spam" row).
LABEL_TO_ID = {"ham": 0, "spam": 1}


def encode_labels(labels):
    """Convert a Series of "ham"/"spam" labels to int64 ids via LABEL_TO_ID.

    Values that are already valid ids pass through unchanged, so re-running
    this cell on an already-encoded column is harmless.

    Args:
        labels: pandas Series holding label strings (or already-encoded ids).

    Returns:
        pandas Series of dtype int64 with the same index as `labels`.

    Raises:
        ValueError: if any value is neither a known label nor a known id.
    """
    encoded = labels.map(lambda value: LABEL_TO_ID.get(value, value))
    unknown = ~encoded.isin(list(LABEL_TO_ID.values()))
    if unknown.any():
        bad_values = sorted(set(map(str, labels[unknown])))
        raise ValueError(f"Unexpected label values: {bad_values}")
    return encoded.astype("int64")


train_df["type"] = encode_labels(train_df["type"])
test_df["type"] = encode_labels(test_df["type"])

# Pair each message string with its integer label as a tf.data pipeline.
train_labels = train_df["type"].values
train_data = tf.data.Dataset.from_tensor_slices((train_df["msg"].values, train_labels))

test_labels = test_df["type"].values
test_data = tf.data.Dataset.from_tensor_slices((test_df["msg"].values, test_labels))

# Displayed as the cell output: (string message, integer label) per element.
test_data.element_spec


(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [None]:
BUFFER_SIZE = 100  # number of examples held in the shuffle buffer
BATCH_SIZE = 32    # examples per batch for both splits

# NOTE: both names are rebound to their batched versions, so running this cell
# a second time without rebuilding the datasets would batch the batches.

# Training pipeline: shuffle individual examples, then batch and prefetch.
train_data = (
    train_data
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation pipeline: same batching, original order kept (no shuffle).
test_data = (
    test_data
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Text -> integer-token-id layer: vocabulary capped at 1000 entries, every
# message padded or truncated to 1000 token ids.
text_vector = TextVectorization(
    max_tokens=1000,
    output_sequence_length=1000,
    output_mode="int",
)

# Learn the vocabulary from the training messages only; labels are dropped.
train_text = train_data.map(lambda message, _label: message)
text_vector.adapt(train_text)

In [None]:
# Pull the vocabulary held by the vectorization layer into a NumPy array
# and display its first 20 entries as a quick sanity check.
vocab = np.array(text_vector.get_vocabulary())
vocab[:20]

In [None]:
# Raw string in -> single logit out. The vectorization layer is part of the
# model, so callers pass plain text.
model = tf.keras.Sequential([
    text_vector,
    tf.keras.layers.Embedding(
        len(text_vector.get_vocabulary()),
        64,
        mask_zero=True,  # token id 0 is treated as padding and masked downstream
    ),
    # First recurrent layer returns the full sequence so the second one can consume it.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1)  # no activation: the output is a logit
])


model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    # The model emits logits, so the decision boundary is 0.0 (sigmoid(0) == 0.5).
    # The plain 'accuracy' string would threshold the raw outputs at 0.5 instead.
    # name="accuracy" keeps the usual 'accuracy' / 'val_accuracy' history keys.
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name="accuracy")],
)

In [None]:
# Training schedule.
EPOCHS = 10
VALIDATION_STEPS = 30  # validation batches evaluated at the end of each epoch

# Fit on the training pipeline, validating against the held-out pipeline.
history = model.fit(
    train_data,
    epochs=EPOCHS,
    validation_data=test_data,
    validation_steps=VALIDATION_STEPS,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Score the trained model on the full held-out pipeline and report both numbers.
test_loss, test_acc = model.evaluate(test_data)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_acc}")

Test Loss: 0.07752354443073273
Test Accuracy: 0.9877873659133911


In [None]:
# function to predict messages based on model
# (should return list containing prediction and label, ex. [0.008318834938108921, 'ham'])
def predict_message(pred_text):
  """Classify one SMS message as "ham" or "spam".

  The model ends in Dense(1) without an activation and is trained with
  from_logits=True, so model.predict returns a raw logit. The logit is
  squashed to a probability here so the 0.5 cut-off is the actual decision
  boundary and the returned score lies in [0, 1].

  Args:
    pred_text: the message text as a Python string.

  Returns:
    A two-element list [spam_probability, label], where spam_probability is a
    Python float in [0, 1] and label is "ham" or "spam".
  """
  logit = float(model.predict([pred_text])[0][0])
  # tanh form of the logistic sigmoid: same value, no overflow for large |logit|.
  spam_probability = float(0.5 * (1.0 + np.tanh(0.5 * logit)))
  text_type = "ham" if spam_probability < 0.5 else "spam"
  return [spam_probability, text_type]

# Smoke-test the predictor on one everyday message and print the
# two-element list it returns (numeric score, then label).
pred_text = "how are you doing today?"

prediction = predict_message(pred_text)
print(prediction)

[-11.01214, 'ham']


In [None]:
# Run this cell to test your function and model. Do not modify contents.
def test_predictions():
  """Check predict_message against seven fixed messages and print the verdict.

  Only the label (second element of each prediction) is compared with the
  expected answer; every message must match for the challenge to pass.
  """
  test_messages = ["how are you doing today",
                   "sale today! to stop texts call 98912460324",
                   "i dont want to go. can we try it a different day? available sat",
                   "our new mobile video service is live. just install on your phone to start watching.",
                   "you have won £1000 cash! call to claim your prize.",
                   "i'll bring it tomorrow. don't forget the milk.",
                   "wow, is your arm alright. that happened to me one time too"
                  ]

  # Expected labels, in the same order as test_messages.
  test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]
  passed = True

  # No early exit: every message is run even after a mismatch.
  for msg, ans in zip(test_messages, test_answers):
    prediction = predict_message(msg)
    if prediction[1] != ans:
      passed = False

  if passed:
    print("You passed the challenge. Great job!")
  else:
    print("You haven't passed yet. Keep trying.")

test_predictions()


You passed the challenge. Great job!
