In [None]:
# import libraries
try:
  # Install TensorFlow with pip (the %tensorflow_version magic only exists in Colab).
  !pip install tensorflow
except Exception:
  pass
import tensorflow as tf
import pandas as pd
from tensorflow import keras
!pip install tensorflow-datasets
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt

from keras.preprocessing import sequence

#print(tf.__version__)

In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
!wget https://cdn.freecodecamp.org/project-data/sms/valid-data.tsv

train_file_path = "train-data.tsv"
test_file_path = "valid-data.tsv"

In [None]:
# Encoder

vocab = {}          # word -> integer ID, assigned in first-seen order
word_encoding = 1   # next unused ID; IDs start at 1, so 0 is never given to a word
def one_hot_encoding(text):
  """Convert a message into a list of integer word IDs.

  Despite the name this is an integer (index) encoding, not a one-hot
  vector. The text is lower-cased and split on any run of whitespace, so
  repeated spaces, tabs and newlines never produce empty-string tokens.

  Words that are not yet in the module-level ``vocab`` are assigned the next
  free ID, so calling this function mutates ``vocab`` and ``word_encoding``
  (this also happens when encoding test data or messages to predict).

  Args:
    text: the message to encode.

  Returns:
    A list with one integer ID per word, in message order; an empty list
    when the text contains no words.
  """
  global word_encoding

  encoding = []

  # split() without an argument collapses whitespace runs, unlike split(" ")
  for word in text.lower().split():
    if word not in vocab:
      vocab[word] = word_encoding
      word_encoding += 1
    encoding.append(vocab[word])

  return encoding


In [None]:
# Read data
# The TSV files are read without a header row; columns are named label and text.

train_data = pd.read_table(train_file_path, names=["label", "text"])
test_data = pd.read_table(test_file_path, names=["label", "text"])

# Labels (what to predict)
# Move the label column out of each frame, turn the text categories into
# numbers (ham -> 0, spam -> 1) and keep the result as a one-column frame.
train_labels = (
    train_data.pop("label")
    .map({"ham": 0, "spam": 1})
    .to_frame(name="label")
)
test_labels = (
    test_data.pop("label")
    .map({"ham": 0, "spam": 1})
    .to_frame(name="label")
)



In [None]:
# Encode texts to integers
# Training texts go first, so their words receive the lowest vocabulary IDs.

train_data["text"] = train_data["text"].map(one_hot_encoding)
test_data["text"] = test_data["text"].map(one_hot_encoding)

In [None]:
# Check encoding: show the integer sequence of the training row labelled 9

print("train_data\n", train_data["text"].loc[9])


In [None]:
# Give every encoded text the same length so the model gets fixed-size input.
# From here on train_data / test_data are the arrays returned by pad_sequences,
# no longer DataFrames.

MAXLEN = 250

train_data = tf.keras.utils.pad_sequences(train_data["text"], maxlen=MAXLEN)
test_data = tf.keras.utils.pad_sequences(test_data["text"], maxlen=MAXLEN)

In [None]:
# Create model: embedding -> LSTM -> single sigmoid output

# NOTE(review): VOCAB_SIZE is a hard-coded constant rather than being derived
# from `vocab`; presumably every word ID has to stay below it — confirm.
VOCAB_SIZE = 88584

model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=32))
model.add(tf.keras.layers.LSTM(units=32))
model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))



In [None]:
# Compile and train model

model.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["acc"],
)

# The last 20% of the training data is held out for validation in each epoch.
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Evaluate model on the held-out test file; results holds the loss followed
# by the compiled metric(s).

results = model.evaluate(x=test_data, y=test_labels)
print(results)

In [None]:
# Model summary: print the layers of the model built above

model.summary()

In [None]:
# function to predict messages based on model
# (should return list containing prediction and label, ex. [0.008318834938108921, 'ham'])

def predict_message(pred_text):
  """Classify a single message as "ham" or "spam".

  Args:
    pred_text: raw message text.

  Returns:
    A two-element list ``[probability, label]``: ``probability`` is the
    model's sigmoid output as a plain Python float, and ``label`` is "spam"
    when that probability is above 0.5, otherwise "ham".
  """

  # Encode with the shared vocabulary (words never seen before get new IDs).
  encoded_text = one_hot_encoding(pred_text)

  # Pad or truncate to MAXLEN with the same helper used for the training
  # data, so long messages no longer fail and the layout matches training.
  model_input = tf.keras.utils.pad_sequences([encoded_text], MAXLEN)

  # Predict; the output for one message is a (1, 1) array, so unwrap it.
  probability = float(model.predict(model_input)[0][0])

  result_text = "spam" if probability > 0.5 else "ham"

  return [probability, result_text]

# Quick manual check of predict_message on one example message.
pred_text = "how are you doing today?"

prediction = predict_message(pred_text)
print(prediction)

In [None]:
# Run this cell to test your function and model. Do not modify contents.
def test_predictions():
  """Check predict_message against seven fixed messages and print the verdict.

  Only the label (element 1 of each prediction) is compared with the expected
  answer; a single mismatch makes the whole check fail.
  """
  test_messages = ["how are you doing today",
                   "sale today! to stop texts call 98912460324",
                   "i dont want to go. can we try it a different day? available sat",
                   "our new mobile video service is live. just install on your phone to start watching.",
                   "you have won £1000 cash! call to claim your prize.",
                   "i'll bring it tomorrow. don't forget the milk.",
                   "wow, is your arm alright. that happened to me one time too"
                  ]

  # Expected labels, in the same order as test_messages.
  test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]
  passed = True

  for msg, ans in zip(test_messages, test_answers):
    prediction = predict_message(msg)
    if prediction[1] != ans:
      passed = False

  if passed:
    print("You passed the challenge. Great job!")
  else:
    print("You haven't passed yet. Keep trying.")

test_predictions()
