In [47]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import sequence

In [None]:
# Locations of the two dataset splits, relative to the notebook's working
# directory. Both files are later read as tab-separated text.
train_file_path, test_file_path = (
    "data/train-data.tsv",
    "data/valid-data.tsv",
)

In [51]:
# Load the training split: a header-less, tab-separated file whose two columns
# are named "type" (the label) and "msg" (the message text).
# DataFrame.dropna() returns a new frame rather than modifying in place, so it
# is chained into the assignment; calling it on its own line had no effect.
train_df = pd.read_csv(
    train_file_path,
    sep="\t",
    header=None,
    names=["type", "msg"],
).dropna()
train_df.head()

Unnamed: 0,type,msg
0,ham,ahhhh...just woken up!had a bad dream about u ...
1,ham,you can never do nothing
2,ham,"now u sound like manky scouse boy steve,like! ..."
3,ham,mum say we wan to go then go... then she can s...
4,ham,never y lei... i v lazy... got wat? dat day ü ...


In [53]:
# Load the held-out split with the same layout as the training file:
# header-less, tab-separated, columns named "type" (label) and "msg" (text).
# DataFrame.dropna() returns a new frame rather than modifying in place, so it
# is chained into the assignment; calling it on its own line had no effect.
test_df = pd.read_csv(
    test_file_path,
    sep="\t",
    header=None,
    names=["type", "msg"],
).dropna()
test_df.head()

Unnamed: 0,type,msg
0,ham,i am in hospital da. . i will return home in e...
1,ham,"not much, just some textin'. how bout you?"
2,ham,i probably won't eat at all today. i think i'm...
3,ham,don‘t give a flying monkeys wot they think and...
4,ham,who are you seeing?


In [55]:
# Build the label -> integer mapping once, from the training split, and reuse
# it for the test split. pd.factorize assigns codes in order of first
# appearance, so factorizing each split independently could give the same
# label different ids in train and test (e.g. if the test file started with a
# different class than the training file).
train_codes, label_classes = pd.factorize(train_df["type"])
test_codes = label_classes.get_indexer(test_df["type"])
if (test_codes < 0).any():
    # get_indexer marks values that are absent from label_classes with -1.
    raise ValueError(
        "test split contains label values that never occur in the training split"
    )

train_df["type"] = train_codes
test_df["type"] = test_codes

# Wrap each split as a tf.data.Dataset of (message, label) pairs.
train_labels = train_df["type"].values
train_data = tf.data.Dataset.from_tensor_slices((train_df["msg"].values, train_labels))

test_labels = test_df["type"].values
test_data = tf.data.Dataset.from_tensor_slices((test_df["msg"].values, test_labels))

In [15]:
# Shuffle.buffer_size controls how many elements are held for random sampling;
# a buffer smaller than the dataset only shuffles locally. The previous fixed
# value of 100 covered a small fraction of the training rows, so the buffer is
# sized to the whole training frame instead (the data is small text, so
# holding all of it in the buffer is cheap).
BUFFER_SIZE = len(train_df)
BATCH_SIZE = 32

# NOTE: this cell rebinds train_data/test_data to their batched versions, so
# re-running it without first re-running the cell that creates the datasets
# would batch already-batched data.
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
# The evaluation split is only batched and prefetched; it is never shuffled.
test_data = test_data.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [19]:
# Text-to-integer preprocessing layer: keeps at most 1000 vocabulary entries
# and pads/truncates every message to 1000 token ids.
# NOTE(review): 1000 tokens per message looks generous for short text
# messages; confirm whether a smaller sequence length would do.
text_vector = layers.TextVectorization(
    max_tokens=1000,
    output_sequence_length=1000,
    output_mode="int",
)

# The vocabulary is learned from the message text only, so the label is
# stripped from each (text, label) element before adapting.
messages_only = train_data.map(lambda message, _label: message)
text_vector.adapt(messages_only)

In [21]:
# Pull the vocabulary learned by the vectorization layer into a NumPy array
# and display its first 20 entries.
learned_terms = text_vector.get_vocabulary()
vocab = np.array(learned_terms)
vocab[0:20]

array(['', '[UNK]', 'to', 'i', 'you', 'a', 'the', 'u', 'and', 'in', 'is',
       'me', 'my', 'for', 'your', 'of', 'it', 'call', 'have', 'on'],
      dtype='<U15')

In [23]:
# Classifier: raw string -> token ids -> embeddings -> two stacked
# bidirectional LSTMs -> dense head producing a single logit.
model = keras.Sequential([
    # Preprocessing lives inside the model, so it accepts raw strings.
    text_vector,
    layers.Embedding(
        input_dim=len(text_vector.get_vocabulary()),
        output_dim=64,
        # Treat token id 0 as padding so downstream layers can mask it.
        mask_zero=True,
    ),
    # The first recurrent layer returns the full sequence so that the second
    # recurrent layer has per-timestep inputs to consume.
    layers.Bidirectional(layers.LSTM(units=64, return_sequences=True)),
    layers.Bidirectional(layers.LSTM(units=32)),
    layers.Dense(units=64, activation="relu"),
    layers.Dropout(rate=0.3),
    # No activation: the output is a raw logit, matching from_logits=True below.
    layers.Dense(units=1),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [25]:
# Train for 10 epochs, validating on the held-out split after every epoch.
# validation_steps is intentionally not set: capping it at 30 batches scored
# only part of the validation data each epoch, which made the per-epoch
# val_loss/val_accuracy not comparable with a full model.evaluate() run on the
# same dataset. Without the cap, Keras iterates the whole validation dataset.
history = model.fit(
    train_data,
    validation_data=test_data,
    epochs=10,
)

Epoch 1/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 614ms/step - accuracy: 0.8704 - loss: 0.6461 - val_accuracy: 0.8604 - val_loss: 0.4606
Epoch 2/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 583ms/step - accuracy: 0.8692 - loss: 0.3891 - val_accuracy: 0.8760 - val_loss: 0.1827
Epoch 3/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 586ms/step - accuracy: 0.9460 - loss: 0.1409 - val_accuracy: 0.9698 - val_loss: 0.0937
Epoch 4/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 593ms/step - accuracy: 0.9776 - loss: 0.0775 - val_accuracy: 0.9740 - val_loss: 0.0717
Epoch 5/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 599ms/step - accuracy: 0.9855 - loss: 0.0573 - val_accuracy: 0.9823 - val_loss: 0.0611
Epoch 6/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 606ms/step - accuracy: 0.9874 - loss: 0.0447 - val_accuracy: 0.9896 - val_loss: 0.0566
Epoch 7/10

In [27]:
# Score the trained model on the held-out dataset. With a single compiled
# metric, evaluate() yields the loss followed by the accuracy.
evaluation = model.evaluate(test_data)
test_loss, test_acc = evaluation

# Report both numbers, one per line.
for caption, value in (('Test Loss:', test_loss), ('Test Accuracy:', test_acc)):
    print(caption, value)

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 152ms/step - accuracy: 0.9873 - loss: 0.0637
Test Loss: 0.06216467544436455
Test Accuracy: 0.9856321811676025


In [29]:
# Persist the trained model (including its text-vectorization layer, which is
# part of the Sequential stack) to a single file in the working directory.
model.save(filepath='text_model.keras')