In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [5]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training early once the training accuracy exceeds 95%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's training accuracy and request a stop above 0.95.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras for this epoch; may be
                None or may lack the 'accuracy' key.
        """
        # Use None instead of a shared mutable `{}` default.
        logs = logs or {}
        accuracy = logs.get('accuracy')
        # Guard against a missing metric: comparing None with a float would
        # raise TypeError and abort training.
        if accuracy is not None and accuracy > 0.95:
            print('\n 95% accuracy has been reached')
            self.model.stop_training = True
callbacks = myCallback()

In [6]:
# Hyperparameters and preprocessing settings shared by the cells below.
# NOTE(review): vocab_size is not referenced by any other visible cell.
vocab_size = 20000
# Size of each word embedding vector (second argument to the Embedding layer).
embedding_dim = 200
# Every sequence is padded/truncated to this many tokens.
max_length = 200
# Sequences longer than max_length are cut at the end ('post').
trunc_type='post'
# Shorter sequences are padded at the end ('post').
padding_type='post'
# Token passed to the Tokenizer as its out-of-vocabulary placeholder.
oov_tok = "<OOV>"
# Number of leading CSV rows used for training; the rest are used for validation.
training_size = 174055

In [6]:
# Load the labelled posts and split them into training and validation sets.
src = "Suicide_Detection.csv"
csv_reader = pd.read_csv(src)
# Encode the string labels as integers: 'suicide' -> 1, 'non-suicide' -> 0.
csv_reader.loc[csv_reader['class'] == 'suicide', 'class'] = 1
csv_reader.loc[csv_reader['class'] == 'non-suicide', 'class'] = 0
# The first `training_size` rows are used for training.
training_sentences = csv_reader['text'].iloc[0:training_size]
training_labels = csv_reader['class'].iloc[0:training_size]
# Every remaining row is used for validation. The slice is open-ended on
# purpose: `training_size:-1` would silently drop the final row.
validation_sentences = csv_reader['text'].iloc[training_size:]
validation_labels = csv_reader['class'].iloc[training_size:]

In [7]:
# Build the word index from the training split only.
# NOTE(review): no `num_words` limit is passed, so `vocab_size` is not applied
# here and the full training vocabulary is indexed.
tokenizer = Tokenizer(oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
# One extra slot on top of the indexed words; this value sizes the Embedding layer.
total_words = 1 + len(tokenizer.word_index)

In [20]:
# Padding/truncation settings shared by both splits so they are encoded identically.
pad_options = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Convert each text into a list of word indices using the fitted tokenizer.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
validation_sequences = tokenizer.texts_to_sequences(validation_sentences)

# Pad/truncate every sequence to exactly max_length tokens.
training_padded = pad_sequences(training_sequences, **pad_options)
validation_padded = pad_sequences(validation_sequences, **pad_options)

In [25]:
# Binary text classifier: embedding -> bidirectional LSTM -> dropout -> dense head.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(total_words, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(120)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(24, activation='relu'))
# Single sigmoid unit, paired with the binary cross-entropy loss below.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 200, 200)          34259200  
_________________________________________________________________
bidirectional_4 (Bidirection (None, 240)               308160    
_________________________________________________________________
dropout_1 (Dropout)          (None, 240)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 24)                5784      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 25        
Total params: 34,573,169
Trainable params: 34,573,169
Non-trainable params: 0
_________________________________________________________________


In [26]:
num_epochs = 5

# Materialise the inputs as NumPy arrays before handing them to fit().
training_padded = np.array(training_padded)
validation_padded = np.array(validation_padded)

# Labels are cast to float32.
training_labels = np.array(training_labels).astype('float32')
validation_labels = np.array(validation_labels).astype('float32')

# `callbacks` may end training before num_epochs epochs have run.
history = model.fit(
    training_padded,
    training_labels,
    epochs=num_epochs,
    validation_data=(validation_padded, validation_labels),
    callbacks=[callbacks],
)

Epoch 1/5
Epoch 2/5

 90% accuracy has been reached


In [27]:
# Display the per-epoch loss/accuracy values recorded by model.fit().
history.history

{'loss': [0.1926991492509842, 0.11591196060180664],
 'accuracy': [0.9284306764602661, 0.9580420255661011],
 'val_loss': [0.15046481788158417, 0.14684762060642242],
 'val_accuracy': [0.9456892609596252, 0.9480506181716919]}

In [89]:
# Question Example
# How often have you felt as though the future was bleak, over the past few weeks?
predicted_sentences = "I've felt it more frequently now, like the world doesn't want me live anymore. All of my dream and hope just vanish like it was nothing"
# Encode the single answer as a one-element batch, then pad it the same way
# as the training data before scoring it with the trained model.
encoded_answer = tokenizer.texts_to_sequences([predicted_sentences])
token_list = pad_sequences(
    encoded_answer,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
model.predict(token_list)

array([[0.9704542]], dtype=float32)

In [30]:
# Persist the trained model under the 'my_model' path.
# NOTE(review): only the model is saved here; the fitted `tokenizer` used to
# encode inputs is not persisted by this cell.
model.save('my_model')

INFO:tensorflow:Assets written to: my_model\assets
INFO:tensorflow:Assets written to: my_model\assets


In [8]:
# Restore the model previously saved under 'my_model'.
new_model = tf.keras.models.load_model('my_model')

# Check its architecture
new_model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 200, 200)          34259200  
_________________________________________________________________
bidirectional_4 (Bidirection (None, 240)               308160    
_________________________________________________________________
dropout_1 (Dropout)          (None, 240)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 24)                5784      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 25        
Total params: 34,573,169
Trainable params: 34,573,169
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Question Example
# How often have you felt as though the future was bleak, over the past few weeks?
predicted_sentences = "I lost all my family, I don't have any will to live anymore"
# Encode the single answer as a one-element batch, pad it like the training
# data, and score it with the reloaded model.
encoded_answer = tokenizer.texts_to_sequences([predicted_sentences])
token_list = pad_sequences(
    encoded_answer,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
new_model.predict(token_list)

array([[0.963423]], dtype=float32)