<a href="https://colab.research.google.com/github/pedrov718/classifying_mental_health_with_NN/blob/main/nueral_network_classifyer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [95]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from tensorflow.keras.layers import Flatten
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from keras.utils import np_utils
from sklearn import preprocessing

In [96]:
posts = pd.read_excel("/content/mental_health_posts_scraped.xlsx", index_col= 'Unnamed: 0')

In [97]:
posts.rename(columns = {"Origin":'label'}, inplace = True)

In [98]:
labels = pd.get_dummies(posts.label, drop_first= False)

In [99]:
posts = posts.join(labels)

In [100]:
posts.sample(1)

Unnamed: 0,Title,Post Text,ID,Score,Total Comments,label,Post URL,text,Anxiety,BPD,autism,bipolar,depression,schizophrenia
613,Don’t want to sleep because tomorrow will come...,All I want to do is get in bed and sleep forev...,fdnzub,1662,91,depression,https://www.reddit.com/r/depression/comments/f...,Don’t want to sleep because tomorrow will come...,0,0,0,0,1,0


In [101]:
posts.columns[7:].values

array(['text', 'Anxiety', 'BPD', 'autism', 'bipolar', 'depression',
       'schizophrenia'], dtype=object)

In [102]:
df = posts[posts.columns[7:].values]

In [103]:
df[posts.columns[8:].values].sum()

Anxiety          3119
BPD              3260
autism           3444
bipolar          3272
depression       3301
schizophrenia    3291
dtype: int64

In [104]:
train, val, test = np.split(df.sample(frac=1), [int(0.8*len(df)), int(0.9*len(df))])

In [105]:
def df_to_dataset(dataframe, shuffle=True, batch_size=1024):
  
  df = dataframe.copy()
  labels = df[posts.columns[8:].values]

  df = df["text"]

  ds = tf.data.Dataset.from_tensor_slices((df, labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(tf.data.AUTOTUNE)
  return ds

In [106]:
train_data = df_to_dataset(train)
valid_data = df_to_dataset(val)
test_data = df_to_dataset(test)

## Embedding the model

In [107]:
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, dtype=tf.string, trainable=True)

In [108]:
hub_layer(list(train_data)[0][0])

<tf.Tensor: shape=(1024, 50), dtype=float32, numpy=
array([[ 0.18635641, -0.02071199, -0.35330382, ..., -0.09158319,
         0.22433645,  0.17418337],
       [ 0.31400838,  0.09124126, -0.49660453, ..., -0.38878542,
         0.02635207,  0.05250515],
       [ 0.3781169 , -0.30648866, -0.08748874, ..., -0.39428183,
        -0.06089247, -0.02412256],
       ...,
       [-0.02883809,  0.02763822, -0.16357765, ...,  0.05677037,
         0.03618642, -0.04855941],
       [ 0.8224237 ,  0.1410026 , -0.38157764, ..., -0.4366896 ,
        -0.04358026,  0.22820275],
       [ 1.3415587 ,  0.21432193, -0.5641905 , ..., -0.56641334,
         0.04301087, -0.34579104]], dtype=float32)>

In [109]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(6, activation='sigmoid'))

In [110]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [111]:
model.evaluate(train_data)



[0.7390820384025574, 0.16667725145816803]

In [112]:
model.evaluate(valid_data)



[0.7417339086532593, 0.1686135083436966]

In [113]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights= True)

In [114]:
history = model.fit(train_data, epochs=1000, validation_data=valid_data, callbacks=[callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [115]:
model.evaluate(test_data)



[0.8166742920875549, 0.641950249671936]

# LSTM

In [116]:
encoder = tf.keras.layers.TextVectorization(max_tokens=2000)
encoder.adapt(train_data.map(lambda text, label: text))

In [117]:
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'i', 'to', 'and', 'the', 'a', 'my', 'of', 'it', 'me',
       'that', 'in', 'is', 'but', 'for', 'have', 'this', 'with', 'was'],
      dtype='<U15')

In [118]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=32,
        mask_zero=True
    ),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(6, activation='sigmoid')
])

In [119]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [120]:
model.evaluate(train_data)
model.evaluate(valid_data)

ValueError: ignored

In [None]:
history = model.fit(train_data, epochs=5, validation_data=valid_data)

In [None]:
model.evaluate(test_data)