In [None]:
!pip install tensorflow_text

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_text
  Downloading tensorflow_text-2.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)
[K     |████████████████████████████████| 4.6 MB 5.3 MB/s 
[?25hCollecting tensorflow<2.10,>=2.9.0
  Downloading tensorflow-2.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (511.7 MB)
[K     |████████████████████████████████| 511.7 MB 6.2 kB/s 
Collecting keras<2.10.0,>=2.9.0rc0
  Downloading keras-2.9.0-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 26.8 MB/s 
[?25hCollecting flatbuffers<2,>=1.12
  Downloading flatbuffers-1.12-py2.py3-none-any.whl (15 kB)
Collecting tensorflow-estimator<2.10.0,>=2.9.0rc0
  Downloading tensorflow_estimator-2.9.0-py2.py3-none-any.whl (438 kB)
[K     |████████████████████████████████| 438 kB 66.8 MB/s 
Collecting gast<=0.4.0,>=0.2.1
  Downloading gast-0.4.0-py3-none-any.whl (9.

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [None]:
# Report how many GPU devices TensorFlow can see in this runtime.
gpu_devices = tf.config.list_physical_devices('GPU')
print("No. of GPUS available:", len(gpu_devices))

No. of GPUS available: 1


In [None]:
import pandas as pd

# The CSV is read with explicit column names: the text and its 0/1 label.
df_raw = pd.read_csv("combined_data.csv", names=['text', 'label'])
# A bare expression in the middle of a cell is never rendered, so show the
# preview explicitly (`display` is provided by IPython in notebook kernels).
display(df_raw.head(5))

# Per-label statistics before de-duplication.
print(df_raw.groupby('label').describe())
print("After removing Duplicates:")
# Keep the first occurrence of every distinct text. Deriving a new frame
# (instead of mutating in place) leaves `df_raw` intact for inspection.
df = df_raw.drop_duplicates(subset='text', keep='first')
df.groupby('label').describe()


        text                                                               
       count unique                                                top freq
label                                                                      
0      11470   9985    haIZ....frOM laSt wEek i keEP on goiNg out.....    6
1       5709   4373  Mom's depression tied to kids' emotional, inte...   11
After removing Duplicates:


Unnamed: 0_level_0,text,text,text,text
Unnamed: 0_level_1,count,unique,top,freq
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,9985,9985,#Depressed mood can be caused by infectious di...,1
1,4371,4371,"With all of this unnessary family drama, I fe...",1


In [None]:
# Split the de-duplicated data by label.
df_positive = df[df['label'] == 1]
df_negative = df[df['label'] == 0]

# Downsample the label-0 rows to the number of label-1 rows. A fixed
# random_state makes the drawn subset (and everything trained on it)
# reproducible across kernel restarts.
df_negative_down = df_negative.sample(n=df_positive.shape[0], random_state=42)
assert df_negative_down.shape[0] == df_positive.shape[0], "classes are not balanced"

# Balanced dataset: all label-1 rows followed by the sampled label-0 rows.
df_balanced = pd.concat([df_positive, df_negative_down])
df_balanced

Unnamed: 0,text,label
1,"With all of this unnessary family drama, I fe...",1
6,"I feel so sad because i can't be happy, and th...",1
7,I went to church with my mom and a huge chunk ...,1
8,No love here.... #lonely #depressed pic.twitt...,1
9,I see so many people moving forward with their...,1
...,...,...
10571,"@JeffParsons Yeah, I think I will too - that's...",0
7560,@CallieSink My pleasure and thank you,0
7014,@saidthewhale great photographs! saw you guys ...,0
13799,i JUST deleted a bunch of people off my myspac...,0


In [None]:
from sklearn.model_selection import train_test_split

# Stratify on the label so both splits keep the balanced class ratio, and fix
# random_state so the same rows land in train/test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df_balanced['text'],
    df_balanced['label'],
    stratify=df_balanced['label'],
    random_state=42,
)

In [None]:
# Preview the first ten training texts.
X_train.head(n=10)

16221    More like Depression*  https://twitter.com/ave...
16970    It's important to not identify with your illne...
2136       everything is over finally...i felt much hap...
3454      I haven't touched another person in 4 years. ...
591      I feel like he lost interest in me he doesn't ...
925      Honestly I wouldn't I get why im the last choi...
1255      I can absolutely relate to what you're going ...
15297    Me after slipping into another depression  htt...
1333      I take it that by you saying "Learn to give r...
13116    @iampattic I don't even know how that could ha...
Name: text, dtype: object

In [None]:
# TF-Hub handles for the uncased English BERT preprocessing model and its
# matching encoder.
BERT_PREPROCESS_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# No `trainable` argument is passed, so each layer uses hub.KerasLayer's default.
bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_HANDLE)
bert_encoder = hub.KerasLayer(BERT_ENCODER_HANDLE)

In [None]:
def get_sentence_embedding(sentences):
    """Return the BERT pooled-output embedding for a batch of sentences.

    Args:
        sentences: batch of raw strings; the sample call in this notebook
            passes a plain list of str.

    Returns:
        The 'pooled_output' entry of the encoder's output dict (one vector
        per input sentence).
    """
    encoder_outputs = bert_encoder(bert_preprocess(sentences))
    return encoder_outputs['pooled_output']

# Smoke-test the embedding helper on two example sentences.
sample_sentences = [
    "$110 discount, hurry up",
    "Bhavin, are you up for a football game tonight?",
]
get_sentence_embedding(sample_sentences)

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[-0.83215076, -0.49884498, -0.83281636, ..., -0.6315708 ,
        -0.7261329 ,  0.9110628 ],
       [-0.8599927 , -0.50692946, -0.9453913 , ..., -0.8701028 ,
        -0.6732962 ,  0.8294471 ]], dtype=float32)>

In [None]:
# --- BERT layers: raw string in, encoder output dict out ---
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name="text")
preprocessed_text = bert_preprocess(text_input)
embeddings = bert_encoder(preprocessed_text)
pooled_output = embeddings['pooled_output']
print(pooled_output)

# --- Classification head: dropout followed by a single sigmoid unit ---
dropped = tf.keras.layers.Dropout(0.1, name='dropout')(pooled_output)
l = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(dropped)

# --- Final model: text in, sigmoid output out ---
model = tf.keras.Model(inputs=[text_input], outputs=[l])

In [None]:
# Print the layer-by-layer summary of the assembled model (layer type, output
# shape, parameter count and the layers each one is connected to).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_word_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128)}                                                      

In [None]:
# Metrics tracked during training and evaluation, in reporting order.
accuracy_metric = tf.keras.metrics.BinaryAccuracy(name='accuracy')
precision_metric = tf.keras.metrics.Precision(name='precision')
recall_metric = tf.keras.metrics.Recall(name='recall')
METRICS = [accuracy_metric, precision_metric, recall_metric]

# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 10 epochs. Keep the returned History object so per-epoch loss and
# metrics stay available in `history.history` after the cell finishes
# (assigning it also avoids echoing the History repr as cell output).
history = model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4316bca0d0>

In [None]:
# Evaluate on the held-out split. return_dict=True labels every value with its
# metric name (loss, accuracy, precision, recall) instead of returning a bare
# list whose order the reader has to infer from the compile() call.
model.evaluate(X_test, y_test, return_dict=True)



[0.3353291451931, 0.8732845187187195, 0.8306320905685425, 0.9377859234809875]

In [None]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 on the held-out split.
# The model emits one sigmoid probability per sample; flatten to 1-D and
# threshold at 0.5 to obtain hard 0/1 predictions. A boolean comparison plus
# astype(int) is used so the cell does not depend on a separate numpy import.
y_predicted = model.predict(X_test).flatten()
y_predicted = (y_predicted > 0.5).astype(int)
print(classification_report(y_test, y_predicted))

In [None]:
# Export the trained model to a directory (the recorded run wrote its assets
# under this path).
saved_model_dir = "/content/Bert_sigmoid/"
model.save(saved_model_dir)



INFO:tensorflow:Assets written to: /content/Bert_sigmoid/assets


INFO:tensorflow:Assets written to: /content/Bert_sigmoid/assets


In [None]:
import joblib

# Serialise the in-memory model object to the file 'bert_sigmoid' with joblib.
joblib.dump(model, filename='bert_sigmoid')
 



INFO:tensorflow:Assets written to: ram://731bc9e5-5448-4e73-8180-3e31b57aeca4/assets


INFO:tensorflow:Assets written to: ram://731bc9e5-5448-4e73-8180-3e31b57aeca4/assets


['bert_sigmoid']

In [None]:
# `Model.save` requires a target path; calling it with no arguments raises
# TypeError. Write the HDF5 file that the tensorflowjs_converter cell reads.
# NOTE(review): the model contains hub.KerasLayer layers - confirm the HDF5
# export reloads correctly (custom_objects={'KerasLayer': hub.KerasLayer}).
model.save("/content/bert_sigmoid.h5")

In [None]:
!pip install tensorflowjs
!tensorflowjs_converter --input_format=keras --weight_shard_size_bytes 4170000000 /content/bert_sigmoid.h5 /content/bertsigmoid_tfjs

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflowjs
  Downloading tensorflowjs-3.18.0-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 3.7 MB/s 
Collecting packaging~=20.9
  Downloading packaging-20.9-py2.py3-none-any.whl (40 kB)
[K     |████████████████████████████████| 40 kB 6.5 MB/s 
Installing collected packages: packaging, tensorflowjs
  Attempting uninstall: packaging
    Found existing installation: packaging 21.3
    Uninstalling packaging-21.3:
      Successfully uninstalled packaging-21.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.[0m
Successfully installed packaging-20.9 tensorflowjs-3.18.0


In [None]:
# NOTE(review): this cell repeats the earlier joblib cell verbatim - it dumps
# the same `model` object to the same 'bert_sigmoid' file. Presumably redundant;
# confirm before removing.
import joblib
 
joblib.dump(model, 'bert_sigmoid')

In [None]:
import pickle

# Pickle the in-memory model to 'bert_sigmoid.pkl'. The context manager
# guarantees the file handle is flushed and closed even if pickling fails;
# a bare open() passed inline is never closed explicitly.
with open('bert_sigmoid.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)



INFO:tensorflow:Assets written to: ram://9651cd8c-70cc-4819-8b5b-22148438dcd6/assets


INFO:tensorflow:Assets written to: ram://9651cd8c-70cc-4819-8b5b-22148438dcd6/assets
