In [4]:
import numpy as np
import pandas as pd
!pip install -q -U "tensorflow-text==2.8.*"

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
data = pd.read_csv('/content/drive/MyDrive/movie.csv')

In [7]:
data.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


In [8]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [9]:
data['label'].value_counts()

0    20019
1    19981
Name: label, dtype: int64

In [10]:
data.groupby(['label']).describe()

Unnamed: 0_level_0,text,text,text,text
Unnamed: 0_level_1,count,unique,top,freq
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,20019,19815,This show comes up with interesting locations ...,3
1,19981,19908,"Hilarious, clean, light-hearted, and quote-wor...",4


In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data['text'],data['label'],test_size=0.2)

**Model Creation**

In [12]:
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")
Input_layer = tf.keras.layers.Input(shape=(),name='text_input',dtype=tf.string)
preprocessed_text = bert_preprocess(Input_layer)
encoding = bert_encoder(preprocessed_text)

dense_layer = tf.keras.layers.Dense(64,activation='relu')(encoding['pooled_output'])
dropout_layer = tf.keras.layers.Dropout(0.1)(dense_layer)
output_layer = tf.keras.layers.Dense(1,activation='sigmoid')(dropout_layer)

model = tf.keras.models.Model(inputs = [Input_layer],outputs = [output_layer])

In [13]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text_input (InputLayer)        [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_word_ids':   0           ['text_input[0][0]']             
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128)}                                                  

In [14]:
METRICS = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall')
]

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=METRICS)

In [16]:
model.fit(X_train, y_train, epochs=5,validation_split=0.15,batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fe0e0d04510>

In [19]:
import tensorflow as tf
from tensorflow import keras
#model.save('bert_model.h5')

In [21]:
loaded_model = tf.keras.models.load_model(('bert_model.h5'),custom_objects={'KerasLayer':hub.KerasLayer})

In [24]:
model.evaluate(X_test,y_test)



[0.4645000398159027, 0.7787500023841858, 0.807055652141571, 0.7374163269996643]

In [25]:
model.save('/content/drive/MyDrive/bert_model.h5')