### Imporrint necessary libraries

In [1]:
import os

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib as plt

import tensorflow_hub as hub
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
import tensorflow_text

from langdetect import detect


### Loading The Data

In [2]:
path_to_train_data="../Dataset/train_data.csv"
path_to_test_data="../Dataset/test_data.csv"
train_data= pd.read_csv(path_to_train_data)
test_data= pd.read_csv(path_to_test_data)
train_data.head()

Unnamed: 0,campaign_id,comment_id,comment_description,sentiment
0,2212,17908351952371091,لخسارة الوزن الزائد والكرش بمدة قياسية مع عدم ...,Negative
1,2217,17935944230085744,🔥🔥🔥,Positive
2,2215S,17899518356507020,This is so good😍 would be great it If you add ...,Negative
3,2214,18014766136389857,😍,Positive
4,2203,17924318627206870,طبق رائع ومميز تبارك الرحمن تسلم ايدك يارب 😍,Positive


- dataset caracteristics

In [3]:
print(train_data.dtypes)
train_data.groupby('sentiment').describe()

campaign_id            object
comment_id              int64
comment_description    object
sentiment              object
dtype: object


Unnamed: 0_level_0,comment_id,comment_id,comment_id,comment_id,comment_id,comment_id,comment_id,comment_id
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
sentiment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Irrelevant,2.0,1.803449e+16,27272420000000.0,1.80152e+16,1.802484e+16,1.803449e+16,1.804413e+16,1.805377e+16
Negative,1082.0,1.799547e+16,121523200000000.0,1.78432e+16,1.791969e+16,1.795389e+16,1.802626e+16,1.840129e+16
Positive,4416.0,1.799213e+16,117728700000000.0,1.784217e+16,1.791955e+16,1.795206e+16,1.801864e+16,1.840678e+16


### Loading and building Bert Model

In [4]:
model_path= "./Bert_model/bert_cased"
preprocessor_path="./Bert_model/bert_multi_cased_preprocessor"

def define_model(preprocessor_path,model_path):
    # Loading model and preprocessor
    input_text = Input(shape=(), dtype=tf.string, name="input_text")
    preprocessor_layer=hub.KerasLayer(preprocessor_path,name='preprocessor')
    preprocessed_inputs=preprocessor_layer(input_text)
    
    input_word_ids = preprocessed_inputs['input_word_ids']
    input_mask = preprocessed_inputs['input_mask']
    input_type_ids = preprocessed_inputs['input_type_ids']
    
    bert_layer = hub.KerasLayer(model_path, trainable=True,name='Bert_encoder')
    bert_outputs = bert_layer([input_word_ids,input_mask,input_type_ids])
    dense = Dense(256, activation='relu')(bert_outputs[0])
    output_layer = Dense(1, activation='sigmoid',name='classifier')(dense)  # Binary classification, change units for multi-class
    model = Model(input_text, output_layer)
    return model,preprocessor_layer
bert_classifier,bert_preprocessor=define_model(preprocessor_path,model_path)
bert_classifier.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_text (InputLayer)        [(None,)]            0           []                               
                                                                                                  
 preprocessor (KerasLayer)      {'input_mask': (Non  0           ['input_text[0][0]']             
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

### Preprocessing Data

- Removing non arabic and non english comment from the dataset

In [5]:
# Remove sentiments with irrelevant class because they are only two
irrelevant_indexes=train_data[train_data['sentiment']=='Irrelevant'].index
train_data.drop(irrelevant_indexes,inplace=True)
# remove non english and non arabic comments 
i=0
while i <= len(train_data):
    try:
        lang= detect(train_data.iloc[i,2])
        if lang !='ar' and lang !='eng':
            train_data.drop(i,inplace=True)
            i=i+1
        else:
            i=i+1
            continue
    except:
        i=i+1
        continue
train_data.groupby('sentiment').describe()

Unnamed: 0_level_0,comment_id,comment_id,comment_id,comment_id,comment_id,comment_id,comment_id,comment_id
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
sentiment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Negative,873.0,1.799507e+16,118508900000000.0,1.78432e+16,1.792365e+16,1.795419e+16,1.802385e+16,1.840129e+16
Positive,3449.0,1.799202e+16,118657300000000.0,1.784231e+16,1.791887e+16,1.795118e+16,1.80172e+16,1.840678e+16


- Cleaning Strings

In [22]:
# blance data

- Splitting comment and sentiments

In [9]:
train_comments=train_data['comment_description'].astype(str).values
#changing sentiment labels from (negativ, positive) to (0,1)
train_labels=train_data['sentiment'].astype(str)
train_labels,uniques=pd.factorize(train_labels.values)

### Evaluate pretrained Bert model

In [12]:
pred = bert_classifier.predict(['nice one'])



### Fine tune bert model

- Model compiling and Hyperparameters congiguration

In [12]:
metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy', dtype=tf.float32)]
loss = tf.keras.losses.SparseCategoricalCrossentropy()

bert_classifier.compile(optimizer="adam",loss=loss, metrics=metrics)

- Training the model

In [None]:
# model fit on the data
epochs=10
batch_size = 10

bert_classifier.fit(
    train_comments,
    train_labels,
    epochs=epochs,
    batch_size=batch_size
)

### Evaluate fine tuned model

### Export the model