## **Reading the Dataset as a DataFrame**

In [None]:
# Mount Google Drive at /content/drive; the dataset and the saved models below
# are read from / written to paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import io
import pandas as pd

# Load the summarized-judgements workbook: the first row supplies the column
# names and the first column becomes the index.
df = pd.read_excel(
    '/content/drive/MyDrive/Final Year Paper Work/Complete Summarized Dataset.xlsx',
    index_col=0,
    header=0,
)

# Discard every row that has a missing value in any column.
df = df.dropna()
df.head()

## **Splitting the data for complete, extractive and abstractive summarized texts**

In [None]:
from sklearn.model_selection import train_test_split

# Each experiment pairs one text column with the shared 'Judgement Status' label.

# Complete Data
com_sentences = df.loc[:, ['Judgement', 'Judgement Status']]

# Abstractive Data
abs_sentences = df.loc[:, ['Abstractive Summarized Judgements', 'Judgement Status']]

# Extractive Data
ext_sentences = df.loc[:, ['Extractive Summarized Judgements', 'Judgement Status']]

# 75/25 train/test split for each variant. The three frames have the same rows
# and the same seed is used, so the same rows end up in each train/test split.

# Train and Test Split for Complete Data
df_train_com, df_test_com = train_test_split(
    com_sentences, random_state=42, test_size=0.25)

# Train and Test Split for Abstractive Data
df_train_abs, df_test_abs = train_test_split(
    abs_sentences, random_state=42, test_size=0.25)

# Train and Test Split for Extractive Data
df_train_ext, df_test_ext = train_test_split(
    ext_sentences, random_state=42, test_size=0.25)

## **Converting the Judgement Status to Categorical Values**

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the 'Judgement Status' labels for categorical cross-entropy.
#
# The encoding width is fixed once from the full label column. Left to itself,
# to_categorical infers the width from the largest label present in each call,
# so a split that happens to miss the highest class would produce a narrower
# matrix than its sibling split and break training/evaluation.
# NOTE(review): assumes labels are non-negative integers starting at 0 — confirm.
num_classes = int(df['Judgement Status'].max()) + 1

# Complete Data
y_train_com = to_categorical(df_train_com['Judgement Status'], num_classes=num_classes)
y_test_com = to_categorical(df_test_com['Judgement Status'], num_classes=num_classes)

# Abstractive Data
y_train_abs = to_categorical(df_train_abs['Judgement Status'], num_classes=num_classes)
y_test_abs = to_categorical(df_test_abs['Judgement Status'], num_classes=num_classes)

# Extractive Data
y_train_ext = to_categorical(df_train_ext['Judgement Status'], num_classes=num_classes)
y_test_ext = to_categorical(df_test_ext['Judgement Status'], num_classes=num_classes)

## **Import Statements**

In [None]:
!pip install transformers

In [None]:
# Import Statements
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense

In [None]:
import transformers
from transformers import AutoTokenizer, TFBertModel

bert_tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
bert_model = TFBertModel.from_pretrained('bert-base-cased')

## **1) Complete Data**

In [None]:
# Tokenize the input (takes some time)
# Here, tokenizer used is from bert-base-cased
#
# Every text is truncated AND padded to exactly 100 tokens. padding='max_length'
# is required because the model is built with Input(shape=(100,)): the previous
# padding=True only pads to the longest sequence actually present, so the tensor
# width could drop below 100 (or differ between train and test).
x_train_com_bert = bert_tokenizer(
    text=df_train_com['Judgement'].tolist(),
    add_special_tokens=True,
    max_length=100,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)
x_test_com_bert = bert_tokenizer(
    text=df_test_com['Judgement'].tolist(),
    add_special_tokens=True,
    max_length=100,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

In [None]:
# Handles on the tokenized training tensors for the complete-text experiment.
# NOTE(review): input_ids_com_bert is rebound to a Keras Input layer in the
# model-building cell that follows, and attention_mask_com_bert is not read by
# any later cell visible here — this cell looks removable; confirm first.
input_ids_com_bert = x_train_com_bert['input_ids']
attention_mask_com_bert = x_train_com_bert['attention_mask']

In [None]:
# BERT Architecture (complete judgements)
# Token ids + attention mask -> BERT encoder -> max-pool over tokens
# -> Dense(128) -> Dropout -> Dense(32) -> 4-way logits.
max_len = 100
input_ids_com_bert = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
input_mask_com_bert = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")

# Load a dedicated encoder for this experiment. Reusing the single global
# `bert_model` in all three models would share its weights, so fine-tuning on
# one text variant would leak into the models trained afterwards.
bert_encoder_com = TFBertModel.from_pretrained('bert-base-cased')

# [0] is the encoder's first output — the per-token hidden states — which
# GlobalMaxPool1D then reduces over the sequence axis.
embeddings = bert_encoder_com(input_ids_com_bert, attention_mask = input_mask_com_bert)[0]
out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
out = Dense(128, activation='relu')(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32,activation = 'relu')(out)

# No activation on the head: the model is compiled with
# CategoricalCrossentropy(from_logits=True), which applies softmax itself.
# The previous sigmoid head fed squashed values into a loss expecting logits.
# argmax over these logits gives the same predicted class as over probabilities.
y = Dense(4)(out)
model_com_bert = tf.keras.Model(inputs=[input_ids_com_bert, input_mask_com_bert], outputs=y)

# Fine-tune the encoder too (referenced directly rather than via layers[2],
# which depends on Keras' internal layer ordering).
bert_encoder_com.trainable = True

In [None]:
# Legacy Adam with gradient-norm clipping.
# NOTE(review): in the legacy optimizers `decay` is an inverse-time
# learning-rate decay, not weight decay — confirm that is what is intended.
optimizer_bert = tf.keras.optimizers.legacy.Adam(
    learning_rate=5e-5,  # this learning rate is for bert model , taken from huggingface website
    epsilon=1e-8,
    clipnorm=1.0,
    decay=0.01,
)

# Loss and metric.
# from_logits=True makes the loss apply softmax itself, so it expects raw scores.
# NOTE(review): check that the model's final layer emits logits, not an
# activated output.
loss_bert = CategoricalCrossentropy(from_logits=True)
# Plain categorical accuracy; 'balanced_accuracy' is only the display name
# (and therefore the key under which it appears in the training history).
metric_bert = CategoricalAccuracy(name='balanced_accuracy')

# Compile the model
model_com_bert.compile(
    loss=loss_bert,
    optimizer=optimizer_bert,
    metrics=metric_bert,
)
model_com_bert.summary()

In [None]:
# Fine-tune on the complete-text training split. The input dict keys match the
# names of the model's Input layers.
# NOTE(review): validation_data is the test split, so the validation curves are
# not independent of the final test metrics.
history_com_bert = model_com_bert.fit(
    x={name: x_train_com_bert[name] for name in ('input_ids', 'attention_mask')},
    y=y_train_com,
    validation_data=(
        {name: x_test_com_bert[name] for name in ('input_ids', 'attention_mask')},
        y_test_com,
    ),
    batch_size=64,
    epochs=10,
)

In [None]:
# Raw model outputs for the test split (one row of 4 scores per sample);
# the first row is displayed as a quick sanity check.
predicted_raw_com_bert = model_com_bert.predict(
    {name: x_test_com_bert[name] for name in ('input_ids', 'attention_mask')}
)
predicted_raw_com_bert[0]

In [None]:
import numpy as np

# Predicted class = index of the highest score in each row; the ground truth is
# the label column of the test split.
y_true_com_bert = df_test_com.loc[:, 'Judgement Status']
y_predicted_com_bert = predicted_raw_com_bert.argmax(axis=1)

In [None]:
from sklearn.metrics import classification_report, f1_score
# Per-class precision / recall / F1 on the test split, followed by the
# unweighted (macro) mean of the per-class F1 scores.
print(classification_report(y_true=y_true_com_bert, y_pred=y_predicted_com_bert))

print("F1 Score: ", f1_score(y_true=y_true_com_bert, y_pred=y_predicted_com_bert, average='macro'))

In [None]:
# Re-pack the training inputs as a plain dict keyed by the model's input names,
# then score the trained model on the training split.
x_train_com_bert = {name: x_train_com_bert[name] for name in ('input_ids', 'attention_mask')}

# evaluate() returns the loss followed by the compiled metric.
train_loss_com_bert, train_accuracy_com_bert = model_com_bert.evaluate(
    x_train_com_bert, y_train_com, verbose=False)
print(f"Training Accuracy: {train_accuracy_com_bert:.4f}")

In [None]:
# Re-pack the test inputs as a plain dict keyed by the model's input names,
# then score the trained model on the test split.
x_test_com_bert = {name: x_test_com_bert[name] for name in ('input_ids', 'attention_mask')}

# evaluate() returns the loss followed by the compiled metric.
test_loss_com_bert, test_accuracy_com_bert = model_com_bert.evaluate(
    x_test_com_bert, y_test_com, verbose=False)
print(f"Testing Accuracy:  {test_accuracy_com_bert:.4f}")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

def plot_graphs(history, string):
    """Plot a training curve and its validation counterpart against epochs.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
        string: metric key to plot; ``'val_' + string`` is plotted alongside it.
    """
    val_key = 'val_' + string
    for key in (string, val_key):
        plt.plot(history.history[key], label=key)
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend()
    plt.show()

# Accuracy curve first, then loss, for the complete-text model.
plot_graphs(history_com_bert, "balanced_accuracy")
plot_graphs(history_com_bert, "loss")

In [None]:
# Persist the trained complete-text model to Drive as an .h5 file.
model_com_bert.save("/content/drive/MyDrive/Final Year Paper Work/H5 Files/COM-BERT.h5")

In [None]:
import pandas as pd

# "Model Outcome" is 1 where the predicted class equals the actual
# 'Judgement Status' and 0 otherwise.
y_predicted_com_bert = predicted_raw_com_bert.argmax(axis=1)
model_outcome_bert = [
    int(predicted == actual)
    for predicted, actual in zip(y_predicted_com_bert, y_true_com_bert)
]

# One row per test judgement: its text and whether the model got it right.
results_df_com_bert = pd.DataFrame(
    {
        'Judgements': df_test_com['Judgement'],
        'Model Outcome': model_outcome_bert,
    }
)

# Written to the current working directory, without the index column.
results_df_com_bert.to_csv('model_results.csv', index=False)

## **2) Abstractive Summarized Data**


In [None]:
# Tokenize the input (takes some time)
# Here, tokenizer used is from bert-base-cased
#
# Every text is truncated AND padded to exactly 100 tokens. padding='max_length'
# is required because the model is built with Input(shape=(100,)): the previous
# padding=True only pads to the longest sequence actually present, so short
# summaries could yield tensors narrower than 100 (or of different widths for
# train and test).
x_train_abs_bert = bert_tokenizer(
    text=df_train_abs['Abstractive Summarized Judgements'].tolist(),
    add_special_tokens=True,
    max_length=100,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)
x_test_abs_bert = bert_tokenizer(
    text=df_test_abs['Abstractive Summarized Judgements'].tolist(),
    add_special_tokens=True,
    max_length=100,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

In [None]:
# Handles on the tokenized training tensors for the abstractive experiment.
# NOTE(review): input_ids_abs_bert is rebound to a Keras Input layer in the
# model-building cell that follows, and attention_mask_abs_bert is not read by
# any later cell visible here — this cell looks removable; confirm first.
input_ids_abs_bert = x_train_abs_bert['input_ids']
attention_mask_abs_bert = x_train_abs_bert['attention_mask']

In [None]:
# BERT Architecture (abstractive summaries)
# Token ids + attention mask -> BERT encoder -> max-pool over tokens
# -> Dense(128) -> Dropout -> Dense(32) -> 4-way logits.
max_len = 100
input_ids_abs_bert = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
input_mask_abs_bert = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")

# Load a dedicated, freshly pretrained encoder. The global `bert_model` has
# already been fine-tuned by the complete-text experiment (Keras models that
# call the same layer share its weights), so reusing it would start this
# experiment from weights trained on another variant and skew the comparison.
bert_encoder_abs = TFBertModel.from_pretrained('bert-base-cased')

# [0] is the encoder's first output — the per-token hidden states — which
# GlobalMaxPool1D then reduces over the sequence axis.
embeddings = bert_encoder_abs(input_ids_abs_bert, attention_mask = input_mask_abs_bert)[0]
out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
out = Dense(128, activation='relu')(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32,activation = 'relu')(out)

# No activation on the head: the model is compiled with
# CategoricalCrossentropy(from_logits=True), which applies softmax itself.
# The previous sigmoid head fed squashed values into a loss expecting logits.
# argmax over these logits gives the same predicted class as over probabilities.
y = Dense(4)(out)
model_abs_bert = tf.keras.Model(inputs=[input_ids_abs_bert, input_mask_abs_bert], outputs=y)

# Fine-tune the encoder too (referenced directly rather than via layers[2],
# which depends on Keras' internal layer ordering).
bert_encoder_abs.trainable = True

In [None]:
# Fresh legacy Adam (with gradient-norm clipping) for the abstractive model.
# NOTE(review): in the legacy optimizers `decay` is an inverse-time
# learning-rate decay, not weight decay — confirm that is what is intended.
optimizer_bert = tf.keras.optimizers.legacy.Adam(
    learning_rate=5e-5,  # this learning rate is for bert model , taken from huggingface website
    epsilon=1e-8,
    clipnorm=1.0,
    decay=0.01,
)

# Loss and metric.
# from_logits=True makes the loss apply softmax itself, so it expects raw scores.
# NOTE(review): check that the model's final layer emits logits, not an
# activated output.
loss_bert = CategoricalCrossentropy(from_logits=True)
# Plain categorical accuracy; 'balanced_accuracy' is only the display name
# (and therefore the key under which it appears in the training history).
metric_bert = CategoricalAccuracy(name='balanced_accuracy')

# Compile the model
model_abs_bert.compile(
    loss=loss_bert,
    optimizer=optimizer_bert,
    metrics=metric_bert,
)
model_abs_bert.summary()

In [None]:
# Fine-tune on the abstractive-summary training split. The input dict keys
# match the names of the model's Input layers.
# NOTE(review): validation_data is the test split, so the validation curves are
# not independent of the final test metrics.
history_abs_bert = model_abs_bert.fit(
    x={name: x_train_abs_bert[name] for name in ('input_ids', 'attention_mask')},
    y=y_train_abs,
    validation_data=(
        {name: x_test_abs_bert[name] for name in ('input_ids', 'attention_mask')},
        y_test_abs,
    ),
    batch_size=64,
    epochs=10,
)

In [None]:
# Raw model outputs for the test split (one row of 4 scores per sample);
# the first row is displayed as a quick sanity check.
predicted_raw_abs_bert = model_abs_bert.predict(
    {name: x_test_abs_bert[name] for name in ('input_ids', 'attention_mask')}
)
predicted_raw_abs_bert[0]

In [None]:
import numpy as np

# Predicted class = index of the highest score in each row; the ground truth is
# the label column of the test split.
y_true_abs_bert = df_test_abs.loc[:, 'Judgement Status']
y_predicted_abs_bert = predicted_raw_abs_bert.argmax(axis=1)

In [None]:
from sklearn.metrics import classification_report, f1_score
# Per-class precision / recall / F1 on the test split, followed by the
# unweighted (macro) mean of the per-class F1 scores.
print(classification_report(y_true=y_true_abs_bert, y_pred=y_predicted_abs_bert))

print("F1 Score: ", f1_score(y_true=y_true_abs_bert, y_pred=y_predicted_abs_bert, average='macro'))

In [None]:
# Re-pack the training inputs as a plain dict keyed by the model's input names,
# then score the trained model on the training split.
x_train_abs_bert = {name: x_train_abs_bert[name] for name in ('input_ids', 'attention_mask')}

# evaluate() returns the loss followed by the compiled metric.
train_loss_abs_bert, train_accuracy_abs_bert = model_abs_bert.evaluate(
    x_train_abs_bert, y_train_abs, verbose=False)
print(f"Training Accuracy: {train_accuracy_abs_bert:.4f}")

In [None]:
# Re-pack the test inputs as a plain dict keyed by the model's input names,
# then score the trained model on the test split.
x_test_abs_bert = {name: x_test_abs_bert[name] for name in ('input_ids', 'attention_mask')}

# evaluate() returns the loss followed by the compiled metric.
test_loss_abs_bert, test_accuracy_abs_bert = model_abs_bert.evaluate(
    x_test_abs_bert, y_test_abs, verbose=False)
print(f"Testing Accuracy:  {test_accuracy_abs_bert:.4f}")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# NOTE(review): plot_graphs is defined with the same body in an earlier cell of
# this notebook; this redefinition shadows it without changing behaviour.
def plot_graphs(history, string):
    """Plot a training curve and its validation counterpart against epochs.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
        string: metric key to plot; ``'val_' + string`` is plotted alongside it.
    """
    val_key = 'val_' + string
    for key in (string, val_key):
        plt.plot(history.history[key], label=key)
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend()
    plt.show()

# Accuracy curve first, then loss, for the abstractive-summary model.
plot_graphs(history_abs_bert, "balanced_accuracy")
plot_graphs(history_abs_bert, "loss")

In [None]:
# Persist the trained abstractive-summary model to Drive as an .h5 file.
model_abs_bert.save("/content/drive/MyDrive/Final Year Paper Work/H5 Files/ABS-BERT.h5")

## **3) Extractive Summarized Data**

In [None]:
# Tokenize the input (takes some time)
# Here, tokenizer used is from bert-base-cased
#
# Every text is truncated AND padded to exactly 100 tokens. padding='max_length'
# is required because the model is built with Input(shape=(100,)): the previous
# padding=True only pads to the longest sequence actually present, so short
# summaries could yield tensors narrower than 100 (or of different widths for
# train and test).
x_train_ext_bert = bert_tokenizer(
    text=df_train_ext['Extractive Summarized Judgements'].tolist(),
    add_special_tokens=True,
    max_length=100,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)
x_test_ext_bert = bert_tokenizer(
    text=df_test_ext['Extractive Summarized Judgements'].tolist(),
    add_special_tokens=True,
    max_length=100,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

In [None]:
# Handles on the tokenized training tensors for the extractive experiment.
# NOTE(review): input_ids_ext_bert is rebound to a Keras Input layer in the
# model-building cell that follows, and attention_mask_ext_bert is not read by
# any later cell visible here — this cell looks removable; confirm first.
input_ids_ext_bert = x_train_ext_bert['input_ids']
attention_mask_ext_bert = x_train_ext_bert['attention_mask']

In [None]:
# BERT Architecture (extractive summaries)
# Token ids + attention mask -> BERT encoder -> max-pool over tokens
# -> Dense(128) -> Dropout -> Dense(32) -> 4-way logits.
max_len = 100
input_ids_ext_bert = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
input_mask_ext_bert = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")

# Load a dedicated, freshly pretrained encoder. The global `bert_model` has
# already been fine-tuned by the earlier experiments (Keras models that call
# the same layer share its weights), so reusing it would start this experiment
# from weights trained on another variant and skew the comparison.
bert_encoder_ext = TFBertModel.from_pretrained('bert-base-cased')

# [0] is the encoder's first output — the per-token hidden states — which
# GlobalMaxPool1D then reduces over the sequence axis.
embeddings = bert_encoder_ext(input_ids_ext_bert, attention_mask = input_mask_ext_bert)[0]
out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
out = Dense(128, activation='relu')(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32,activation = 'relu')(out)

# No activation on the head: the model is compiled with
# CategoricalCrossentropy(from_logits=True), which applies softmax itself.
# The previous sigmoid head fed squashed values into a loss expecting logits.
# argmax over these logits gives the same predicted class as over probabilities.
y = Dense(4)(out)
model_ext_bert = tf.keras.Model(inputs=[input_ids_ext_bert, input_mask_ext_bert], outputs=y)

# Fine-tune the encoder too (referenced directly rather than via layers[2],
# which depends on Keras' internal layer ordering).
bert_encoder_ext.trainable = True

In [None]:
# Fresh legacy Adam (with gradient-norm clipping) for the extractive model.
# NOTE(review): in the legacy optimizers `decay` is an inverse-time
# learning-rate decay, not weight decay — confirm that is what is intended.
optimizer_bert = tf.keras.optimizers.legacy.Adam(
    learning_rate=5e-5,  # this learning rate is for bert model , taken from huggingface website
    epsilon=1e-8,
    clipnorm=1.0,
    decay=0.01,
)

# Loss and metric.
# from_logits=True makes the loss apply softmax itself, so it expects raw scores.
# NOTE(review): check that the model's final layer emits logits, not an
# activated output.
loss_bert = CategoricalCrossentropy(from_logits=True)
# Plain categorical accuracy; 'balanced_accuracy' is only the display name
# (and therefore the key under which it appears in the training history).
metric_bert = CategoricalAccuracy(name='balanced_accuracy')

# Compile the model
model_ext_bert.compile(
    loss=loss_bert,
    optimizer=optimizer_bert,
    metrics=metric_bert,
)
model_ext_bert.summary()

In [None]:
# Fine-tune on the extractive-summary training split. The input dict keys
# match the names of the model's Input layers.
# NOTE(review): validation_data is the test split, so the validation curves are
# not independent of the final test metrics.
history_ext_bert = model_ext_bert.fit(
    x={name: x_train_ext_bert[name] for name in ('input_ids', 'attention_mask')},
    y=y_train_ext,
    validation_data=(
        {name: x_test_ext_bert[name] for name in ('input_ids', 'attention_mask')},
        y_test_ext,
    ),
    batch_size=64,
    epochs=10,
)

In [None]:
# Raw model outputs for the test split (one row of 4 scores per sample);
# the first row is displayed as a quick sanity check.
predicted_raw_ext_bert = model_ext_bert.predict(
    {name: x_test_ext_bert[name] for name in ('input_ids', 'attention_mask')}
)
predicted_raw_ext_bert[0]

In [None]:
import numpy as np

# Predicted class = index of the highest score in each row; the ground truth is
# the label column of the test split.
y_true_ext_bert = df_test_ext.loc[:, 'Judgement Status']
y_predicted_ext_bert = predicted_raw_ext_bert.argmax(axis=1)

In [None]:
from sklearn.metrics import classification_report, f1_score
# Per-class precision / recall / F1 on the test split, followed by the
# unweighted (macro) mean of the per-class F1 scores.
print(classification_report(y_true=y_true_ext_bert, y_pred=y_predicted_ext_bert))

print("F1 Score: ", f1_score(y_true=y_true_ext_bert, y_pred=y_predicted_ext_bert, average='macro'))

In [None]:
# Re-pack the training inputs as a plain dict keyed by the model's input names,
# then score the trained model on the training split.
x_train_ext_bert = {name: x_train_ext_bert[name] for name in ('input_ids', 'attention_mask')}

# evaluate() returns the loss followed by the compiled metric.
train_loss_ext_bert, train_accuracy_ext_bert = model_ext_bert.evaluate(
    x_train_ext_bert, y_train_ext, verbose=False)
print(f"Training Accuracy: {train_accuracy_ext_bert:.4f}")

In [None]:
# Re-pack the test inputs as a plain dict keyed by the model's input names,
# then score the trained model on the test split.
x_test_ext_bert = {name: x_test_ext_bert[name] for name in ('input_ids', 'attention_mask')}

# evaluate() returns the loss followed by the compiled metric.
test_loss_ext_bert, test_accuracy_ext_bert = model_ext_bert.evaluate(
    x_test_ext_bert, y_test_ext, verbose=False)
print(f"Testing Accuracy:  {test_accuracy_ext_bert:.4f}")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# NOTE(review): plot_graphs is defined with the same body in earlier cells of
# this notebook; this redefinition shadows them without changing behaviour.
def plot_graphs(history, string):
    """Plot a training curve and its validation counterpart against epochs.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
        string: metric key to plot; ``'val_' + string`` is plotted alongside it.
    """
    val_key = 'val_' + string
    for key in (string, val_key):
        plt.plot(history.history[key], label=key)
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend()
    plt.show()

# Accuracy curve first, then loss, for the extractive-summary model.
plot_graphs(history_ext_bert, "balanced_accuracy")
plot_graphs(history_ext_bert, "loss")

In [None]:
# Persist the trained extractive-summary model to Drive as an .h5 file.
model_ext_bert.save("/content/drive/MyDrive/Final Year Paper Work/H5 Files/EXT-BERT.h5")