In [1]:
%%capture
# Install the third-party packages for this notebook; %%capture hides pip's output.
# NOTE(review): none of these packages is version-pinned, so a rerun may resolve
# different releases than the ones that produced the saved outputs — consider pinning.
%pip install transformers datasets accelerate torch evaluate bert_score rouge_score bitsandbytes

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import evaluate

# Fetch the two text-generation metrics used by the evaluation step at the end.
rouge, bertscore = (evaluate.load(metric_name) for metric_name in ("rouge", "bertscore"))

# Make tf.function-wrapped code run eagerly; this is the notebook's workaround
# for graph-mode problems.
# NOTE(review): eager execution is generally slower than graph mode — revisit
# once the underlying graph-mode issue has been identified.
tf.config.run_functions_eagerly(True)

def load_data(file_path):
    """Read a CSV of papers and return its abstracts and titles as parallel lists.

    Rows where either the abstract or the title is missing are dropped as a
    pair, so the two returned lists always stay aligned index-by-index. The
    remaining values are coerced to ``str``.

    Args:
        file_path: Path (or any other source accepted by ``pd.read_csv``) of a
            CSV file containing ``abstract`` and ``title`` columns.

    Returns:
        A ``(abstracts, titles)`` tuple of two equally long lists of strings.

    Raises:
        KeyError: If the ``abstract`` or ``title`` column is absent.
    """
    df = pd.read_csv(file_path)
    # pandas parses empty cells as float NaN; such values are not text and
    # would break the string-based tokenization and metrics applied later.
    pairs = df[['abstract', 'title']].dropna()
    return pairs['abstract'].astype(str).tolist(), pairs['title'].astype(str).tolist()

# Load datasets
train_abstracts, train_titles = load_data("/kaggle/input/springer-journal-final/train.csv")
val_abstracts, val_titles = load_data("/kaggle/input/springer-journal-final/val.csv")
test_abstracts, test_titles = load_data("/kaggle/input/springer-journal-final/test.csv")

# Tokenization with vocabulary size limit
VOCAB_LIMIT = 50000  # Upper bound on distinct token ids (id 0 is reserved for padding)
tokenizer = Tokenizer(num_words=VOCAB_LIMIT)
tokenizer.fit_on_texts(train_abstracts + train_titles)

# With num_words set, the tokenizer only emits ids below VOCAB_LIMIT, while
# word_index still lists every word it saw. Clamp the size so the embedding
# table and the softmax layer both cover exactly the ids that can occur.
vocab_size = min(VOCAB_LIMIT, len(tokenizer.word_index) + 1)

train_sequences = tokenizer.texts_to_sequences(train_abstracts)
train_titles_sequences = tokenizer.texts_to_sequences(train_titles)
val_sequences = tokenizer.texts_to_sequences(val_abstracts)
val_titles_sequences = tokenizer.texts_to_sequences(val_titles)
test_sequences = tokenizer.texts_to_sequences(test_abstracts)

# Define max_len
MAX_LEN = 300  # Limiting the max length of the sequences

# Titles are padded to MAX_LEN as well because the model below emits one
# prediction per input position, so targets must match the input length.
train_padded = pad_sequences(train_sequences, maxlen=MAX_LEN, padding='post')
train_titles_padded = pad_sequences(train_titles_sequences, maxlen=MAX_LEN, padding='post')
val_padded = pad_sequences(val_sequences, maxlen=MAX_LEN, padding='post')
val_titles_padded = pad_sequences(val_titles_sequences, maxlen=MAX_LEN, padding='post')
test_padded = pad_sequences(test_sequences, maxlen=MAX_LEN, padding='post')

# Define GRU model using Functional API
EMBEDDING_DIM = 128
HIDDEN_DIM = 256

inputs = tf.keras.Input(shape=(MAX_LEN,))
x = tf.keras.layers.Embedding(vocab_size, EMBEDDING_DIM, mask_zero=True)(inputs)
x = tf.keras.layers.GRU(HIDDEN_DIM, return_sequences=True)(x)
# One softmax over the (clamped) vocabulary per time step.
outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)

# Targets are integer token ids and the last layer already applies softmax,
# hence the sparse loss with from_logits=False.
# NOTE(review): title targets shorter than MAX_LEN are filled with padding id 0,
# so the reported accuracy presumably reflects padding positions to a large
# degree — confirm before reading it as title quality.
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), optimizer='adam', metrics=['accuracy'])

# Train model with smaller batch size
model.fit(train_padded, train_titles_padded, validation_data=(val_padded, val_titles_padded), epochs=10, batch_size=16)

# Generate titles
def generate_title(model, abstract_seq, index_word=None):
    """Greedily decode a title from the model's per-position predictions.

    The most probable token id is taken at every position of the first
    sequence in the batch; padding ids (0) and ids without a vocabulary entry
    are skipped, and the remaining words are joined with single spaces.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns an array
            whose last axis holds per-token scores.
        abstract_seq: Batch holding the padded abstract to decode; only the
            first item of the batch is used.
        index_word: Optional mapping from token id to word. Defaults to the
            module-level ``tokenizer.index_word``.

    Returns:
        The decoded title as a string (empty if nothing but padding or
        unknown ids was predicted).
    """
    if index_word is None:
        index_word = tokenizer.index_word
    pred_seq = model.predict(abstract_seq, verbose=0)
    pred_tokens = np.argmax(pred_seq, axis=-1)[0]
    # Cast to plain int for the lookup and drop ids that have no word, so the
    # joined title never contains empty entries (i.e. doubled spaces).
    words = (index_word.get(int(token)) for token in pred_tokens if token > 0)
    return " ".join(word for word in words if word)

# Decode one title per test abstract. Each row gets a leading batch axis
# because generate_title expects a batch containing a single sequence.
predictions = [generate_title(model, row[np.newaxis, ...]) for row in test_padded]
references = test_titles

# Score the generated titles against the reference titles.
rouge_scores = rouge.compute(predictions=predictions, references=references)
bert_scores = bertscore.compute(predictions=predictions, references=references, lang="en")


def _average(values):
    """Return the arithmetic mean of a non-empty sequence of numbers."""
    return sum(values) / len(values)


# Report ROUGE as returned and BERTScore averaged over all test examples.
print("ROUGE:", rouge_scores)
print("BERTScore (averaged):")
print("  Precision:", _average(bert_scores["precision"]))
print("  Recall:", _average(bert_scores["recall"]))
print("  F1:", _average(bert_scores["f1"]))

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]



Epoch 1/10
[1m2851/2851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m694s[0m 242ms/step - accuracy: 0.6694 - loss: 0.8626 - val_accuracy: 0.6383 - val_loss: 0.4530
Epoch 2/10
[1m2851/2851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m686s[0m 241ms/step - accuracy: 0.6429 - loss: 0.4548 - val_accuracy: 0.6383 - val_loss: 0.4464
Epoch 3/10
[1m2851/2851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m688s[0m 241ms/step - accuracy: 0.6418 - loss: 0.4421 - val_accuracy: 0.6384 - val_loss: 0.4376
Epoch 4/10
[1m2851/2851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m707s[0m 248ms/step - accuracy: 0.6411 - loss: 0.4254 - val_accuracy: 0.6383 - val_loss: 0.4348
Epoch 5/10
[1m2851/2851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m706s[0m 248ms/step - accuracy: 0.6430 - loss: 0.4060 - val_accuracy: 0.6382 - val_loss: 0.4369
Epoch 6/10
[1m2851/2851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m699s[0m 245ms/step - accuracy: 0.6431 - loss: 0.3883 - val_accuracy: 0.6379 - val_loss:

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ROUGE: {'rouge1': 0.04099031037981893, 'rouge2': 0.0027623514596621895, 'rougeL': 0.03505233564189242, 'rougeLsum': 0.03500190159065407}
BERTScore (averaged):
  Precision: 0.6884950721730184
  Recall: 0.8011957163184259
  F1: 0.7401501042469408
