In [1]:
%%capture
%pip install transformers datasets accelerate torch evaluate bert_score rouge_score bitsandbytes

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import evaluate

# Load datasets
train_df = pd.read_csv("/kaggle/input/springer-journal-final/train.csv")
val_df = pd.read_csv("/kaggle/input/springer-journal-final/val.csv")
test_df = pd.read_csv("/kaggle/input/springer-journal-final/test.csv")

# Preprocess data
def preprocess_text(text):
    return text.lower().strip()

train_abstracts = train_df['abstract'].apply(preprocess_text).tolist()
train_titles = train_df['title'].apply(preprocess_text).tolist()

val_abstracts = val_df['abstract'].apply(preprocess_text).tolist()
val_titles = val_df['title'].apply(preprocess_text).tolist()

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_abstracts + train_titles)

vocab_size = len(tokenizer.word_index) + 1

train_sequences = tokenizer.texts_to_sequences(train_abstracts)
train_titles_sequences = tokenizer.texts_to_sequences(train_titles)

max_len = max(len(seq) for seq in train_sequences)
train_padded = pad_sequences(train_sequences, maxlen=max_len, padding='post')
train_titles_padded = pad_sequences(train_titles_sequences, maxlen=max_len, padding='post')

# Build model
embedding_dim = 128
lstm_units = 256

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len),
    LSTM(lstm_units, return_sequences=False),
    Dense(vocab_size, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
model.fit(train_padded, np.array(train_titles_padded)[:, 0], epochs=10, validation_data=(train_padded, np.array(train_titles_padded)[:, 0]))

# Evaluate model
def generate_title(abstract):
    sequence = tokenizer.texts_to_sequences([preprocess_text(abstract)])
    padded = pad_sequences(sequence, maxlen=max_len, padding='post')
    prediction = model.predict(padded)
    predicted_index = np.argmax(prediction)
    return tokenizer.index_word.get(predicted_index, "")

predictions = [generate_title(abs) for abs in test_df['abstract']]
references = test_df['title'].tolist()

rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

rouge_scores = rouge.compute(predictions=predictions, references=references)
bert_scores = bertscore.compute(predictions=predictions, references=references, lang="en")

print("ROUGE:", rouge_scores)
print("BERTScore (averaged):")
print("  Precision:", sum(bert_scores["precision"]) / len(bert_scores["precision"]))
print("  Recall:", sum(bert_scores["recall"]) / len(bert_scores["recall"]))
print("  F1:", sum(bert_scores["f1"]) / len(bert_scores["f1"]))




Epoch 1/10
[1m1426/1426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 162ms/step - accuracy: 0.1093 - loss: 8.0092 - val_accuracy: 0.1129 - val_loss: 6.8721
Epoch 2/10
[1m1426/1426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 163ms/step - accuracy: 0.1142 - loss: 6.8511 - val_accuracy: 0.1129 - val_loss: 6.7574
Epoch 3/10
[1m1426/1426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 163ms/step - accuracy: 0.1107 - loss: 6.7989 - val_accuracy: 0.1129 - val_loss: 6.7328
Epoch 4/10
[1m1426/1426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 163ms/step - accuracy: 0.1116 - loss: 6.7801 - val_accuracy: 0.1129 - val_loss: 6.7259
Epoch 5/10
[1m1426/1426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 163ms/step - accuracy: 0.1158 - loss: 6.7345 - val_accuracy: 0.1129 - val_loss: 6.7229
Epoch 6/10
[1m1426/1426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 163ms/step - accuracy: 0.1111 - loss: 6.7351 - val_accuracy: 0.1129 - val_loss:

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ROUGE: {'rouge1': 0.037106168759961096, 'rouge2': 0.0, 'rougeL': 0.037089191757222625, 'rougeLsum': 0.037130554524320875}
BERTScore (averaged):
  Precision: 0.8378197825952405
  Recall: 0.7814797681269992
  F1: 0.808547284462602
