In [None]:
!pip install bert-score rouge_score


In [None]:
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from bert_score import score as bert_score
import numpy as np
from nltk.corpus import brown
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from sklearn.model_selection import train_test_split
import nltk
# Fetch the NLTK data packages: the Brown corpus read in Step 1 ...
nltk.download('brown')
# ... and the 'punkt' tokenizer data.
# NOTE(review): nothing visible in this notebook appears to use 'punkt' — confirm it is still needed.
nltk.download('punkt')

In [3]:
# Step 1: Load and clean data
MIN_SENTENCE_TOKENS = 3  # skip fragments with fewer corpus tokens than this
MAX_SENTENCES = 5000     # Limit for quick training

# Walk the corpus and stop as soon as enough sentences have been collected,
# instead of lower-casing/joining every sentence and slicing afterwards.
# The resulting list is the same: the first MAX_SENTENCES sentences that
# pass the length filter, joined with spaces and lower-cased.
sentences = []
for sent in brown.sents():
    if len(sentences) >= MAX_SENTENCES:
        break
    if len(sent) < MIN_SENTENCE_TOKENS:
        continue
    sentences.append(' '.join(sent).lower())


In [4]:
# Step 2: Tokenize
# Learn the word -> integer id vocabulary from the cleaned sentences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

# One slot more than the number of learned words; the Keras tokenizer numbers
# words from 1, which leaves id 0 free (generate_text uses it as padding).
vocab_size = 1 + len(tokenizer.word_index)
word_index = tokenizer.word_index

In [5]:
# Step 3: Create sequences with 4-word context and 1 target word
# Slide a window over every sentence: the 4 ids before position `end` are the
# input and the id at `end` is the word to predict. Sentences with fewer than
# 5 known tokens contribute no samples.
contexts, targets = [], []
for token_ids in tokenizer.texts_to_sequences(sentences):
    for end in range(4, len(token_ids)):
        contexts.append(token_ids[end - 4:end])
        targets.append(token_ids[end])

X = np.array(contexts)
y = np.array(targets)

In [6]:
# Step 4: Train-test split
# Hyperparameters consumed when the network is built in Step 5.
gru_units = 128
embedding_dim = 100

# Hold out 20% of the (context, target) pairs; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Step 5: Prepare the model
# The network reads one fixed-length window of token ids and outputs a
# probability for every vocabulary id as the next word.

# Take the window length from the training data (4 ids per sample, as built in
# Step 3) so the declared input shape cannot drift from what fit()/predict()
# are actually fed. The previous hard-coded (2,) did not match the 4-id windows.
context_len = X_train.shape[1]

model = Sequential()
# Changed here: input_shape instead of input_length
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_shape=(context_len,)))
model.add(GRU(gru_units))
model.add(Dense(vocab_size, activation='softmax'))

# Targets are plain integer ids (not one-hot), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Step 6: Train
# The held-out split is passed as validation data, so its loss and accuracy
# are reported alongside the training metrics.
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=5,
    validation_data=(X_test, y_test),
)

In [None]:
# Step 7: Generate text function
def generate_text(seed_text, next_words=20, context_size=4):
    """Greedily extend ``seed_text`` one predicted word at a time.

    Relies on the notebook-level ``tokenizer`` and ``model``.

    Args:
        seed_text: Text to start from. It is returned unchanged at the front
            of the result.
        next_words: Maximum number of words to append.
        context_size: Number of token ids fed to the model per step. Must
            match the input length the model was built with (4 here).

    Returns:
        The seed words followed by the generated words, joined by spaces.
        Generation stops early if the model predicts an id that has no word
        (for example the padding id 0).
    """
    result = seed_text.split()

    # Tokenise the seed as a string, once, so it gets the same lower-casing and
    # punctuation filtering the training sentences got. Passing a list of words
    # skips that filtering, so a seed word such as "good," was silently dropped.
    token_ids = list(tokenizer.texts_to_sequences([seed_text])[0])

    for _ in range(next_words):
        # Most recent ids, left-padded with 0 when fewer than context_size exist.
        window = token_ids[-context_size:]
        window = [0] * (context_size - len(window)) + window
        model_input = np.array(window).reshape(1, context_size)

        # Predict next word (greedy: always the most probable id)
        predicted_probs = model.predict(model_input, verbose=0)[0]
        predicted_index = int(np.argmax(predicted_probs))

        # Map index to word
        predicted_word = tokenizer.index_word.get(predicted_index, '')
        if predicted_word == '':
            break

        result.append(predicted_word)
        # Track the id directly; no need to re-tokenise the text on every step.
        token_ids.append(predicted_index)
    return ' '.join(result)

In [None]:
# Step 8: Evaluate on test set using BLEU, ROUGE (1,2,L), and BERTScore
#
# Every test sample is a single next-word prediction, so each metric below
# compares one predicted word with one reference word.

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

bleu_scores = []
rouge1_scores = []
rouge2_scores = []
rougeL_scores = []

pred_texts = []
true_texts = []

num_eval_samples = 100  # limit eval for speed

# Slicing caps the sample count at len(X_test) on its own.
eval_contexts = X_test[:num_eval_samples]
eval_targets = y_test[:num_eval_samples]

# One batched forward pass instead of a separate predict() call per sample.
pred_indices = np.argmax(model.predict(eval_contexts, verbose=0), axis=1)

for pred_idx, true_word_idx in zip(pred_indices, eval_targets):
    # Ids without a word (e.g. padding id 0) become the empty string.
    pred_word = tokenizer.index_word.get(int(pred_idx), '')
    true_word = tokenizer.index_word.get(int(true_word_idx), '')

    # BLEU (1-gram)
    bleu = sentence_bleu([true_word.split()], pred_word.split(), weights=(1, 0, 0, 0))
    bleu_scores.append(bleu)

    # ROUGE
    # NOTE(review): ROUGE-2 works on bigrams, which a single word does not
    # have, so it is expected to stay at 0 here — confirm before reporting it.
    rouge_scores = scorer.score(true_word, pred_word)
    rouge1_scores.append(rouge_scores['rouge1'].fmeasure)
    rouge2_scores.append(rouge_scores['rouge2'].fmeasure)
    rougeL_scores.append(rouge_scores['rougeL'].fmeasure)

    pred_texts.append(pred_word)
    true_texts.append(true_word)

print(f"Avg BLEU score (next word): {np.mean(bleu_scores):.4f}")
print(f"Avg ROUGE-1 F1: {np.mean(rouge1_scores):.4f}")
print(f"Avg ROUGE-2 F1: {np.mean(rouge2_scores):.4f}")
print(f"Avg ROUGE-L F1: {np.mean(rougeL_scores):.4f}")

# BERTScore (for single-word prediction, still illustrative)
P, R, F1 = bert_score(pred_texts, true_texts, lang='en', verbose=True)
print(f"Avg BERTScore F1: {F1.mean():.4f}")



In [None]:
# Step 9: Example generation
# Extend a short seed by up to 15 words and show the result under a header line.
seed = "I am a good"
generated = generate_text(seed, next_words=15)

print("\nExample generated text:", generated, sep="\n")
