In [None]:
pip install tensorflow==2.12 keras==2.12 transformers==4.27.4 nltk seaborn scikit-learn nltk  numpy pandas matplotlib



In [None]:
import nltk
# Download the 'omw-1.4' NLTK data package into the local nltk_data directory.
# Presumably required by the WordNet lemmatizer used further down — confirm
# against the installed NLTK version.
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [None]:
import nltk

# Download only the NLTK resources this notebook actually uses instead of the
# complete 'all' collection (which is very large and slow to fetch):
#   - punkt / punkt_tab : sentence/word tokenizer models used by word_tokenize
#                         (which of the two is needed depends on the NLTK version)
#   - stopwords         : English stop-word list
#   - wordnet / omw-1.4 : data for WordNetLemmatizer
# Add further resource names here if other cells need them.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(nltk_resource, quiet=True)

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_eng is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_r

True

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import re
# Evaluation metric used to score each fold's held-out predictions
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.tokenize import word_tokenize
import pickle  # For saving models in Pickle format

# Set random seeds for reproducibility (NumPy and TensorFlow global seeds)
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Output directory for saving plots and model artefacts; created if missing
output_dir = 'nfr_models/nfr-clean'
os.makedirs(output_dir, exist_ok=True)

# Load the requirements dataset from CSV. No filtering is applied here: every
# row of the file is kept. The file is read with ISO-8859-1 encoding.
print("Loading dataset...")
df = pd.read_csv("FR_NFR_Dataset_Big_Last.csv",encoding="ISO-8859-1")

print(df.head())

print(f"Dataset loaded: {len(df)} samples .")

# Encode the categorical 'classType' column as integer ids in a new
# 'encoded_label' column; label_encoder.classes_ maps ids back to names.
label_encoder = LabelEncoder()
df['encoded_label'] = label_encoder.fit_transform(df['classType'])

# Initialize the BERT tokenizer and set max sequence length (in tokens) used
# for truncation when encoding the requirement texts
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_length = 256

# Initialize the BERT-based sequence classifier with one output per distinct
# encoded label. A single model instance is created here.
num_labels = len(df['encoded_label'].unique())
print(f"Number of unique labels: {num_labels}")
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)

# NLTK helpers shared by clean_text: English stop-word set, WordNet
# lemmatizer and Porter stemmer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

def clean_text(text):
    """Normalise a single requirement string.

    Steps, in order: lower-case the text, strip ``http...`` URLs, delete every
    character that is neither a word character nor whitespace, tokenise with
    NLTK, drop tokens found in ``stop_words``, then lemmatise and Porter-stem
    each remaining token.

    Returns:
        The processed tokens joined by single spaces.
    """
    lowered = text.lower()
    without_urls = re.sub(r'http\S+', '', lowered)
    without_punct = re.sub(r'[^\w\s]', '', without_urls)
    # Each token goes through filter -> lemmatise -> stem independently, so a
    # single pass gives the same result as three separate list passes.
    content_tokens = (tok for tok in word_tokenize(without_punct) if tok not in stop_words)
    return ' '.join(stemmer.stem(lemmatizer.lemmatize(tok)) for tok in content_tokens)

def preprocess_data(df, tokenizer, max_length=256):
    """Turn a frame of requirements into tokenizer output plus labels.

    Args:
        df: frame providing the 'RequirementText' and 'encoded_label' columns.
        tokenizer: callable tokenizer applied to the list of cleaned texts.
        max_length: truncation limit forwarded to the tokenizer.

    Returns:
        A ``(tokenized, labels)`` pair: the tokenizer's output (requested as
        TensorFlow tensors, padded to the longest sequence) and the labels as
        a plain Python list, in row order.
    """
    cleaned_texts = [clean_text(raw_text) for raw_text in df['RequirementText']]
    label_list = df['encoded_label'].tolist()
    encoded = tokenizer(
        cleaned_texts,
        padding=True,
        truncation=True,
        return_tensors='tf',
        max_length=max_length,
    )
    return encoded, label_list

# Define stratified K-fold cross-validation
n_splits = 10
all_test_labels = []        # true labels of every held-out sample, over all folds
all_predicted_labels = []   # predictions aligned with all_test_labels
# The four history accumulators below are not filled by this loop; the names
# are kept so that any code relying on them keeps working.
all_train_loss = []
all_train_accuracy = []
all_val_loss = []
all_val_accuracy = []
per_fold_test_acc = []      # one test accuracy per fold
fold_scores = []            # one summary dict per fold

# Snapshot of the starting weights (pretrained encoder + freshly initialised
# classifier head). Every fold is reset to this state; without the reset the
# model keeps the weights learned in earlier folds, whose training data
# contains the current fold's held-out samples, so the cross-validated scores
# would be inflated by leakage.
initial_weights = model.get_weights()

fold_splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
for fold, (train_indices, val_indices) in enumerate(fold_splitter.split(df['RequirementText'], df['encoded_label'])):
    print(f"Fold {fold + 1}...")

    # Start this fold from the untouched initial weights (see note above).
    model.set_weights(initial_weights)

    # The K-fold "validation" indices serve as this fold's held-out TEST set;
    # a further 10% of the remaining rows is split off for model selection.
    fold_train_df = df.iloc[train_indices]
    fold_val_df = df.iloc[val_indices]
    train_df, val_df = train_test_split(fold_train_df, test_size=0.1, random_state=42)

    train_data, train_labels = preprocess_data(train_df, tokenizer, max_length)
    val_data, val_labels = preprocess_data(val_df, tokenizer, max_length)
    test_data, test_labels = preprocess_data(fold_val_df, tokenizer, max_length)

    # Keras fit/predict are fed plain NumPy arrays keyed by tokenizer field.
    train_data = {key: np.array(val) for key, val in train_data.items()}
    val_data = {key: np.array(val) for key, val in val_data.items()}
    test_data = {key: np.array(val) for key, val in test_data.items()}

    # A new optimizer per fold so no optimizer state carries over either.
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Both callbacks select on validation accuracy so that the checkpoint on
    # disk, the restored weights and the reported "best epoch" all agree.
    # NOTE(review): patience (10) exceeds epochs (8), so early stopping cannot
    # actually halt training; the best weights are restored explicitly below.
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
    model_checkpoint = ModelCheckpoint(
        os.path.join(output_dir, f'best_model_fold_{fold}'),
        monitor='val_accuracy',
        save_best_only=True,
    )

    history = model.fit(
        train_data, np.array(train_labels),
        validation_data=(val_data, np.array(val_labels)),
        epochs=8, batch_size=16,
        callbacks=[early_stopping, model_checkpoint],
        verbose=2
    )

    # EarlyStopping records the best weights it has seen but (in the Keras
    # release pinned by this notebook — verify when upgrading) only puts them
    # back when it stops training early. Restore them here so the test set is
    # scored with the best-validation epoch rather than the last epoch.
    best_weights = getattr(early_stopping, 'best_weights', None)
    if best_weights is not None:
        model.set_weights(best_weights)

    # ─── Evaluate the fold ───────────────────────────────────────
    val_curve = history.history['val_accuracy']
    print( val_curve)
    train_curve = history.history['accuracy']
    print(train_curve)
    best_epoch = int(np.argmax(val_curve))          # 0-based index, first maximum

    best_val_acc = float(val_curve[best_epoch])
    best_train_acc = float(train_curve[best_epoch])

    # Score the held-out partition of this fold.
    test_predictions = model.predict(test_data)['logits']
    test_predicted_labels = np.argmax(test_predictions, axis=1)
    all_test_labels.extend(test_labels)
    all_predicted_labels.extend(test_predicted_labels)

    fold_test_acc = accuracy_score(test_labels, test_predicted_labels)
    print(f"Fold* {fold} – test accuracy: {fold_test_acc:.4f}")
    per_fold_test_acc.append(fold_test_acc)

    # ─── Save results for this fold ──────────────────────────────
    fold_scores.append({
        'fold':        fold,
        'best_epoch':  best_epoch + 1,   # human-friendly 1-based
        'train_acc':   best_train_acc,
        'val_acc':     best_val_acc,
        'test_acc':    fold_test_acc,
    })

# ─── Aggregate the per-fold results ─────────────────────────────
print(f"\nAverage Test Accuracy: {np.mean(per_fold_test_acc):.4f}")

scores_df = pd.DataFrame(fold_scores)
print("\n📊 Per‑fold summary:")
print(scores_df)

# Sample standard deviation (ddof=1) across folds for each accuracy column.
print("\n🔍 Mean ± SD over folds")
for col in ['train_acc', 'val_acc', 'test_acc']:
    mean = scores_df[col].mean()
    std = scores_df[col].std(ddof=1)
    print(f"{col:>9}: {mean:.4f} ± {std:.4f}")

# Classification report over the pooled held-out predictions of all folds.
class_report = classification_report(all_test_labels, all_predicted_labels, target_names=label_encoder.classes_)
print("NFR-clean: Classification Report for test data Across Folds:\n", class_report)

# ─── Persist the model (the instance as left by the last fold) ──
# Save the trained weights in .h5 format
h5_path = os.path.join(output_dir, "bert_model_weights.h5")
model.save_weights(h5_path)
print(f"Model weights saved as .h5 at: {h5_path}")

# Save the entire model using Pickle.
# NOTE(review): pickling a TensorFlow/Keras model is not guaranteed to work for
# every model class or library version. The object is serialised in memory
# first so that a failure neither leaves a truncated file behind nor aborts
# the remaining exports and the confusion-matrix plot below.
pickle_path = os.path.join(output_dir, "bert_model.pkl")
try:
    pickled_model = pickle.dumps(model)
except Exception as exc:
    print(f"Pickle export skipped ({type(exc).__name__}: {exc})")
else:
    with open(pickle_path, "wb") as f:
        f.write(pickled_model)
    print(f"Model saved as Pickle at: {pickle_path}")

# Save the model in HuggingFace format
model.save_pretrained(output_dir)
print(f"Model saved in HuggingFace format at: {output_dir}")

# ─── Confusion matrix over all folds ────────────────────────────
conf_mat = confusion_matrix(all_test_labels, all_predicted_labels)

# Explicit figure/axes instead of the pyplot state machine; rows are actual
# classes, columns are predicted classes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_mat,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=label_encoder.classes_,
    yticklabels=label_encoder.classes_,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Functional and Non-Functional Requirements: Confusion Matrix')
fig.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
plt.show()

# Final Message
print("Model training, evaluation, and saving completed for the nfr classification.")

Loading dataset...
                                     RequirementText classType
0  The app shall run on a smart phone with Androi...       NFR
1  All layout shall be according to the TU/e corp...       NFR
2  The icons shall be according to the Android Ic...       NFR
3  The user interfaces shall be according to the ...       NFR
4  The Twitter "tweet" interface shall be accordi...       NFR
Dataset loaded: 6117 samples .


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Number of unique labels: 2


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1...
Epoch 1/8




310/310 - 419s - loss: 0.4850 - accuracy: 0.7741 - val_loss: 0.3694 - val_accuracy: 0.8457 - 419s/epoch - 1s/step
Epoch 2/8




310/310 - 384s - loss: 0.3345 - accuracy: 0.8704 - val_loss: 0.3381 - val_accuracy: 0.8548 - 384s/epoch - 1s/step
Epoch 3/8
310/310 - 352s - loss: 0.2561 - accuracy: 0.9047 - val_loss: 0.3459 - val_accuracy: 0.8693 - 352s/epoch - 1s/step
Epoch 4/8
310/310 - 351s - loss: 0.1941 - accuracy: 0.9346 - val_loss: 0.3726 - val_accuracy: 0.8566 - 351s/epoch - 1s/step
Epoch 5/8
310/310 - 351s - loss: 0.1538 - accuracy: 0.9512 - val_loss: 0.3938 - val_accuracy: 0.8893 - 351s/epoch - 1s/step
Epoch 6/8
310/310 - 351s - loss: 0.1235 - accuracy: 0.9566 - val_loss: 0.3814 - val_accuracy: 0.8748 - 351s/epoch - 1s/step
Epoch 7/8
310/310 - 351s - loss: 0.0902 - accuracy: 0.9723 - val_loss: 0.4305 - val_accuracy: 0.8730 - 351s/epoch - 1s/step
Epoch 8/8
310/310 - 352s - loss: 0.0851 - accuracy: 0.9719 - val_loss: 0.4261 - val_accuracy: 0.8748 - 352s/epoch - 1s/step
[0.8457350134849548, 0.8548094630241394, 0.869328498840332, 0.8566243052482605, 0.8892921805381775, 0.8747731447219849, 0.872958242893219, 0.8



310/310 - 413s - loss: 0.1361 - accuracy: 0.9564 - val_loss: 0.1366 - val_accuracy: 0.9601 - 413s/epoch - 1s/step
Epoch 2/8
310/310 - 352s - loss: 0.0976 - accuracy: 0.9685 - val_loss: 0.1423 - val_accuracy: 0.9492 - 352s/epoch - 1s/step
Epoch 3/8
310/310 - 351s - loss: 0.0746 - accuracy: 0.9762 - val_loss: 0.1634 - val_accuracy: 0.9474 - 351s/epoch - 1s/step
Epoch 4/8
310/310 - 350s - loss: 0.0556 - accuracy: 0.9812 - val_loss: 0.1779 - val_accuracy: 0.9564 - 350s/epoch - 1s/step
Epoch 5/8
310/310 - 351s - loss: 0.0383 - accuracy: 0.9883 - val_loss: 0.1969 - val_accuracy: 0.9492 - 351s/epoch - 1s/step
Epoch 6/8
310/310 - 351s - loss: 0.0404 - accuracy: 0.9857 - val_loss: 0.2246 - val_accuracy: 0.9419 - 351s/epoch - 1s/step
Epoch 7/8
310/310 - 351s - loss: 0.0325 - accuracy: 0.9879 - val_loss: 0.2464 - val_accuracy: 0.9456 - 351s/epoch - 1s/step
Epoch 8/8
310/310 - 351s - loss: 0.0276 - accuracy: 0.9897 - val_loss: 0.2402 - val_accuracy: 0.9437 - 351s/epoch - 1s/step
[0.960072576999664



310/310 - 411s - loss: 0.0562 - accuracy: 0.9820 - val_loss: 0.0261 - val_accuracy: 0.9927 - 411s/epoch - 1s/step
Epoch 2/8
310/310 - 350s - loss: 0.0398 - accuracy: 0.9869 - val_loss: 0.0413 - val_accuracy: 0.9891 - 350s/epoch - 1s/step
Epoch 3/8
310/310 - 350s - loss: 0.0327 - accuracy: 0.9895 - val_loss: 0.0407 - val_accuracy: 0.9873 - 350s/epoch - 1s/step
Epoch 4/8
310/310 - 350s - loss: 0.0256 - accuracy: 0.9901 - val_loss: 0.0547 - val_accuracy: 0.9873 - 350s/epoch - 1s/step
Epoch 5/8
310/310 - 349s - loss: 0.0222 - accuracy: 0.9899 - val_loss: 0.0596 - val_accuracy: 0.9837 - 349s/epoch - 1s/step
Epoch 6/8
310/310 - 350s - loss: 0.0245 - accuracy: 0.9897 - val_loss: 0.0960 - val_accuracy: 0.9746 - 350s/epoch - 1s/step
Epoch 7/8
310/310 - 350s - loss: 0.0260 - accuracy: 0.9907 - val_loss: 0.0628 - val_accuracy: 0.9837 - 350s/epoch - 1s/step
Epoch 8/8
310/310 - 349s - loss: 0.0228 - accuracy: 0.9923 - val_loss: 0.0696 - val_accuracy: 0.9800 - 349s/epoch - 1s/step
[0.992740452289581



310/310 - 412s - loss: 0.0314 - accuracy: 0.9891 - val_loss: 0.0082 - val_accuracy: 0.9946 - 412s/epoch - 1s/step
Epoch 2/8
310/310 - 351s - loss: 0.0243 - accuracy: 0.9897 - val_loss: 0.0101 - val_accuracy: 0.9946 - 351s/epoch - 1s/step
Epoch 3/8
310/310 - 350s - loss: 0.0184 - accuracy: 0.9919 - val_loss: 0.0126 - val_accuracy: 0.9946 - 350s/epoch - 1s/step
Epoch 4/8
310/310 - 351s - loss: 0.0235 - accuracy: 0.9905 - val_loss: 0.0172 - val_accuracy: 0.9909 - 351s/epoch - 1s/step
Epoch 5/8
310/310 - 350s - loss: 0.0176 - accuracy: 0.9921 - val_loss: 0.0110 - val_accuracy: 0.9964 - 350s/epoch - 1s/step
Epoch 6/8
310/310 - 350s - loss: 0.0237 - accuracy: 0.9905 - val_loss: 0.0252 - val_accuracy: 0.9873 - 350s/epoch - 1s/step
Epoch 7/8
310/310 - 351s - loss: 0.0203 - accuracy: 0.9917 - val_loss: 0.0118 - val_accuracy: 0.9946 - 351s/epoch - 1s/step
Epoch 8/8
310/310 - 350s - loss: 0.0154 - accuracy: 0.9929 - val_loss: 0.0288 - val_accuracy: 0.9873 - 350s/epoch - 1s/step
[0.994555354118347



310/310 - 412s - loss: 0.0239 - accuracy: 0.9897 - val_loss: 0.0079 - val_accuracy: 0.9964 - 412s/epoch - 1s/step
Epoch 2/8
310/310 - 350s - loss: 0.0168 - accuracy: 0.9927 - val_loss: 0.0162 - val_accuracy: 0.9909 - 350s/epoch - 1s/step
Epoch 3/8
310/310 - 350s - loss: 0.0127 - accuracy: 0.9927 - val_loss: 0.0119 - val_accuracy: 0.9927 - 350s/epoch - 1s/step
Epoch 4/8
310/310 - 350s - loss: 0.0124 - accuracy: 0.9933 - val_loss: 0.0100 - val_accuracy: 0.9964 - 350s/epoch - 1s/step
Epoch 5/8
310/310 - 351s - loss: 0.0137 - accuracy: 0.9929 - val_loss: 0.0136 - val_accuracy: 0.9964 - 351s/epoch - 1s/step
Epoch 6/8
310/310 - 350s - loss: 0.0194 - accuracy: 0.9919 - val_loss: 0.0164 - val_accuracy: 0.9927 - 350s/epoch - 1s/step
Epoch 7/8
310/310 - 350s - loss: 0.0097 - accuracy: 0.9948 - val_loss: 0.0237 - val_accuracy: 0.9946 - 350s/epoch - 1s/step
Epoch 8/8
310/310 - 350s - loss: 0.0189 - accuracy: 0.9903 - val_loss: 0.0366 - val_accuracy: 0.9891 - 350s/epoch - 1s/step
[0.996370255947113



310/310 - 411s - loss: 0.0219 - accuracy: 0.9899 - val_loss: 0.0066 - val_accuracy: 0.9982 - 411s/epoch - 1s/step
Epoch 2/8




310/310 - 381s - loss: 0.0132 - accuracy: 0.9935 - val_loss: 0.0024 - val_accuracy: 0.9982 - 381s/epoch - 1s/step
Epoch 3/8
310/310 - 347s - loss: 0.0144 - accuracy: 0.9917 - val_loss: 0.0043 - val_accuracy: 0.9964 - 347s/epoch - 1s/step
Epoch 4/8
310/310 - 347s - loss: 0.0164 - accuracy: 0.9923 - val_loss: 0.0045 - val_accuracy: 0.9982 - 347s/epoch - 1s/step
Epoch 5/8
310/310 - 347s - loss: 0.0112 - accuracy: 0.9929 - val_loss: 0.0080 - val_accuracy: 0.9964 - 347s/epoch - 1s/step
Epoch 6/8
310/310 - 347s - loss: 0.0172 - accuracy: 0.9905 - val_loss: 0.0136 - val_accuracy: 0.9946 - 347s/epoch - 1s/step
Epoch 7/8
310/310 - 353s - loss: 0.0107 - accuracy: 0.9937 - val_loss: 0.0178 - val_accuracy: 0.9946 - 353s/epoch - 1s/step
Epoch 8/8
310/310 - 349s - loss: 0.0126 - accuracy: 0.9945 - val_loss: 0.0351 - val_accuracy: 0.9891 - 349s/epoch - 1s/step
[0.9981850981712341, 0.9981850981712341, 0.996370255947113, 0.9981850981712341, 0.996370255947113, 0.9945553541183472, 0.9945553541183472, 0.9



310/310 - 407s - loss: 0.0177 - accuracy: 0.9935 - val_loss: 0.0045 - val_accuracy: 0.9964 - 407s/epoch - 1s/step
Epoch 2/8
