In [3]:
import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler


# Load data
data = pd.read_excel('score_model/Interview Responses Combined.xlsx')
data.dropna(inplace=True)

# Split into features and targets
X = data['Responses'].tolist()
y = data[['Confidence', 'Fluency']].values

# Scale target values using Min-Max Scaling
scaler = MinMaxScaler()
y_scaled = scaler.fit_transform(y)

# Load BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# Tokenize and encode responses
def encode_text(text_list):
    encoding = tokenizer(
        text_list,
        padding='max_length',  # Force consistent padding
        truncation=True,
        max_length=128,
        return_tensors="tf"
    )
    return encoding

X_encoded = encode_text(X)

# Convert tensors to numpy arrays for training compatibility
X_input_ids = np.array(X_encoded['input_ids'])
X_attention_mask = np.array(X_encoded['attention_mask'])

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_input_ids, y_scaled, test_size=0.2, random_state=42
)

# BERT Embedding Layer
input_ids = Input(shape=(128,), dtype=tf.int32, name='input_ids')
attention_mask = Input(shape=(128,), dtype=tf.int32, name='attention_mask')

# Pass both input_ids and attention_mask to BERT
embedding = bert_model(input_ids, attention_mask=attention_mask)[0][:, 0, :]  # CLS token output

# Neural Network layers
x = Dense(128, activation='relu')(embedding)
x = Dropout(0.3)(x)  # Dropout to prevent overfitting
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(2, activation='sigmoid')(x)  # Sigmoid for output between 0 and 1

# Build model with both inputs
model = Model(inputs=[input_ids, attention_mask], outputs=output)
model.compile(optimizer=Adam(learning_rate=1e-5), loss='huber_loss')

# Train model
history = model.fit(
    {'input_ids': X_train, 'attention_mask': X_attention_mask[:len(X_train)]},
    y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=8
)

# Predict on test set
y_pred = model.predict({
    'input_ids': X_test,
    'attention_mask': X_attention_mask[:len(X_test)]
})

# Inverse transform to original scale
y_pred_rescaled = scaler.inverse_transform(y_pred)
y_test_rescaled = scaler.inverse_transform(y_test)

# Evaluation
mse = mean_squared_error(y_test_rescaled, y_pred_rescaled, multioutput='raw_values')
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled, multioutput='raw_values')
r2 = r2_score(y_test_rescaled, y_pred_rescaled, multioutput='raw_values')

print(f"Mean Squared Error (Confidence, Fluency): {mse}")
print(f"Mean Absolute Error (Confidence, Fluency): {mae}")
print(f"R² Score (Confidence, Fluency): {r2}")

# Prediction function
def predict_response(response):
    encoded = encode_text([response])
    prediction = model.predict({
        'input_ids': np.array(encoded['input_ids']),
        'attention_mask': np.array(encoded['attention_mask'])
    })
    prediction_rescaled = scaler.inverse_transform(prediction)[0]
    confidence, fluency = prediction_rescaled
    return {"Confidence": round(confidence, 2), "Fluency": round(fluency, 2)}





Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Mean Squared Error (Confidence, Fluency): [15.12705757 10.18232526]
Mean Absolute Error (Confidence, Fluency): [3.52057049 2.7496931 ]
R² Score (Confidence, Fluency): [0.98137328 0.98478538]


In [5]:
# Example Prediction
new_response = "Um, I have experience in project management and, uh, data analysis."
result = predict_response(new_response)
print(f"Predicted Scores: {result}")
print(f"R² Score (Confidence, Fluency): {r2}")



Predicted Scores: {'Confidence': 26.34, 'Fluency': 35.87}
R² Score (Confidence, Fluency): [0.98137328 0.98478538]


In [6]:
# Save the trained model in TensorFlow format
model.save("saved_model/bert_interview_scorer")

INFO:tensorflow:Assets written to: saved_model/bert_interview_scorer\assets


INFO:tensorflow:Assets written to: saved_model/bert_interview_scorer\assets


In [7]:
import joblib

# Save the scaler
joblib.dump(scaler, "saved_model/scaler.pkl")


['saved_model/scaler.pkl']

In [8]:
# Save the tokenizer
tokenizer.save_pretrained("saved_model/tokenizer")


('saved_model/tokenizer\\tokenizer_config.json',
 'saved_model/tokenizer\\special_tokens_map.json',
 'saved_model/tokenizer\\vocab.txt',
 'saved_model/tokenizer\\added_tokens.json')

In [2]:
from tensorflow.keras.models import load_model
from transformers import BertTokenizer, TFBertModel
import joblib

# Load the model with custom_objects
model = load_model("saved_model/bert_interview_scorer", custom_objects={"TFBertModel": TFBertModel})

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Load the scaler
scaler = joblib.load("saved_model/scaler.pkl")

# Define the function again
def predict_response(response):
    encoded = tokenizer(
        [response],
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="tf"
    )
    
    prediction = model.predict({
        'input_ids': encoded['input_ids'],
        'attention_mask': encoded['attention_mask']
    })
    
    prediction_rescaled = scaler.inverse_transform(prediction)[0]
    confidence, fluency = prediction_rescaled
    return {"Confidence": round(confidence, 2), "Fluency": round(fluency, 2)}





In [4]:
# Test the loaded model
response_text = "Um, I have experience in project management and, uh, data analysis."
result = predict_response(response_text)

print(result)

{'Confidence': 26.34, 'Fluency': 35.87}


In [3]:
!pip uninstall -y keras
# !pip install keras==2.15.0
# !pip install tf-keras



Found existing installation: keras 3.9.0
Uninstalling keras-3.9.0:
  Successfully uninstalled keras-3.9.0


In [4]:
!pip uninstall -y tensorflow tensorflow-intel tensorflow-estimator keras tf-keras
!pip uninstall -y tensorflow-io tensorflow-io-gcs-filesystem

Found existing installation: tensorflow-intel 2.15.0
Uninstalling tensorflow-intel-2.15.0:
  Successfully uninstalled tensorflow-intel-2.15.0
Found existing installation: tensorflow-estimator 2.15.0
Uninstalling tensorflow-estimator-2.15.0:
  Successfully uninstalled tensorflow-estimator-2.15.0
Found existing installation: tf-keras 2.19.0
Uninstalling tf-keras-2.19.0:
  Successfully uninstalled tf-keras-2.19.0




Found existing installation: tensorflow-io-gcs-filesystem 0.31.0
Uninstalling tensorflow-io-gcs-filesystem-0.31.0:
  Successfully uninstalled tensorflow-io-gcs-filesystem-0.31.0




In [5]:
!pip install tensorflow==2.15.0 keras==2.15.0
# # !pip install tf-keras

Collecting tensorflow==2.15.0
  Using cached tensorflow-2.15.0-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting keras==2.15.0
  Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Collecting tensorflow-intel==2.15.0
  Using cached tensorflow_intel-2.15.0-cp39-cp39-win_amd64.whl (300.8 MB)
Collecting tensorflow-estimator<2.16,>=2.15.0
  Using cached tensorflow_estimator-2.15.0-py2.py3-none-any.whl (441 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Using cached tensorflow_io_gcs_filesystem-0.31.0-cp39-cp39-win_amd64.whl (1.5 MB)




Installing collected packages: tensorflow-io-gcs-filesystem, tensorflow-estimator, keras, tensorflow-intel, tensorflow
Successfully installed keras-2.15.0 tensorflow-2.15.0 tensorflow-estimator-2.15.0 tensorflow-intel-2.15.0 tensorflow-io-gcs-filesystem-0.31.0


In [2]:
import keras
import tensorflow as tf
import transformers

print("Keras Version:", keras.__version__)
print("TensorFlow Version:", tf.__version__)
print("Transformers Version:", transformers.__version__)


Keras Version: 2.15.0
TensorFlow Version: 2.15.0
Transformers Version: 4.49.0


In [4]:
def get_next_difficulty(accuracy, confidence, fluency, current_difficulty):
    # Calculate weighted score
    weighted_score = (accuracy * 0.5) + (confidence * 2) + (fluency * 2)

    # Adaptive difficulty selection
    if weighted_score >= 80 and current_difficulty < 2:
        return current_difficulty + 1  # Move to harder question
    elif weighted_score <= 50 and current_difficulty > 0:
        return current_difficulty - 1  # Move to easier question
    return current_difficulty  # Keep the same difficulty
