In [6]:
import tensorflow as tf
from transformers import TFBertModel

# Test loading a simple model
try:
    bert_model = TFBertModel.from_pretrained('bert-base-uncased')
    print("Model loaded successfully")
    
    # Explicitly build the model to ensure it doesn't have issues
    dummy_input_ids = tf.keras.Input(shape=(200,), dtype=tf.int32)
    dummy_attention_mask = tf.keras.Input(shape=(200,), dtype=tf.int32)
    _ = bert_model([dummy_input_ids, dummy_attention_mask])

    print("Model built successfully")
except Exception as e:
    print(f"Error: {e}")


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Model loaded successfully
Model built successfully


In [1]:
%pip install pandas numpy scikit-learn transformers tensorflow matplotlib glob2


Collecting pandas
  Downloading pandas-2.2.2-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp312-cp312-win_amd64.whl.metadata (12 kB)
Collecting matplotlib
  Downloading matplotlib-3.9.2-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting glob2
  Downloading glob2-0.7.tar.gz (10 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Collecti

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
import glob

# Load and concatenate all CSV files into a single DataFrame
def load_aspect_data(path):
    all_files = glob.glob(path)
    all_data = []
    for filename in all_files:
        df = pd.read_csv(filename)
        all_data.append(df)
    combined_df = pd.concat(all_data, ignore_index=True)
    print(f"Loaded {len(all_files)} CSV files.")
    return combined_df

aspect_path = r"C:\Users\andyb\Desktop\Coding Files\PointView\datasets\aspect_based_dataset\*.csv"
aspect_df = load_aspect_data(aspect_path)

# Ensure 'Classification' is categorical and has correct unique values
specific_kpis = ['food', 'staff', 'comfort & facilities', 'value for money']

# Binary encoding for each KPI in 'Classification' column
def check_kpi(classification, kpi):
    if isinstance(classification, str):
        return 1 if kpi.lower() in classification.lower() else 0
    return 0

for kpi in specific_kpis:
    aspect_df[kpi] = aspect_df['Classification'].apply(lambda x: check_kpi(x, kpi))

# Prepare features (X) and labels (y)
X = aspect_df['Opinion'].values
y = aspect_df[specific_kpis].values

# Load pre-trained BERT tokenizer and model
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Function to tokenize and encode data
def bert_encode(texts, tokenizer, max_len=200):
    input_ids = []
    attention_masks = []

    for text in texts:
        encoded = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='tf'
        )
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])

    return {
        'input_ids': tf.concat(input_ids, axis=0),
        'attention_mask': tf.concat(attention_masks, axis=0)
    }

X_encoded = bert_encode(X, bert_tokenizer)

# Convert TensorFlow tensors to NumPy arrays before splitting
X_input_ids = X_encoded['input_ids'].numpy()
X_attention_masks = X_encoded['attention_mask'].numpy()

# Split the data into training and testing sets
X_train_ids, X_test_ids, y_train, y_test = train_test_split(X_input_ids, y, test_size=0.2, random_state=42)
attention_train, attention_test = train_test_split(X_attention_masks, test_size=0.2, random_state=42)

# Load BERT model
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# Build the BERT-based model
input_ids = tf.keras.Input(shape=(200,), dtype=tf.int32, name='input_ids')
attention_masks = tf.keras.Input(shape=(200,), dtype=tf.int32, name='attention_mask')

# Pass input_ids and attention_masks as separate arguments
bert_output = bert_model(input_ids=input_ids, attention_mask=attention_masks)[1]  # [1] is the pooled output
dense = Dense(64, activation='relu')(bert_output)
dropout = Dropout(0.3)(dense)
output = Dense(len(specific_kpis), activation='sigmoid')(dropout)

model = tf.keras.Model(inputs=[input_ids, attention_masks], outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)

# Train the BERT model
history = model.fit(
    [X_train_ids, attention_train],
    y_train,
    batch_size=16,
    epochs=10,
    validation_split=0.1,
    callbacks=[early_stopping, lr_reducer]
)

# Evaluate the model
loss, accuracy = model.evaluate([X_test_ids, attention_test], y_test)
print(f"Test accuracy: {accuracy:.4f}")

# Detailed classification report
y_pred = model.predict([X_test_ids, attention_test])
y_pred_binary = (y_pred > 0.5).astype(int)
print(classification_report(y_test, y_pred_binary, target_names=specific_kpis))

# Plot training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title('Loss Over Epochs')

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.title('Accuracy Over Epochs')

plt.show()

# Function to predict KPI likelihoods for new data using BERT
def predict_kpi_likelihoods(text):
    encoded_text = bert_encode([text], bert_tokenizer)
    prediction = model.predict([encoded_text['input_ids'].numpy(), encoded_text['attention_mask'].numpy()])[0]
    return {kpi: float(likelihood) for kpi, likelihood in zip(specific_kpis, prediction)}

# Test the model on a sample review
sample_review = "The room was clean and comfortable, but the staff was not very friendly. The food was excellent."
predicted_likelihoods = predict_kpi_likelihoods(sample_review)
print("\nPredicted KPI likelihoods for the sample review:")
for kpi, likelihood in predicted_likelihoods.items():
    print(f"{kpi}: {likelihood:.2%}")


Loaded 29 CSV files.


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Exception encountered when calling layer 'tf_bert_model_3' (type TFBertModel).

Data of type <class 'keras.src.backend.common.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for attention_mask.

Call arguments received by layer 'tf_bert_model_3' (type TFBertModel):
  • input_ids=<KerasTensor shape=(None, 200), dtype=int32, sparse=False, name=input_ids>
  • attention_mask=<KerasTensor shape=(None, 200), dtype=int32, sparse=False, name=attention_mask>
  • token_type_ids=None
  • position_ids=None
  • head_mask=None
  • inputs_embeds=None
  • encoder_hidden_states=None
  • encoder_attention_mask=None
  • past_key_values=None
  • use_cache=None
  • output_attentions=None
  • output_hidden_states=None
  • return_dict=None
  • training=False