In [5]:
! pip install bert_score



In [6]:
def fetch_pairs(file_path):
  # Replace with your file path

  # Initialize an empty list to store the tuples
  chat_data = []

  # Read the file
  with open(file_path, 'r', encoding='utf-8') as file:
      for line in file:
          # Split the line into question and answer using '\t' as a separator
          parts = line.strip().split('\t')
          if len(parts) == 2:  # Ensure the line has both question and answer
              question, answer = parts
              chat_data.append((question, answer))
  return chat_data



In [14]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import heapq
from bert_score import score
import pandas as pd

# Hyperparameters
EMBEDDING_DIM = 256
LSTM_UNITS = 512
# BATCH_SIZE = 64
EPOCHS = 1
OOV_TOKEN = "<OOV>"
START_TOKEN = "<start>"
END_TOKEN = "<end>"
BEAM_WIDTH = 3
MAX_REPEAT_TOKENS = 2
# Hyperparameters (revised for faster experimentation)
EMBEDDING_DIM = 256  # Reduced embedding size
LSTM_UNITS = 512     # Reduced LSTM units
BATCH_SIZE = 64      # Smaller batch size to speed up training with limited resources
EPOCHS = 30           # Reduced epochs to test quickly
BEAM_WIDTH = 3       # Lowered beam width for faster decoding

# Optional: Use a smaller sample of data for quicker experimentation
 # Use only 10% of the data for training (remove for full dataset)


# Load data from CSV
csv_path = 'df_with_emotion_labels.csv'
df = pd.read_csv(csv_path)
# df = df.sample()
# Create pairs with and without emotions
pairs = []
pairs_without_emotion = []
for index, row in df.iterrows():
    user_message = row['empathetic_dialogues']
    response = row['labels']
    emotion = row.get('emotion_label', None)
    pairs.append((f"Emotion: {emotion}. {user_message}", response))
    # pairs_without_emotion.append((user_message, response))

pairs_without_emotion=fetch_pairs(r'dialogs.txt')
# Prepare the data with emotion labels
input_texts, target_texts = [], []
for input_text, target_text in pairs:
    input_texts.append(input_text)
    target_texts.append(f"{START_TOKEN} {target_text} {END_TOKEN}")

# Tokenize the input and target texts
tokenizer = Tokenizer(oov_token=OOV_TOKEN)
tokenizer.fit_on_texts(input_texts + target_texts)
input_sequences = tokenizer.texts_to_sequences(input_texts)
target_sequences = tokenizer.texts_to_sequences(target_texts)

# Padding sequences
max_input_len = max(len(seq) for seq in input_sequences)
max_target_len = max(len(seq) for seq in target_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_input_len, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_target_len, padding='post')

# Vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# Prepare training data for model with emotion labels
decoder_input_data = np.array([seq[:-1] for seq in target_sequences])
decoder_target_data = np.array([seq[1:] for seq in target_sequences])

# Encoder input
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(vocab_size, EMBEDDING_DIM)(encoder_inputs)
encoder_lstm = LSTM(LSTM_UNITS, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder input
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(vocab_size, EMBEDDING_DIM)
decoder_embedding = decoder_embedding_layer(decoder_inputs)
decoder_lstm = LSTM(LSTM_UNITS, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model with emotion labels
model_with_emotion = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model_with_emotion.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model with emotion labels
model_with_emotion.fit(
    [input_sequences, decoder_input_data],
    decoder_target_data,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2
)

# Save the trained model with emotion labels
model_with_emotion.save('model_with_emotion.h5')

# Prepare the data without emotion labels
input_texts_no_emotion, target_texts_no_emotion = [], []
for input_text, target_text in pairs_without_emotion:
    input_texts_no_emotion.append(input_text)
    target_texts_no_emotion.append(f"{START_TOKEN} {target_text} {END_TOKEN}")

# Tokenize and pad sequences without emotion labels
input_sequences_no_emotion = tokenizer.texts_to_sequences(input_texts_no_emotion)
target_sequences_no_emotion = tokenizer.texts_to_sequences(target_texts_no_emotion)
input_sequences_no_emotion = pad_sequences(input_sequences_no_emotion, maxlen=max_input_len, padding='post')
target_sequences_no_emotion = pad_sequences(target_sequences_no_emotion, maxlen=max_target_len, padding='post')

# Prepare training data for the model without emotion labels
decoder_input_data_no_emotion = np.array([seq[:-1] for seq in target_sequences_no_emotion])
decoder_target_data_no_emotion = np.array([seq[1:] for seq in target_sequences_no_emotion])

# Train the model without emotion labels
model_without_emotion = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model_without_emotion.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_without_emotion.fit(
    [input_sequences_no_emotion, decoder_input_data_no_emotion],
    decoder_target_data_no_emotion,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2
)

# Save the trained model without emotion labels
model_without_emotion.save('model_without_emotion.h5')

# Inference Models (Shared by both versions)
# Encoder inference model
encoder_inputs = model_with_emotion.input[0]
encoder_states = model_with_emotion.layers[4].output[1:]  # Getting the states from the LSTM layer
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder inference model
decoder_state_input_h = Input(shape=(LSTM_UNITS,))
decoder_state_input_c = Input(shape=(LSTM_UNITS,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_inputs = model_with_emotion.input[1]
decoder_embedding_layer = model_with_emotion.layers[3]
decoder_lstm = model_with_emotion.layers[5]
decoder_dense = model_with_emotion.layers[6]

decoder_embedding2 = decoder_embedding_layer(decoder_inputs)
decoder_outputs2, state_h2, state_c2 = decoder_lstm(decoder_embedding2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs2] + decoder_states2
)

# Function to generate responses with Beam Search
def decode_sequence_beam_search(input_seq, beam_width=BEAM_WIDTH):
    # Encode the input as state vectors
    states_value = encoder_model.predict(input_seq)

    # Initialize the beam search variables
    sequences = [[list(), 0.0, states_value]]
    stop_condition = False
    decoded_sentences = []

    while len(sequences) > 0 and not stop_condition:
        all_candidates = []
        for seq, score, states_value in sequences:
            target_seq = np.zeros((1, 1), dtype='int32')
            if len(seq) == 0:
                target_seq[0, 0] = tokenizer.word_index.get(START_TOKEN, 1)
            else:
                target_seq[0, 0] = seq[-1]

            output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

            for i in range(vocab_size):
                # Apply penalty for generating <OOV> tokens
                if tokenizer.index_word.get(i, OOV_TOKEN) == OOV_TOKEN:
                    penalty = 5.0  # Increase penalty for <OOV>
                else:
                    penalty = 1.0
                candidate = [seq + [i], score - penalty * np.log(output_tokens[0, -1, i]), [h, c]]
                all_candidates.append(candidate)

        # Order all candidates by score
        ordered = sorted(all_candidates, key=lambda x: x[1])
        sequences = ordered[:beam_width]

        # Check if we reach the end token or maximum sequence length
        stop_condition = True
        for seq, score, states_value in sequences:
            if len(seq) < max_target_len and tokenizer.index_word.get(seq[-1], OOV_TOKEN) != END_TOKEN:
                stop_condition = False
                break

    # Select the sequence with the highest score
    best_sequence = sequences[0][0]
    decoded_sentence = []
    repeated_tokens_count = 0
    previous_token = None
    for index in best_sequence:
        word = tokenizer.index_word.get(index, OOV_TOKEN)
        if word == previous_token:
            repeated_tokens_count += 1
        else:
            repeated_tokens_count = 0

        if repeated_tokens_count < MAX_REPEAT_TOKENS:
            decoded_sentence.append(word)
        previous_token = word

    return ' '.join([word for word in decoded_sentence if word != START_TOKEN and word != END_TOKEN]).strip()

# Example usage with Beam Search using both models
input_sentence = "I do actually hit blank walls a lot of times but i get by"
input_sequence = pad_sequences(tokenizer.texts_to_sequences([input_sentence]), maxlen=max_input_len, padding='post')

# Generate response with emotion model
response_with_emotion = decode_sequence_beam_search(input_sequence)
print("Bot response with emotion label (Beam Search):", response_with_emotion)

# Generate response without emotion model
response_without_emotion = decode_sequence_beam_search(input_sequence)
print("Bot response without emotion label (Beam Search):", response_without_emotion)
# Evaluating the model using BERTScore
def evaluate_bert_score(reference_texts, generated_texts):
    # Ensure both lists have the same length by selecting one reference text
    # You might want to choose a more appropriate reference based on your use case
    # or iterate through all references and average the scores
    reference_text = [reference_texts[0]]  # Select the first reference text

    P, R, F1 = score(generated_texts, reference_text, lang='en', verbose=True) # Pass the single reference text
    print("BERTScore Precision:", P.mean().item())
    print("BERTScore Recall:", R.mean().item())
    print("BERTScore F1:", F1.mean().item())

# Example evaluation
reference_texts = ["Oh was this something that happened because of an argument?", "I see. I'm here to talk to you."]
generated_texts_with_emotion = [response_with_emotion]
generated_texts_without_emotion = [response_without_emotion]

print("\nEvaluating model with emotion label:")
evaluate_bert_score(reference_texts, generated_texts_with_emotion)

print("\nEvaluating model without emotion label:")
evaluate_bert_score(reference_texts, generated_texts_without_emotion)


Epoch 1/30
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 65ms/step - accuracy: 0.8809 - loss: 1.1524 - val_accuracy: 0.8884 - val_loss: 0.6936
Epoch 2/30
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 65ms/step - accuracy: 0.9007 - loss: 0.6054 - val_accuracy: 0.8929 - val_loss: 0.6366
Epoch 3/30
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 65ms/step - accuracy: 0.9039 - loss: 0.5590 - val_accuracy: 0.8950 - val_loss: 0.6123
Epoch 4/30
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 65ms/step - accuracy: 0.9064 - loss: 0.5300 - val_accuracy: 0.8962 - val_loss: 0.5999
Epoch 5/30
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 65ms/step - accuracy: 0.9076 - loss: 0.5099 - val_accuracy: 0.8973 - val_loss: 0.5904
Epoch 6/30
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 65ms/step - accuracy: 0.9094 - loss: 0.4880 - val_accuracy: 0.8980 - val_loss: 0.5847
Epoch 7/30
[1m8



Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 73ms/step - accuracy: 0.9474 - loss: 0.3662 - val_accuracy: 0.9441 - val_loss: 0.4027
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 66ms/step - accuracy: 0.9517 - loss: 0.2967 - val_accuracy: 0.9442 - val_loss: 0.3947
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 66ms/step - accuracy: 0.9544 - loss: 0.2635 - val_accuracy: 0.9445 - val_loss: 0.3913
Epoch 4/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 66ms/step - accuracy: 0.9573 - loss: 0.2315 - val_accuracy: 0.9443 - val_loss: 0.3908
Epoch 5/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step - accuracy: 0.9607 - loss: 0.2060 - val_accuracy: 0.9441 - val_loss: 0.3923
Epoch 6/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step - accuracy: 0.9627 - loss: 0.1849 - val_accuracy: 0.9442 - val_loss: 0.3939
Epoch 7/30
[1m47/47[0m [32m━━━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  candidate = [seq + [i], score - penalty * np.log(output_tokens[0, -1, i]), [h, c]]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.05 seconds, 19.42 sentences/sec
BERTScore Precision: 0.7970021367073059
BERTScore Recall: 0.8168946504592896
BERTScore F1: 0.8068257570266724

Evaluating model without emotion label:


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.05 seconds, 19.38 sentences/sec
BERTScore Precision: 0.7970021367073059
BERTScore Recall: 0.8168946504592896
BERTScore F1: 0.8068257570266724


In [None]:
# Example usage with Beam Search
input_sentence = "Emotion: Joy. How are you?"
input_sequence = pad_sequences(tokenizer.texts_to_sequences([input_sentence]), maxlen=max_input_len, padding='post')
response_with_emotion = decode_sequence_beam_search(input_sequence)
print("Bot response with emotion label (Beam Search):", response_with_emotion)

# Example usage without emotion
input_sentence_no_emotion = "How are you?"
input_sequence_no_emotion = pad_sequences(tokenizer.texts_to_sequences([input_sentence_no_emotion]), maxlen=max_input_len, padding='post')
response_without_emotion = decode_sequence_beam_search(input_sequence_no_emotion)
print("Bot response without emotion label (Beam Search):", response_without_emotion)

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [17]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

# Load the saved models
model_with_emotion = load_model('model_with_emotion.h5')
model_without_emotion = load_model('model_without_emotion.h5')

# Hyperparameters (ensure these match the saved models)
START_TOKEN = "<start>"
END_TOKEN = "<end>"
OOV_TOKEN = "<OOV>"
MAX_SEQ_LENGTH = 20  # Maximum length of the response sequence
vocab_size = len(tokenizer.word_index) + 1  # Should match the original vocabulary size

def generate_response_direct(model, tokenizer, input_sentence, max_seq_length=MAX_SEQ_LENGTH):
    # Tokenize and pad the input sentence
    input_sequence = pad_sequences(tokenizer.texts_to_sequences([input_sentence]), maxlen=max_seq_length, padding='post')

    # Initialize target sequence with <START> token
    target_sequence = np.array([[tokenizer.word_index.get(START_TOKEN, 1)]])

    decoded_sentence = []
    for _ in range(max_seq_length):
        # Predict the next token probabilities
        output_tokens = model.predict([input_sequence, target_sequence], verbose=0)

        # Get the token with the highest probability
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_token = tokenizer.index_word.get(sampled_token_index, OOV_TOKEN)

        # Exit if the END token is generated
        if sampled_token == END_TOKEN:
            break

        # Append the token to the decoded sentence
        decoded_sentence.append(sampled_token)

        # Update the target sequence to include the newly generated token
        target_sequence = np.append(target_sequence, [[sampled_token_index]], axis=1)

    return ' '.join(decoded_sentence)

# Example usage for generating responses from saved models
input_sentence = "it feels like hitting to blank wall when i see the darkness"

# Generate response using model with emotion labels
response_with_emotion = generate_response_direct(model_with_emotion, tokenizer, input_sentence)
print("Bot response with emotion label:", response_with_emotion[:response_with_emotion.rfind('end')])

# Generate response using model without emotion labels
response_without_emotion = generate_response_direct(model_without_emotion, tokenizer, input_sentence)
print("Bot response without emotion label:", response_without_emotion[:response_without_emotion.rfind('end')])




Bot response with emotion label: that is a very noble gesture 
Bot response without emotion label: what did you do 
