# preparing dataset

## loading raw data

In [2]:
import json

# Read the raw conversations. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default locale encoding.
with open("../data/extended_esc_13b.json", "r", encoding="utf-8") as f:
    data = json.load(f)
    

## converting to chat format

In [14]:
from copy import copy

processed_convs = []

print("no of conversations: ", len(data))

for conversation in data:
    # Map dataset speaker tags onto chat roles: 'seeker' becomes the user,
    # every other speaker is treated as the assistant.
    roles = ['user' if who == 'seeker' else 'assistant' for who in conversation['speakers']]
    contents = [utterance.strip() for utterance in conversation['dialog']]

    # Dialogue history shared by every candidate response of this conversation.
    history = [{'role': role, 'content': content} for role, content in zip(roles, contents)]

    # One sample per (strategy, response) pair: the history followed by that
    # response as the closing assistant turn. The list is new for each sample;
    # the history message dicts themselves are shared between samples.
    processed_convs.extend(
        {'messages': history + [{'role': 'assistant', 'content': reply}], 'strategy': strategy}
        for strategy, reply in conversation['responses'].items()
    )


print("no of sampels: ", len(processed_convs))
processed_convs[10]

no of conversations:  9354
no of sampels:  41822


{'messages': [{'role': 'user',
   'content': 'hello im looking for someone to talk to  im fine how are you'},
  {'role': 'assistant',
   'content': "I'm doing ok I'm glad you are good. Is it snowing by you? Merry Christmas!"},
  {'role': 'user',
   'content': 'thats great and no its not snowing its very cold thow\n merry christmas to you also'},
  {'role': 'assistant', 'content': 'How can I help you today?'},
  {'role': 'user',
   'content': 'im having some issues with friends not actually being friends'},
  {'role': 'assistant',
   'content': "I hear you are having trouble figuring out which friends are really your friends and which ones aren't. Is that about right?"},
  {'role': 'user', 'content': 'yes'},
  {'role': 'assistant',
   'content': 'I understand that... Did something happen that makes you feel like that?'},
  {'role': 'user',
   'content': 'yes their is no communication or interaction between me and them nobody answers phone calls txt messages etc'},
  {'role': 'assistant'

## taking a sample of data for PoC

In [21]:
import numpy as np

# Draw the PoC subset from a seeded permutation of indices so that the same
# conversations are selected on every run. `processed_convs` itself is left
# untouched (no in-place shuffle), which keeps this cell idempotent and keeps
# earlier displayed outputs such as `processed_convs[10]` valid.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42

sample_indices = np.random.default_rng(SAMPLE_SEED).permutation(len(processed_convs))[:SAMPLE_SIZE]
sample = [processed_convs[i] for i in sample_indices]

# probing the token embeddings of the responses

## loading a dummy model (randomly initialised, single-layer Llama)

In [48]:
from transformers import LlamaTokenizer, LlamaModel, LlamaConfig
from typing import Dict, List
import torch


# The dummy model below is built from a config only, i.e. with randomly
# initialised weights. Seed torch first so those weights -- and therefore every
# embedding extracted from the model -- are identical across kernel restarts.
torch.manual_seed(0)

tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
# Single hidden layer keeps the model small; it is a stand-in for the PoC,
# not a pretrained checkpoint.
config = LlamaConfig(num_hidden_layers=1)
model = LlamaModel(config=config)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
model.eval()

def get_assistant_response_embeddings(conv: List[Dict[str, str]], verbose: bool = True) -> np.ndarray:
    """Return last-layer hidden states for the tokens of the final assistant turn.

    The conversation is tokenized twice with the tokenizer's chat template:
    once without its last message (the prompt) and once in full. The hidden
    states at positions from the end of the prompt up to, but excluding, the
    very last token of the full sequence are returned.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        conv: Chat messages (dicts with 'role' and 'content'). The last
            message must be the assistant response to embed, preceded by at
            least one context message.
        verbose: When True (the default, matching the previous behaviour),
            print the prompt and full token counts.

    Returns:
        Array with one row per selected token position and one column per
        hidden dimension, i.e. ``(len(full) - len(prompt) - 1, hidden_size)``.

    Raises:
        ValueError: If ``conv`` has fewer than two messages or does not end
            with an assistant message.
    """
    # Fail loudly instead of silently slicing out unrelated positions.
    if len(conv) < 2 or conv[-1].get('role') != 'assistant':
        raise ValueError(
            "conv must contain at least one context message followed by a final assistant message"
        )

    prompt_tokens = tokenizer.apply_chat_template(conv[:-1], tokenize=True)
    prompt_len = len(prompt_tokens)

    full_prompt = tokenizer.apply_chat_template(conv, tokenize=True)
    if verbose:
        print("base prompt len:", prompt_len)
        print("full prompt len:", len(full_prompt))

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(input_ids=torch.tensor([full_prompt]).to(device))

    # Drop the batch dimension (batch size is 1).
    out_emb = outputs.last_hidden_state.cpu().numpy()[0]
    # Assumes the prompt tokenization is a prefix of the full tokenization, so
    # everything after `prompt_len` belongs to the response. The final token
    # is excluded (presumably the end-of-sequence marker -- TODO confirm).
    return out_emb[prompt_len:-1, :]

# Smoke-test the extraction on the first sampled conversation.
out = get_assistant_response_embeddings(conv=sample[0]['messages'])

base prompt len: 339
full prompt len: 438


In [49]:
# Dimensions of the extracted matrix: (selected token positions, hidden size).
np.shape(out)

(98, 4096)

# train projection weights

## load training data

In [50]:
import pickle

# Load the precomputed bundle: index 0 holds the embedding matrix, index 1 the
# matching strategy labels (see the peek in the next cell).
# NOTE: pickle can execute arbitrary code while loading -- only open files
# that were produced by a trusted source.
with open("../data/sample_embeddings.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

In [52]:
# Peek at the first ten strategy labels and the embedding matrix dimensions.
(data[1][:10], data[0].shape)

(['Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives',
  'Provide Different Perspectives'],
 (114929, 4096))

In [59]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score

# Encode the string strategy labels as integer class ids.
label_encoder = LabelEncoder()
y_numeric = label_encoder.fit_transform(data[1])

TEST_FRACTION = 0.3
SPLIT_SEED = 42

# NOTE(review): the rows look like per-token embeddings (consecutive rows share
# a label) -- TODO confirm. If so, a purely random row split puts tokens of the
# same response into both train and test, which can inflate the scores below;
# a group-wise split by response id would be needed to rule that out.
X_train, X_test, y_train, y_test = train_test_split(
    data[0], y_numeric, test_size=TEST_FRACTION, random_state=SPLIT_SEED, shuffle=True
)

# Create and train the ridge classifier (a linear probe on the embeddings).
# It is deliberately NOT named `model`: that global is the Llama model that
# `get_assistant_response_embeddings` reads, and rebinding it here would break
# any later call to that function.
ridge_clf = RidgeClassifier()
ridge_clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = ridge_clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

f1 = f1_score(y_test, y_pred, average='macro')
print(f'f1 score: {f1:.2f}')

Accuracy: 0.75
f1 score: 0.75


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
