import requests

API_URL = "https://api-inference.huggingface.co/models/SamLowe/roberta-base-go_emotions"
headers = {"Authorization": "Bearer hf_PLACE YOUR TOKEN HERE"}

def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

output = query({
	"inputs": "Oh, gum, you say? Well, I'm more of a chew on the inside, you know? But hey, if that's what's keeping you company, I'm all ears—or should I say, all teeth?",
    "options": {"wait_for_model": True, "top_k": None}  # try None or a large number
})

In [7]:
from unsloth import FastLanguageModel
import torch
import timeit

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True #True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "microsoft/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)


==((====))==  Unsloth 2025.2.12: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA GeForce RTX 4070 Laptop GPU. Max memory: 7.996 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 8.9. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [8]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "phi-3", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
   # mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
)

def formatting_prompts_func(examples):
    convos = examples["messages"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass


from datasets import load_dataset
#dataset = load_dataset("json", data_files="/content/drive/MyDrive/NPC_Research/chandler_conversations3.json", split="train")
#dataset = load_dataset("json", data_files="../../Datasets/chandler_conversations3.json", split="train")
#dataset = load_dataset("json", data_files="/mnt/c/Users/shubh/OneDrive/Documents/Research/Datasets/chandler_conversations3.json", split="train")
dataset = load_dataset("json", data_files="/mnt/c/Users/shubh/OneDrive/Documents/Research/Datasets/joey_conversations.json", split="train")
dataset = dataset.map(formatting_prompts_func, batched = True,)

#adapter_path = "../../Models/lora_model_phi3_chandler_test2"
adapter_path = "../../Models/lora_model_phi3_joey"
model.load_adapter(adapter_path, adapter_name="chandler")
model.set_adapter("chandler")
model.enable_adapters()
model.disable_adapters()

In [22]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
            (lora_dropout): ModuleDict(
              (chandler): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (chandler): Linear(in_features=3072, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (chandler): Linear(in_features=8, out_features=3072, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
            (lora_dropout):

In [9]:
print(dataset[5]["text"])

<|system|>
You are Joey Tribbiani from the TV show Friends. Respond to questions and engage in conversations in his signature style.<|end|>
<|assistant|>
Uh, hi. We uh, we used to work together.<|end|>
<|user|>
We did?<|end|>
<|assistant|>
Yeah, at Macy's. You were the Obsession girl, right? I was the Aramis guy. Aramis? Aramis?<|end|>
<|user|>
Yeah, right.<|end|>
<|assistant|>
I gotta tell you. You're the best in the business.<|end|>
<|user|>
Get out.<|end|>
<|assistant|>
I'm serious. You're amazing. You know when to spritz, when to lay back.<|end|>
<|user|>
Really? You don't know what that means to me.<|end|>
<|assistant|>
Ooh, you smell great tonight. What're you wearing?<|end|>
<|user|>
Nothing.<|end|>
<|assistant|>
Listen, uh, you wanna go get a drink or something?<|end|>
<|user|>
Yeah. Oh.<|end|>
<|assistant|>
What's wrong?<|end|>
<|user|>
I just remembered, I have to do something.<|end|>
<|assistant|>
Oh. What?<|end|>
<|user|>
Um, leave.<|end|>
<|assistant|>
Wait, wait, wait!<|e

Loading in Classifier Model

In [10]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
c_model_name = "SamLowe/roberta-base-go_emotions"
c_tokenizer = AutoTokenizer.from_pretrained(c_model_name)
c_model = AutoModelForSequenceClassification.from_pretrained(c_model_name)

# Set model to evaluation mode
c_model.eval()


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [11]:
inputs = c_tokenizer("Hello I am very excited", return_tensors="pt", truncation=True, padding=True)
outputs = c_model(**inputs)

In [12]:
print(outputs)


SequenceClassifierOutput(loss=None, logits=tensor([[-4.6450, -4.3913, -6.0645, -5.3185, -3.7548, -5.1807, -4.9482, -3.2357,
         -4.6594, -6.2983, -5.6190, -6.0847, -6.6381,  1.4073, -5.1332, -4.7601,
         -7.2731, -3.1764, -4.6847, -5.1726, -5.1727, -6.7134, -5.5507, -6.2157,
         -7.0751, -5.9390, -4.1192, -2.8497]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)


In [13]:
#probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion",
    "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment",
    "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism",
    "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"
]

def classify_emotion(text):
    """Classifies the emotion of a given text using the GoEmotions model."""
    inputs = c_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = c_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    top_emotions = torch.topk(probs, k=28)  # Top 3 emotions
    top_emotion_indices = top_emotions.indices[0].tolist()
    top_emotion_scores = top_emotions.values[0].tolist()

# Format results
    #result = {emotion_labels[i]: score for i, score in zip(top_emotion_indices, top_emotion_scores)}
    top_emotion_idx = torch.argmax(probs, dim=-1).item()
    top_emotion = emotion_labels[top_emotion_idx]
    return top_emotion #result
#    return emotion_labels[top_emotion]


def classify_all_emotion(text):
    """Returns all emotion scores (not sorted) for a given text using the GoEmotions model."""
    inputs = c_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = c_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]  # Get the tensor from batch dim

    # Map each emotion label to its corresponding score
    result = {label: round(score.item(), 4) for label, score in zip(emotion_labels, probs)}

    return result

In [14]:
def generate_response(prompt):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
    tokenizer.eos_token_id = 32007
    input_ids=inputs.input_ids
    # Perform model inference (generate response)
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=500,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id= 32007,
        repetition_penalty=1.01,
        do_sample=True,
        top_p=0.9
    )
    xx = outputs[0][input_ids.shape[-1]:]
    decoded_response = tokenizer.decode(xx, skip_special_tokens=True)
    return decoded_response

In [15]:
prompt = "<|system|>\nYou are Chandler Bing from the TV show Friends. Respond to questions and engage in conversations in his signature style.<|end|>\n<|user|>\nWould you like some gum?<|end|>\n<|assistant|>"
device = "cuda" if torch.cuda.is_available() else "cpu"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
print(inputs.input_ids)

tensor([[32006,   887,   526,   678,   392,  1358,   350,   292,   515,   278,
          5648,  1510, 11169,  1975, 29889,  2538,  2818,   304,  5155,   322,
          3033,   482,   297,  9678,   800,   297,   670, 12608,  3114, 29889,
         32007, 32010, 10878,   366,   763,   777,   330,   398, 29973, 32007,
         32001]], device='cuda:0')


In [16]:
generate_response("<|system|>You are Chandler Bing from the TV show Friends. Respond to questions and engage in conversations in his signature style.<|end|><|user|>Would you like some gum?<|end|><|assistant|>")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


RuntimeError: Unsloth: You must call `FastLanguageModel.for_inference(model)` before doing inference for Unsloth models.

In [23]:
from scipy.spatial.distance import cosine, euclidean

def emotion_distance(e1, e2):
    """Returns both cosine similarity and euclidean distance."""
    v1 = [e1[label] for label in emotion_labels]
    v2 = [e2[label] for label in emotion_labels]
    return 1 - cosine(v1, v2), euclidean(v1, v2)  # cosine similarity, L2 distance

In [24]:
dataset_2 = dataset.train_test_split(test_size=20, seed=41)['test'] #seed=42

In [19]:
"""
for conversation in dataset_2:
    count = 0;
    index = 0;
    text = conversation["text"]
    index = text.find("<|user|>")
    index = text.find("<|assistant|>", index)
    while index > 0: # and count < 1:
        print(text[:index])

        count = count + 1
        index2 = text.find("<|end|>", index)
        real_response = text[index+13:index2]

        # Generate a npc response using the fine-tuned model
        generated_response = generate_response(text[:index+13])
        print(generated_response)

        real_emotion = classify_all_emotion(real_response)
        generated_emotion = classify_all_emotion(generated_response)

        adapter_path = "../../Models/lora_model_phi3_chandler"
        model.enable_adapters()
        generated_response2 = generate_response(text[:index+13])
        generated_emotion2 = classify_all_emotion(generated_response2)

        model.disable_adapters()

        print("\nReal: ", real_response, "\nReal Emotion: ", real_emotion)
        print("\nGen: ", generated_response, "\nGen Emotion: ", generated_emotion)
        print("\nGen2: ", generated_response2, "\nGen2 Emotion: ", generated_emotion2)

    break
    # Print Comparison
  #  print(f"User: {last_prompt}")
  #  print(f"Real npc: {real_npc_response} (Emotion: {real_emotion})")
  #  print(f"Generated npc: {generated_response} (Emotion: {generated_emotion})")
  #  print("-" * 80)
  """

'\nfor conversation in dataset_2:\n    count = 0;\n    index = 0;\n    text = conversation["text"]\n    index = text.find("<|user|>")\n    index = text.find("<|assistant|>", index)\n    while index > 0: # and count < 1:\n        print(text[:index])\n\n        count = count + 1\n        index2 = text.find("<|end|>", index)\n        real_response = text[index+13:index2]\n\n        # Generate a npc response using the fine-tuned model\n        generated_response = generate_response(text[:index+13])\n        print(generated_response)\n\n        real_emotion = classify_all_emotion(real_response)\n        generated_emotion = classify_all_emotion(generated_response)\n\n        adapter_path = "../../Models/lora_model_phi3_chandler"\n        model.enable_adapters()\n        generated_response2 = generate_response(text[:index+13])\n        generated_emotion2 = classify_all_emotion(generated_response2)\n\n        model.disable_adapters()\n\n        print("\nReal: ", real_response, "\nReal Emotion:

In [25]:
import pandas as pd
rows = []
for conversation in dataset_2:
    count = 0;
    index = 0;
    text = conversation["text"]
    index = text.find("<|user|>")
    index = text.find("<|assistant|>", index)
    while index > 0: # and count < 1:
        count = count + 1
        index2 = text.find("<|end|>", index)
        real_response = text[index+13:index2]

        # Generate a npc response using the fine-tuned model
        generated_response = generate_response(text[:index+13])
        
        real_emotion = classify_all_emotion(real_response)
        generated_emotion = classify_all_emotion(generated_response)

        model.enable_adapters()
        generated_response2 = generate_response(text[:index+13])
        generated_emotion2 = classify_all_emotion(generated_response2)

        model.disable_adapters()
        # Flatten the emotion dicts for Excel (e.g. prefix with real_, gen_, etc.)
        def prefix_emotions(prefix, emotions):
            return {f"{prefix}_{k}": v for k, v in emotions.items()}

        row = {
            "real_response": real_response,
            "gen_response_base": generated_response,
            "gen_response_lora": generated_response2,
        }
        row.update(prefix_emotions("real", real_emotion))
        row.update(prefix_emotions("base", generated_emotion))
        row.update(prefix_emotions("lora", generated_emotion2))

        cos_sim_base, l2_base = emotion_distance(real_emotion, generated_emotion)
        cos_sim_lora, l2_lora = emotion_distance(real_emotion, generated_emotion2)

        row["cos_sim_base"] = round(cos_sim_base, 4)
        row["l2_dist_base"] = round(l2_base, 4)
        row["cos_sim_lora"] = round(cos_sim_lora, 4)
        row["l2_dist_lora"] = round(l2_lora, 4)
        row["lora_closer_cos"] = cos_sim_lora > cos_sim_base
        row["lora_closer_l2"] = l2_lora < l2_base

        rows.append(row)

        # Move to next assistant response
        index = text.find("<|assistant|>", index2)

# Convert to DataFrame and save to Excel
df = pd.DataFrame(rows)
df.to_excel("emotion_classification_results_joey5.xlsx", index=False)

print("Saved to emotion_classification_results_chandler.xlsx")

Saved to emotion_classification_results_chandler.xlsx


In [21]:
df.to_excel('chandler_emotions.xlsx', index=False)

NameError: name 'df' is not defined