In [None]:
import os   
import stat  
import math  
import torch  
from transformers import AutoTokenizer, AutoModelForCausalLM  
import nltk
from nltk.translate.bleu_score import sentence_bleu  
from nltk.translate.meteor_score import meteor_score  
from rouge_score import rouge_scorer  
from sentence_transformers import SentenceTransformer  
from sklearn.metrics.pairwise import cosine_similarity   

# Device selection: use the first CUDA device when PyTorch reports one is
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Download the NLTK resources needed by the metrics in this file:
#   punkt / punkt_tab  - tokenizer data used by nltk.word_tokenize
#   wordnet / omw-1.4  - WordNet data that meteor_score relies on
# Recent NLTK releases (3.9 and later) look up 'punkt_tab' instead of the
# older pickled 'punkt' package, so both are requested to keep
# word_tokenize working across NLTK versions.
for resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(resource)

# Keep Hugging Face downloads in a writable "hf_cache" folder under the
# current working directory, and publish that location through HF_HOME.
# NOTE(review): HF_HOME is assigned after `transformers` has already been
# imported at the top of the file, so libraries that read the variable at
# import time may not see it; the explicit cache_dir arguments passed to
# from_pretrained below do not depend on it.
cache_dir = os.path.join(os.getcwd(), "hf_cache")
os.environ["HF_HOME"] = cache_dir

# Create the folder on first use.
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir, exist_ok=True)

print("Using HF_HOME:", cache_dir)
  
# Load the tokenizer and causal language model, storing downloaded files in
# the local cache directory.
MODEL_NAME = "kashyaparun/Mental-Health-Chatbot-using-RoBERTa-fine-tuned-on-GoEmotion"

print("Loading tokenizer and model using cache_dir = " + cache_dir + "...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, cache_dir=cache_dir)

# Place the model on the device selected earlier. Without this the chosen
# device is only printed and inference always runs on the CPU.
# compute_perplexity sends its inputs to model.device, so they follow the
# model automatically. The whole model must fit in that device's memory.
model.to(device)
model.eval()  # Evaluation mode: disables dropout and similar training-only layers.

print("Tokenizer and model loaded successfully.")
print("Vocabulary size:", model.config.vocab_size)
print("Model max length:", tokenizer.model_max_length)
  
def compute_perplexity(text):
    """Return the perplexity of ``text`` under the module-level causal LM.

    The text is tokenized and truncated, any token id that falls outside the
    model's vocabulary is replaced with the tokenizer's unknown-token id
    (or 0 when the tokenizer defines none), and the loss the model reports
    when the input ids are also passed as labels is exponentiated.

    Args:
        text: The string to score.

    Returns:
        float: ``exp(loss)``. ``inf`` is returned when the loss is too large
        for ``math.exp`` to represent, instead of raising ``OverflowError``.
    """
    # Some tokenizers report a very large placeholder for model_max_length
    # when no limit is configured. Cap it at the model's position limit (when
    # the config exposes one) so truncation actually bounds the sequence.
    max_len = tokenizer.model_max_length
    context_len = getattr(model.config, "max_position_embeddings", None)
    if context_len is not None:
        max_len = min(max_len, context_len)

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_len,
    ).to(model.device)

    input_ids = inputs["input_ids"]
    vocab_size = model.config.vocab_size
    unk_token_id = tokenizer.unk_token_id if tokenizer.unk_token_id is not None else 0

    # Ids >= vocab_size would index past the embedding table; swap them for
    # the unknown token before running the model.
    input_ids_fixed = torch.where(
        input_ids >= vocab_size,
        torch.tensor(unk_token_id, device=input_ids.device),
        input_ids,
    )
    inputs["input_ids"] = input_ids_fixed

    with torch.no_grad():
        outputs = model(**inputs, labels=input_ids_fixed)
    loss = outputs.loss.item()

    try:
        return math.exp(loss)
    except OverflowError:
        # exp() overflows a Python float for very large losses.
        return math.inf
  
# Smoke-test compute_perplexity on a few sample user messages and print the
# score for each one.
eval_texts = [
    "I feel anxious and don't know what to do.",
    "I'm depressed and need someone to talk to.",
    "I feel overwhelmed with stress at work.",
]

print("Evaluating Perplexity after fix:")
for text in eval_texts:
    # Score first so nothing is printed for a text whose scoring fails.
    ppl = compute_perplexity(text)
    print("Text:", text)
    print("Perplexity:", ppl)
    print("-----")

print("done")

# Load a sentence transformer model for semantic similarity.
# cache_folder is passed explicitly, mirroring the cache_dir argument used for
# the tokenizer and language model, so these weights are also stored in the
# local writable cache directory rather than the user-level default cache.
semantic_model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder=cache_dir)
  
def evaluate_response_quality(generated_response, reference_response):
    """Score a generated reply against a reference reply.

    Computes lexical-overlap metrics (BLEU, METEOR, ROUGE) and an
    embedding-based cosine similarity between the two strings.

    Args:
        generated_response: The candidate text to evaluate.
        reference_response: The reference text to compare against.

    Returns:
        dict: Python floats keyed by ``'bleu'``, ``'meteor'``,
        ``'rouge1_f'``, ``'rouge2_f'``, ``'rougeL_f'`` and
        ``'semantic_similarity'``.
    """
    # Imported locally so this function does not depend on changes to the
    # file's top-level import block.
    from nltk.translate.bleu_score import SmoothingFunction

    # Lower-case and tokenize both responses for BLEU and METEOR.
    gen_tokens = nltk.word_tokenize(generated_response.lower())
    ref_tokens = nltk.word_tokenize(reference_response.lower())

    # Sentence-level BLEU with NLTK's default n-gram weights. Without
    # smoothing the score collapses to ~0 as soon as any n-gram order has no
    # match, which is common for short replies. method1 only adjusts the
    # orders whose match count is zero, so other scores are unchanged.
    bleu = sentence_bleu(
        [ref_tokens],
        gen_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

    # METEOR over the same token lists.
    meteor = meteor_score([ref_tokens], gen_tokens)

    # ROUGE-1/2/L with stemming; the scorer takes (target, prediction).
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge_scores = scorer.score(reference_response, generated_response)

    # Cosine similarity between the sentence embeddings of the two raw strings.
    # Cast to a built-in float so every value in the result has the same type
    # (the similarity matrix holds NumPy scalars).
    embeddings = semantic_model.encode([reference_response, generated_response])
    semantic_sim = float(cosine_similarity([embeddings[0]], [embeddings[1]])[0][0])

    return {
        'bleu': bleu,
        'meteor': meteor,
        'rouge1_f': rouge_scores['rouge1'].fmeasure,
        'rouge2_f': rouge_scores['rouge2'].fmeasure,
        'rougeL_f': rouge_scores['rougeL'].fmeasure,
        'semantic_similarity': semantic_sim,
    }
  
# Demonstration: score one generated reply against a hand-written reference
# and print every metric on its own line.
reference = "I'm sorry you're feeling anxious. It might help to try some deep breathing exercises and reach out to someone you trust."
generated = "I understand that you're anxious. Perhaps deep breathing and talking with a friend might ease some stress."

metrics = evaluate_response_quality(generated, reference)
print("Evaluation Metrics:")
for metric, value in metrics.items():
    # print() applies str() to each argument, matching str(value) formatting.
    print(metric, value, sep=": ")
    


Using device: cpu


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mward\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mward\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\mward\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Using HF_HOME: c:\Users\mward\Project_3\hf_cache
Loading tokenizer and model using cache_dir = c:\Users\mward\Project_3\hf_cache...


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:  24%|##4       | 870M/3.59G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:  24%|##3       | 1.16G/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:  26%|##6       | 1.30G/4.95G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs-us-1.hf.co/repos/8f/99/8f991a3329a6eab513a218c979c1e3d9409e6be2b69f315fd96a0312fe23efa8/b2bf4c94feb4e375cfbcd4c4cdcf74ac5e318f47318d7f63907d2e53aee0302d?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00002-of-00003.safetensors%3B+filename%3D%22model-00002-of-00003.safetensors%22%3B&Expires=1743741468&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0Mzc0MTQ2OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzhmLzk5LzhmOTkxYTMzMjlhNmVhYjUxM2EyMThjOTc5YzFlM2Q5NDA5ZTZiZTJiNjlmMzE1ZmQ5NmEwMzEyZmUyM2VmYTgvYjJiZjRjOTRmZWI0ZTM3NWNmYmNkNGM0Y2RjZjc0YWM1ZTMxOGY0NzMxOGQ3ZjYzOTA3ZDJlNTNhZWUwMzAyZD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=RX5WA58SfBxHwNNBs0hCIdqSupHJFfzGkR-OtHwlJWWkG2ktsa2-58MoxevjRvjqNHGgygEWA2-iE0r5ZmdSRA9pVlj0lEuRlYVxDwBljM238mA9hjfzHRpKjYGki7zjjwUAw1x%7Eu1rdmmXebb9ugZmmClfOhz5YEYeMnecPrFNi5SGX7mKfd7uhSHX0IR-fee2ES3ExBa9ifg%7ExtPuvxiS5A90mfJk9mV

model-00002-of-00003.safetensors:  31%|###       | 1.52G/4.95G [00:00<?, ?B/s]