In [None]:
!pip install opik
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf huggingface_hub hf_transfer
!pip install --no-deps unsloth
!pip install torchinfo
!pip install -U datasets

In [None]:
import unsloth

In [None]:
from unsloth import FastLanguageModel
# message history part remains
import re
import os
# If a CUDA version incompatibility error persists, try setting this:
# os.environ["BNB_CUDA_VERSION"] = "117"
# print(os.environ)


import torch
from transformers import TextIteratorStreamer

# Set environment variable to control model cache location (optional)
# Uncomment and modify the path if you want to change the default cache location
# os.environ["HF_HOME"] = "/path/to/your/model/cache"



# Model configuration
MODEL_ID = "sayantanBiswas/mistral-7b-v0.3"  # Change this to your preferred model
MAX_SEQ_LENGTH = 2048  # passed to FastLanguageModel.from_pretrained as max_seq_length
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Ask about bf16 support only when a CUDA device is actually present, so the
# capability query is never issued on a CPU-only machine; float16 is the
# fallback in every other case.
DTYPE = (
    torch.bfloat16
    if DEVICE == "cuda" and torch.cuda.is_bf16_supported()
    else torch.float16
)

class ChatModel:
    """Wrapper around an Unsloth-loaded language model for Alpaca-style prompts.

    The constructor loads the model/tokenizer pair and stores the sampling
    settings; ``generate_response`` runs one non-streaming generation.
    """

    # Captures the body of each "### Response:" section, up to the next
    # "###" header or the end of the text.
    _RESPONSE_PATTERN = re.compile(r'### Response:\s+(.*?)(?=###|\Z)', re.DOTALL)

    def __init__(self, model_id):
        """Load ``model_id`` with Unsloth and set up the generation parameters.

        Args:
            model_id: Model name handed to ``FastLanguageModel.from_pretrained``.
        """
        print(f"Loading model {model_id} on {DEVICE} with {DTYPE}...")

        # Device that tokenized inputs are moved to before generation.
        self.device = DEVICE

        # Load the model with Unsloth
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_id,
            max_seq_length=MAX_SEQ_LENGTH,
            dtype=DTYPE,
        )

        # Keyword arguments forwarded verbatim to ``model.generate``.
        self.generation_config = {
            "max_new_tokens": 256,
            "temperature": 0.7,
            "top_p": 0.9,
            "top_k": 50,
            "repetition_penalty": 1.2,
            "do_sample": True,
            "use_cache": True,
        }

        print("Model loaded successfully!")

    @classmethod
    def _split_reference(cls, history):
        """Split ``history`` into (prompt, reference response).

        The reference is the first non-empty "### Response:" section. The
        prompt is everything up to the start of that section's body, so it
        still ends with the "### Response:" header but no longer contains the
        answer. When no filled response section exists, the history is
        returned unchanged together with an empty reference.
        """
        for match in cls._RESPONSE_PATTERN.finditer(history):
            reference = match.group(1).strip()
            if reference:
                return history[:match.start(1)], reference
        return history, ''

    def generate_response(self, history):
        """Generate a completion for ``history`` and return it with its reference.

        Args:
            history: Prompt text. It may already contain a filled
                "### Response:" section (e.g. a formatted dataset row).

        Returns:
            Tuple ``(original, generated_response)``. ``original`` is the
            first non-empty response section found in ``history`` (``''`` if
            there is none). ``generated_response`` is the decoded output
            sequence returned by ``model.generate``, with special tokens
            skipped.
        """
        # The reference answer must not be visible to the model: feeding it
        # as part of the prompt would leak the ground truth into the output
        # that is later scored against that same reference.
        prompt, original = self._split_reference(history)

        # Tokenize the input
        inputs = self.tokenizer([prompt], return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Generate without streaming; gradients are not needed for inference.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                **self.generation_config
            )

        # Decode the generated tokens
        generated_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        return original, generated_response

    # def extract_input_response(self, text):
    #     results = {}

    #     # Extract Input sections independently
    #     input_pattern = r'### Input:\s+(.*?)(?=###|\Z)'
    #     input_matches = re.findall(input_pattern, text, re.DOTALL)

    #     # Clean and store input matches
    #     if input_matches:
    #         cleaned_inputs = [match.strip() for match in input_matches if match.strip()]
    #         if cleaned_inputs:
    #             results['Input'] = cleaned_inputs

    #     # Extract Response sections independently
    #     response_pattern = r'### Response:\s+(.*?)(?=###|\Z)'
    #     response_matches = re.findall(response_pattern, text, re.DOTALL)

    #     # Clean and store response matches
    #     if response_matches:
    #         cleaned_responses = [match.strip() for match in response_matches if match.strip()]
    #         if cleaned_responses:
    #             results['Response'] = cleaned_responses

    #     # If nothing was found, return None
    #     if not results:
    #         return None

    #     return results

    # def format_chat_history(self, history):
    #     with open('history.txt', 'a') as f:
    #         f.write(str(history)+'\n'+'*'*10+'\n')
    #     """Format prompt using only the last turn, optionally with one prior turn for context."""

    #     alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    #     ### Instruction:
    #     {}

    #     ### Input:
    #     {}

    #     ### Response:
    #     """

    #     # Initialize variables to handle previous user and assistant messages
    #     prev_user = ""
    #     prev_assistant = ""
    #     context = ""

    #     # Include one previous exchange as context (optional)
    #     if len(history) > 1:
    #         print('history: ', history, len(history))

    #         # Get the previous exchange
    #         prev_exchange = history[-2][1]

    #         # Extract input and response from previous exchange
    #         results = self.extract_input_response(prev_exchange)

    #         if results is not None:
    #             prev_user_list = results.get('Input')
    #             prev_assistant_list = results.get('Response')

    #             prev_user = '\n'.join(prev_user_list) if prev_user_list else ''
    #             prev_assistant = '\n'.join(prev_assistant_list) if prev_assistant_list else ''

    #             # Create context with previous exchange
    #             context = f"\n{prev_user}\n\n{prev_assistant}\n"

    #     current_user_input = history[-1][0]
    #     full_input = context + f"\n{current_user_input}"

    #     return alpaca_prompt.format(
    #         "Answer the user's question accurately, thoroughly, and helpfully. Provide clear explanations with relevant details. If asked about medical or health-related topics, give informative responses while maintaining a balanced and educational tone.",
    #         full_input
    #     )

In [None]:

# Load the model once; the generation loop further down reuses this instance.
chat = ChatModel(MODEL_ID)
tokenizer = chat.tokenizer
# Alpaca-style template with three positional slots, filled in order with
# the instruction, the input, and the response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # End-of-sequence token; formatting_prompts_func appends it to every formatted example
def formatting_prompts_func(examples):
    """Rewrite one dataset row into the Alpaca prompt layout.

    The row's ``text`` field is expected to hold ``<HUMAN>:`` / ``<ASSISTANT>:``
    markers. Only the last human turn and the assistant segment directly
    after it are used. Raises ``IndexError`` when that turn has no
    ``<ASSISTANT>:`` marker.

    Returns:
        ``{"text": ...}`` holding the filled template followed by ``EOS_TOKEN``.
    """
    instructions = "Answer the user's question accurately, thoroughly, and helpfully. Provide clear explanations with relevant details. If asked about medical or health-related topics, give informative responses while maintaining a balanced and educational tone."

    # Everything after the final human marker: "<question><ASSISTANT>:<answer>..."
    last_turn = examples['text'].split('<HUMAN>:')[-1]
    segments = last_turn.split('<ASSISTANT>:')
    inputs = segments[0].strip()
    outputs = segments[1].strip()

    # The EOS token marks where the response ends.
    formatted = alpaca_prompt.format(instructions, inputs, outputs) + EOS_TOKEN
    return {"text": formatted}

from datasets import load_dataset
# Load the "train" split of the mental-health chatbot dataset.
dataset = load_dataset("heliosbrahma/mental_health_chatbot_dataset", split = "train")
# formatting_prompts_func is applied per row (map is not batched here); the
# "text" value it returns is expected to replace the row's original "text".
dataset = dataset.map(formatting_prompts_func)

In [None]:
len(dataset)

In [None]:
from tqdm import tqdm, trange
# Compiled once for the whole loop: captures what follows the first
# "### Response:" header, up to the next "###" header or the end of the text.
response_re = re.compile(r"### Response:(.*?)(?=###|\Z)", re.DOTALL)

val_data = []
for example in tqdm(dataset):
    original, response = chat.generate_response(example['text'])

    # Keep only the response part of the decoded output.
    found = response_re.search(response)
    clean_response = found.group(1).strip() if found else "Failed to extract response"

    # Stored as (model output, reference) - the order the scoring loop unpacks.
    val_data.append((clean_response, original))

  # print(response)
  # print(original)

In [None]:
!pip install sacrebleu

In [None]:
import math
import re
from collections import Counter
import sacrebleu
from tqdm import tqdm

def simple_tokenize(text):
    """Return the lowercased word tokens of *text*, treating punctuation as a separator."""
    # Every character that is neither a word character nor whitespace
    # becomes a space, so it splits tokens instead of sticking to them.
    without_punct = re.sub(r'[^\w\s]', ' ', text)
    lowered = without_punct.lower()
    return lowered.split()

def calculate_bleu_simple(reference, candidate, max_n=4):
    """Simplified BLEU score (no external dependencies).

    Args:
        reference: List of reference token lists.
        candidate: Candidate token list.
        max_n: Highest n-gram order to use. Orders longer than the candidate
            are skipped, so short candidates are scored on fewer orders.

    Returns:
        Brevity penalty times the geometric mean of the clipped n-gram
        precisions. Returns 0.0 when the candidate is empty, when there are
        no references, when ``max_n`` allows no n-gram order, or when any
        precision is zero.
    """
    # Without candidate tokens or references there is nothing to score;
    # returning early also avoids dividing by a zero length below.
    if not candidate or not reference:
        return 0.0

    def ngram_counts(tokens, n):
        return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

    candidate_len = len(candidate)
    precisions = []
    for n in range(1, min(max_n, candidate_len) + 1):
        candidate_counts = ngram_counts(candidate, n)

        # Counter union keeps, per n-gram, the highest count seen in any
        # single reference - the clipping ceiling for the candidate.
        max_counts = Counter()
        for ref in reference:
            max_counts |= ngram_counts(ref, n)

        clipped = sum(
            min(count, max_counts[ngram])
            for ngram, count in candidate_counts.items()
        )
        # n <= candidate_len, so the candidate has at least one n-gram here.
        precisions.append(clipped / sum(candidate_counts.values()))

    if not precisions:
        return 0.0

    # Brevity penalty against the reference whose length is closest to the
    # candidate's.
    closest_ref_len = min(
        (len(ref) for ref in reference),
        key=lambda ref_len: abs(ref_len - candidate_len),
    )
    if candidate_len < closest_ref_len:
        bp = math.exp(1 - closest_ref_len / candidate_len)
    else:
        bp = 1.0

    # log(0) is undefined, so a single zero precision gives a zero score.
    if not all(p > 0 for p in precisions):
        return 0.0
    return bp * math.exp(sum(math.log(p) for p in precisions) / len(precisions))

def calculate_sacrebleu(reference, hypothesis):
    """Return the ``score`` of sacrebleu's sentence-level BLEU for one pair."""
    # Note the argument order: sacrebleu is called with the hypothesis
    # first, then a list holding the single reference.
    result = sacrebleu.sentence_bleu(hypothesis, [reference])
    return result.score

def calculate_perplexity_4gram(text1, text2):
    """Perplexity of *text1* under a Laplace-smoothed 4-gram model built from *text2*.

    *text1* is the machine-generated text, *text2* the original. Returns
    ``inf`` when *text1* has fewer than four tokens.
    """
    token_pattern = r'\w+|[^\w\s]'

    def to_tokens(raw):
        # Drop the literal end-of-sequence marker before tokenizing.
        return re.findall(token_pattern, raw.replace('</s>', '').lower())

    def ngrams(tokens, n):
        # Zipping n progressively shifted views yields every run of n tokens.
        return list(zip(*(tokens[shift:] for shift in range(n))))

    generated_tokens = to_tokens(text1)
    original_tokens = to_tokens(text2)

    generated_fourgrams = ngrams(generated_tokens, 4)
    if not generated_fourgrams:
        return float('inf')  # Prevent division by zero

    fourgram_counts = Counter(ngrams(original_tokens, 4))
    trigram_counts = Counter(ngrams(original_tokens, 3))
    vocab_size = len(set(generated_tokens) | set(original_tokens))

    log_sum = 0
    for gram in generated_fourgrams:
        # Add-one smoothing: a 4-gram unseen in the original still gets
        # probability 1 / (count of its trigram prefix + vocabulary size).
        smoothed_count = fourgram_counts.get(gram, 0) + 1
        context_total = trigram_counts.get(gram[:-1], 0) + vocab_size
        log_sum += math.log2(smoothed_count / context_total)

    mean_log_prob = log_sum / len(generated_fourgrams)
    return math.pow(2, -mean_log_prob)

def calculate_jaccard_similarity(text1, text2):
    """Jaccard overlap of the lowercased token sets of two texts.

    Tokens are runs of word characters or single punctuation marks. Two
    texts without any tokens count as identical (1.0).
    """
    token_re = re.compile(r'\w+|[^\w\s]')
    first = set(token_re.findall(text1.lower()))
    second = set(token_re.findall(text2.lower()))

    combined = first | second
    if not combined:
        return 1.0
    return len(first & second) / len(combined)

def score_calculator(machine_generated, original):
    """Score one generated text against its original.

    Returns:
        Tuple ``(bleu, sacrebleu, perplexity, jaccard)``.
    """
    # The simplified BLEU works on token lists and takes a list of
    # references; the other three metrics take the raw strings.
    bleu_score = calculate_bleu_simple(
        [simple_tokenize(original)],
        simple_tokenize(machine_generated),
    )
    sacre_score = calculate_sacrebleu(original, machine_generated)
    perplexity_score = calculate_perplexity_4gram(machine_generated, original)
    jaccard_score = calculate_jaccard_similarity(machine_generated, original)

    return bleu_score, sacre_score, perplexity_score, jaccard_score


# One (bleu, sacrebleu, perplexity, jaccard) tuple per (generated, original) pair.
scores = [
    score_calculator(machine, original)
    for machine, original in tqdm(val_data)
]

# Column-wise mean over all pairs, in the same metric order as the tuples.
import numpy as np
np.mean(scores, axis = 0)
