# qwen + sliding chunking


In [2]:
import torch
import numpy as np
import faiss
import requests
import re
import os
import nltk
from nltk.tokenize import sent_tokenize
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import csv
from datetime import datetime

nltk.download('punkt')

# -----------------------------
# 📤 Text Loaders
# -----------------------------

def extract_text_from_web(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        text = '\n'.join(p.get_text() for p in soup.find_all(['p', 'li']))
        return re.sub(r'\n+', '\n', text).strip()
    except:
        return ''

def extract_texts_from_folder(folder_path):
    texts = []
    for file in os.listdir(folder_path):
        if file.endswith(".txt"):
            with open(os.path.join(folder_path, file), 'r', encoding='utf-8') as f:
                texts.append(f.read())
    return texts

# -----------------------------
# 🧠 Sentence-Level Sliding Chunking
# -----------------------------

def sliding_window_chunking(text, window_size=3, overlap=1):
    sentences = sent_tokenize(text)
    chunks = []
    i = 0
    while i < len(sentences):
        chunk = ' '.join(sentences[i:i+window_size])
        chunks.append(chunk)
        if i + window_size >= len(sentences):
            break
        i += window_size - overlap
    return chunks

# -----------------------------
# 🔍 Embedding & Retrieval
# -----------------------------

class Embedder:
    def __init__(self, model_name='mindpadi/intent_encoder'):
        self.model = SentenceTransformer(model_name)

    def embed(self, texts):
        return self.model.encode(texts, convert_to_numpy=True)

class VectorStore:
    def __init__(self, dim):
        self.index = faiss.IndexHNSWFlat(dim, 32)
        self.chunks = []

    def add(self, embeddings, texts):
        self.index.add(embeddings)
        self.chunks.extend(texts)

    def search(self, query_embedding, top_k=5):
        _, idxs = self.index.search(query_embedding, top_k)
        return [self.chunks[i] for i in idxs[0]]

# -----------------------------
# ✂️ Summarization
# -----------------------------

def load_summarizer(model_name='facebook/bart-large-cnn'):
    return pipeline('summarization', model=model_name)

def summarize_text(summarizer, text, max_len=60):
    return summarizer(text, max_length=max_len, min_length=20, do_sample=False)[0]['summary_text']

# -----------------------------
# 🧠 Prompt Template (Empathetic)
# -----------------------------

def build_prompt(context, question):
    return f"""You are a kind and empathetic mental health assistant. Your goal is to make the user feel heard, validated, and supported.
   - Be clear, concise, and warm. Speak like a friend, not a textbook.
   - If you are unsure or the information is not in the context, say "I'm not sure" or "I don't know" rather than guessing.
   - Always respond directly to what the user is feeling or asking.    
Do NOT provide clinical jargon or speculative analysis. Instead, respond with short, clear, and compassionate language, like a trusted friend.
Focus on validating the user's feelings, showing understanding, and gently suggesting helpful next steps without giving medical advice.

Context:
{context}

User says: "{question}"

Your response:"""


# -----------------------------
# 🧠 Generator
# -----------------------------

class Generator:
    def __init__(self, model_name):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=True, torch_dtype=torch.float16, device_map="auto"
        )
        self.model.eval()

    def generate(self, prompt, max_tokens=150):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        input_length = inputs.input_ids.shape[1]

        with torch.no_grad():
            output = self.model.generate(
                **inputs,
                do_sample=True,
                temperature=0.6,
                top_p=0.85,
                repetition_penalty=1.2,
                max_new_tokens=max_tokens,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Decode only newly generated tokens, excluding prompt
        generated_tokens = output[0][input_length:]
        text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
        text = re.sub(r'\n+', '\n', text).strip()
        return text

# -----------------------------
# 📋 RLHF Feedback Logging
# -----------------------------

def log_feedback(question, answer, rating, log_file="feedback_log.csv"):
    exists = os.path.isfile(log_file)
    with open(log_file, mode='a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if not exists:
            writer.writerow(["timestamp", "question", "answer", "rating"])
        writer.writerow([datetime.now().isoformat(), question, answer, rating])

# -----------------------------
# 🚀 Build System
# -----------------------------

def build_system(txt_folder, urls, generator_model_path):
    print("📥 Loading content...")
    docs = extract_texts_from_folder(txt_folder) + [extract_text_from_web(url) for url in urls]

    print("✂️ Chunking...")
    chunks = []
    for doc in docs:
        chunks.extend(sliding_window_chunking(doc))

    embedder = Embedder()
    embeddings = embedder.embed(chunks)

    store = VectorStore(dim=embeddings.shape[1])
    store.add(embeddings, chunks)

    summarizer = load_summarizer()
    generator = Generator(generator_model_path)

    return embedder, store, summarizer, generator

# -----------------------------
# 💬 Chat Loop with Feedback
# -----------------------------

def chat(embedder, store, summarizer, generator):
    print("🤖 Assistant: Hello! I'm here to support you with anything on your mind today. How are you feeling?")
    previous_question = None
    while True:
        user_q = input("\n🧠 Ask a question (type 'exit' to quit): ")
        if user_q.lower().strip() == 'exit':
            print("👋 Bye!")
            break

        context_chunks = []
        if previous_question:
            context_to_summarize = f"Previously, the user mentioned: {previous_question}"
            short_context = summarize_text(summarizer, context_to_summarize)
            context_chunks.append(short_context)

        query_emb = embedder.embed([user_q])
        context_chunks += store.search(query_emb, top_k=4)

        prompt = build_prompt('\n'.join(context_chunks), user_q)
        answer = generator.generate(prompt)

        print("\n🤖 Assistant:\n")
        print('\n'.join(re.split(r'(?<=[.?!]) +', answer)))

        # Simplified RLHF Feedback
        while True:
            feedback = input("\nWas this response helpful? [👍=1, 😐=0, 👎=-1]: ").strip()
            if feedback in ['1', '0', '-1']:
                rating = int(feedback)
                break
            else:
                print("❗ Please enter 1 for 👍, 0 for 😐, or -1 for 👎.")

        log_feedback(user_q, answer, rating)
        previous_question = user_q

# -----------------------------
# 🔧 Initialize
# -----------------------------

txt_folder = "pdfs"
web_urls = [
    "https://www.apa.org/topics/anxiety/disorders",
    "https://www.ncbi.nlm.nih.gov/books/NBK558911/"
]
model_path = "qwen2.5-0.5B_finetuned_mentalhealth"

embedder, store, summarizer, generator = build_system(txt_folder, web_urls, model_path)
chat(embedder, store, summarizer, generator)


[nltk_data] Downloading package punkt to /home/kunuru/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


📥 Loading content...
✂️ Chunking...


Device set to use cuda:0


🤖 Assistant: Hello! I'm here to support you with anything on your mind today. How are you feeling?

🧠 Ask a question (type 'exit' to quit): Lately, I’ve been feeling really anxious about small things — like I keep worrying all day about things that probably won’t even happen. It’s making me restless, and I can’t focus on my work or enjoy things like I used to. What should I do?

🤖 Assistant:

When we encounter challenges such as these, especially when they seem so overwhelming at first glance, taking some time for selfreflection and acknowledging our emotions can help us gain clarity and perspective.
Letting go of perfectionism around tiny details might also give us permission to acknowledge that there will always be something unfulfilled or uncertain.
Here are a few suggestions:
1.
Self-awareness: Take some quiet moments each evening where you reflect on how you perceive yourself right now.
Ask yourself questions like "What am I afraid of?
Am I living up to expectations?" These intros

Your max_length is set to 60, but your input_length is only 57. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)



🤖 Assistant:

When starting new practices like mindfulness or meditation there can sometimes be initial resistance as our minds tend to wander easily during these processes.
However, regular consistency has been shown to improve focus and reduce anxiety over time.
Here’s how we encourage beginners:
1.
Start small: Begin by setting aside dedicated time each day for practicing deep breathing exercises even if they seem effortless at first.
You could set reminders on your phone or use an alarm clock so you always remember to take care of yourself mentally before bedtime.
2.
Find something comfortable: Make sure the space where you practice isn’t too stimulating or distracting.
It might help to create a calming environment such as dimming lights making noiseless rooms or having headphones turned off while listening to soothing music.
3.

Was this response helpful? [👍=1, 😐=0, 👎=-1]: bye
❗ Please enter 1 for 👍, 0 for 😐, or -1 for 👎.

Was this response helpful? [👍=1, 😐=0, 👎=-1]: 1

🧠 Ask a q

### after breaking at fullstop + dynamic summarizer


In [1]:
import torch
import numpy as np
import faiss
import requests
import re
import os
import nltk
from nltk.tokenize import sent_tokenize
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import csv
from datetime import datetime

nltk.download('punkt')

# -----------------------------
# 📤 Text Loaders
# -----------------------------

def extract_text_from_web(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        text = '\n'.join(p.get_text() for p in soup.find_all(['p', 'li']))
        return re.sub(r'\n+', '\n', text).strip()
    except:
        return ''

def extract_texts_from_folder(folder_path):
    texts = []
    for file in os.listdir(folder_path):
        if file.endswith(".txt"):
            with open(os.path.join(folder_path, file), 'r', encoding='utf-8') as f:
                texts.append(f.read())
    return texts

# -----------------------------
# 🧠 Sentence-Level Sliding Chunking
# -----------------------------

def sliding_window_chunking(text, window_size=3, overlap=1):
    sentences = sent_tokenize(text)
    chunks = []
    i = 0
    while i < len(sentences):
        chunk = ' '.join(sentences[i:i+window_size])
        chunks.append(chunk)
        if i + window_size >= len(sentences):
            break
        i += window_size - overlap
    return chunks

# -----------------------------
# 🔍 Embedding & Retrieval
# -----------------------------

class Embedder:
    def __init__(self, model_name='mindpadi/intent_encoder'):
        self.model = SentenceTransformer(model_name)

    def embed(self, texts):
        return self.model.encode(texts, convert_to_numpy=True)

class VectorStore:
    def __init__(self, dim):
        self.index = faiss.IndexHNSWFlat(dim, 32)
        self.chunks = []

    def add(self, embeddings, texts):
        self.index.add(embeddings)
        self.chunks.extend(texts)

    def search(self, query_embedding, top_k=5):
        _, idxs = self.index.search(query_embedding, top_k)
        return [self.chunks[i] for i in idxs[0]]

# -----------------------------
# ✂️ Summarization (Dynamic)
# -----------------------------
def load_summarizer(model_name='facebook/bart-large-cnn'):
    return pipeline('summarization', model=model_name)

def summarize_text(summarizer, text):
    length = len(text.split())

    # Skip summarization for short inputs
    if length < 50:
        return text

    # Dynamically calculate summary length
    max_len = min(120, max(30, length // 3))
    min_len = max(20, max_len // 2)

    summary = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
    return summary


# -----------------------------
# 🧠 Prompt Template (Empathetic)
# -----------------------------

def build_prompt(context, question):
    return f"""You are a kind and empathetic mental health assistant.
Speak like a trusted friend: short, clear, warm, and non-clinical.

Rules:
- Avoid long apologies or excessive reassurance.
- Validate the user's feelings in one or two friendly sentences.
- Don't give medical advice.
- If unsure, say “I’m not sure.”

Context:
{context}

User says: "{question}"

Your response:"""

# -----------------------------
# 🧠 Generator (with sentence trimming)
# -----------------------------

class Generator:
    def __init__(self, model_name):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=True, torch_dtype=torch.float16, device_map="auto"
        )
        self.model.eval()

    def generate(self, prompt, max_tokens=150):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        input_length = inputs.input_ids.shape[1]

        with torch.no_grad():
            output = self.model.generate(
                **inputs,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.2,
                max_new_tokens=max_tokens,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Decode only newly generated tokens, excluding prompt
        generated_tokens = output[0][input_length:]
        text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
        text = re.sub(r'\n+', '\n', text).strip()

        # Trim output to last complete sentence
        sentences = re.split(r'(?<=[.!?]) +', text)
        if len(sentences) > 1:
            last_sentence = sentences[-1]
            if not re.search(r'[.!?]$', last_sentence):
                # Drop incomplete last sentence
                sentences = sentences[:-1]
            text = ' '.join(sentences).strip()

        return text

# -----------------------------
# 📋 RLHF Feedback Logging
# -----------------------------

def log_feedback(question, answer, rating, log_file="feedback_log.csv"):
    exists = os.path.isfile(log_file)
    with open(log_file, mode='a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if not exists:
            writer.writerow(["timestamp", "question", "answer", "rating"])
        writer.writerow([datetime.now().isoformat(), question, answer, rating])

# -----------------------------
# 🚀 Build System
# -----------------------------

def build_system(txt_folder, urls, generator_model_path):
    print("📥 Loading content...")
    docs = extract_texts_from_folder(txt_folder) + [extract_text_from_web(url) for url in urls]

    print("✂️ Chunking...")
    chunks = []
    for doc in docs:
        chunks.extend(sliding_window_chunking(doc))

    embedder = Embedder()
    embeddings = embedder.embed(chunks)

    store = VectorStore(dim=embeddings.shape[1])
    store.add(embeddings, chunks)

    summarizer = load_summarizer()
    generator = Generator(generator_model_path)

    return embedder, store, summarizer, generator


# -----------------------------
# 💬 Chat Loop with Feedback
# -----------------------------
def chat(embedder, store, summarizer, generator):
    print("🤖 Assistant: Hello! I'm here to support you with anything on your mind today. How are you feeling?")
    previous_question = None
    while True:
        user_q = input("\n🧠 Ask a question (type 'exit' to quit): ")
        if user_q.lower().strip() == 'exit':
            print("👋 Bye!")
            break

        context_chunks = []
        if previous_question:
            context_to_summarize = f"Previously, the user mentioned: {previous_question}"
            short_context = summarize_text(summarizer, context_to_summarize)  # ✅ dynamic summary
            context_chunks.append(short_context)

        query_emb = embedder.embed([user_q])
        context_chunks += store.search(query_emb, top_k=4)

        prompt = build_prompt('\n'.join(context_chunks), user_q)
        answer = generator.generate(prompt)

        print("\n🤖 Assistant:\n")
        print('\n'.join(re.split(r'(?<=[.?!]) +', answer)))

        while True:
            feedback = input("\nWas this response helpful? [👍=1, 😐=0, 👎=-1]: ").strip()
            if feedback in ['1', '0', '-1']:
                rating = int(feedback)
                break
            else:
                print("❗ Please enter 1 for 👍, 0 for 😐, or -1 for 👎.")

        log_feedback(user_q, answer, rating)
        previous_question = user_q

# -----------------------------
# 🔧 Initialize
# -----------------------------

txt_folder = "pdfs"
web_urls = [
    "https://www.apa.org/topics/anxiety/disorders",
    "https://www.ncbi.nlm.nih.gov/books/NBK558911/"
]
model_path = "qwen2.5-0.5B_finetuned_mentalhealth"

embedder, store, summarizer, generator = build_system(txt_folder, web_urls, model_path)
chat(embedder, store, summarizer, generator)


2025-05-18 19:03:29.661086: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-18 19:03:30.380272: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2025-05-18 19:03:30.380397: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


[2025-05-18 19:03:31,863] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -lcufile: No such file or directory
collect2: error: ld returned 1 exit status
[nltk_data] Downloading package punkt to /home/kunuru/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


📥 Loading content...
✂️ Chunking...


Device set to use cuda:0
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


🤖 Assistant: Hello! I'm here to support you with anything on your mind today. How are you feeling?

🧠 Ask a question (type 'exit' to quit): I’ve been feeling really anxious lately, and it’s hard to focus on anything. I don’t even know why I feel this way.

🤖 Assistant:

It sounds like you’re experiencing some anxiety right now and it might be helpful for you to take care of yourself first before dealing with any additional concerns such as difficulties focusing online.
Here are some suggestions that could help alleviate your symptoms:
1.
Practice deep breathing exercises: Take slow deep breaths several times throughout the day when you start feeling overwhelmed by stress.
Deep breathing can actually calm down your nervous system helping you stay calmer.
2.
Try meditation or mindfulness practices: Mindfulness involves being present without judgment which has been shown to reduce anxiety levels.
You can try guided meditations apps like Headspace or simply setting aside specific time each