In [None]:
!pip install --user datasets transformers scikit-learn intel-extension-for-transformers accelerate

In [None]:
!pip install datasets --user

In [None]:
!pip install intel-extension-for-transformers --target=intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/docs/notebooks/project



In [None]:
!pip install yacs --user

In [8]:
!pip install simpy --user

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting simpy
  Using cached simpy-4.1.1-py3-none-any.whl.metadata (6.1 kB)
Using cached simpy-4.1.1-py3-none-any.whl (27 kB)
Installing collected packages: simpy
Successfully installed simpy-4.1.1


In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import json
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

class MentalAwarenessDataset(Dataset):
    """Question/answer pairs encoded for causal language-model fine-tuning.

    Every example is one sequence, ``question <eos> answer <eos>``, padded or
    truncated to ``max_length`` tokens. A causal LM is trained to predict
    token ``i + 1`` from tokens ``0..i`` of the *same* sequence, so ``labels``
    is a copy of ``input_ids`` (padding masked out). Encoding the question as
    the input and the answer as the labels in two separate sequences would
    pair unrelated positions with each other.

    Args:
        questions: Sequence of question strings.
        answers: Sequence of answer strings, aligned index-wise with
            ``questions``.
        tokenizer: Callable returning an encoding that exposes ``input_ids``
            and ``attention_mask`` tensors when called with
            ``return_tensors="pt"``.
        max_length: Fixed token length of every example (default 512).
    """

    def __init__(self, questions, answers, tokenizer, max_length=512):
        self.questions = questions
        self.answers = answers
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of question/answer pairs."""
        return len(self.questions)

    def __getitem__(self, idx):
        """Encode pair ``idx`` as 1-D ``input_ids``, ``attention_mask`` and ``labels`` tensors."""
        question = self.questions[idx]
        answer = self.answers[idx]

        # Turns are delimited by the end-of-sequence token; fall back to a
        # newline when the tokenizer does not define one.
        eos = getattr(self.tokenizer, "eos_token", None) or "\n"
        text = f"{question}{eos}{answer}{eos}"

        encoded = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )
        input_ids = encoded.input_ids.flatten()
        attention_mask = encoded.attention_mask.flatten()

        # Mask padding via the attention mask rather than the pad token id, so
        # real tokens are never dropped from the loss when pad and eos share an id.
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100  # ignored by CrossEntropyLoss

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels,
        }

# Seed shared by the train/eval split and the Trainer so that re-running the
# cell produces the same split and comparable results.
SEED = 42

# Load the dataset (JSON text is UTF-8; do not rely on the platform default encoding).
with open('output.json', 'r', encoding='utf-8') as f:
    mental_awareness_data = json.load(f)

questions = [item['question'] for item in mental_awareness_data]
answers = [item['answer'] for item in mental_awareness_data]

# Load pre-trained model and tokenizer
model_name = "microsoft/DialoGPT-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Register a dedicated padding token, grow the embedding matrix to cover it,
# and record its id on the model config so the saved checkpoint carries it.
special_tokens_dict = {'pad_token': '[PAD]'}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id

# Split the data into training and evaluation sets (seeded for reproducibility)
train_questions, eval_questions, train_answers, eval_answers = train_test_split(
    questions, answers, test_size=0.1, random_state=SEED
)

# Create datasets
train_dataset = MentalAwarenessDataset(train_questions, train_answers, tokenizer)
eval_dataset = MentalAwarenessDataset(eval_questions, eval_answers, tokenizer)

# NOTE(review): recent transformers releases appear to rename `evaluation_strategy`
# to `eval_strategy` - confirm against the installed version.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=2,
    learning_rate=3e-5,
    warmup_steps=20,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,
    gradient_accumulation_steps=4,
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",  # Save a checkpoint every `save_steps` steps
    save_steps=50,  # Same interval as eval_steps, so each checkpoint has an eval loss
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    seed=SEED,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model and tokenizer; later cells reload them from this directory
model.save_pretrained('./fine_tuned2')
tokenizer.save_pretrained('./fine_tuned2')



KeyboardInterrupt



In [2]:
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Restore the fine-tuned model and tokenizer from the './fine_tuned2' directory.
model = AutoModelForCausalLM.from_pretrained('./fine_tuned2')
tokenizer = AutoTokenizer.from_pretrained('./fine_tuned2')


# JSON text is UTF-8; do not rely on the platform default encoding.
with open('output.json', 'r', encoding='utf-8') as f:
    mental_awareness_data = json.load(f)

questions = [item['question'] for item in mental_awareness_data]
answers = [item['answer'] for item in mental_awareness_data]

# Fit TF-IDF once and keep the corpus matrix: it does not depend on the query,
# so rebuilding it inside retrieve_response() would repeat the same work for
# every chat message.
vectorizer = TfidfVectorizer()
question_matrix = vectorizer.fit_transform(questions)

def retrieve_response(query, threshold=0.2):
    """Return the stored answer whose question is most similar to ``query``.

    Args:
        query: The user's message.
        threshold: Minimum TF-IDF cosine similarity (exclusive) for the best
            match to be accepted.

    Returns:
        The answer paired with the best-matching question, or ``None`` when
        the query is blank or no question scores above ``threshold``.
    """
    if not query or not query.strip():
        return None

    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, question_matrix).flatten()

    best_match_idx = similarities.argmax()
    if similarities[best_match_idx] > threshold:
        return answers[best_match_idx]
    return None

def generate_response(prompt, model, tokenizer, max_new_tokens=100, max_prompt_tokens=512):
    """Sample a reply to ``prompt`` and return only the newly generated text.

    Args:
        prompt: Text the model should continue.
        model: Causal LM exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Upper bound on generated tokens. Bounding the *new*
            tokens (rather than the total length) keeps room for a reply no
            matter how long the prompt is.
        max_prompt_tokens: Only the most recent tokens of the prompt are kept,
            so a growing chat history cannot exceed the model's context
            window. NOTE(review): 512 assumes a context of at least
            ``512 + max_new_tokens`` positions - confirm for the model in use.

    Returns:
        The decoded continuation, without the prompt and without special tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids[:, -max_prompt_tokens:]
    attention_mask = inputs.attention_mask[:, -max_prompt_tokens:]
    prompt_length = input_ids.shape[-1]

    # generate() needs an explicit pad id; fall back to eos when none is set.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        pad_token_id=pad_token_id,
    )
    # The output sequence starts with the prompt tokens; decode only what follows.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

def chat_with_dexter():
    """Run an interactive console chat until the user types exit, quit or bye.

    Each message is first looked up with ``retrieve_response``; when no stored
    answer matches, ``generate_response`` is called on the conversation so far.
    The transcript is kept only for the duration of this call.
    """
    print("Hi, I'm Dexter, your mental health awareness chatbot. How can I help you today?")
    chat_history = ""

    while True:
        try:
            user_input = input("You: ").strip()
        except EOFError:
            # stdin was closed (e.g. a non-interactive run): end the session cleanly.
            print("Dexter: Goodbye! Take care.")
            break

        if not user_input:
            # Nothing to answer; ask again instead of sending an empty prompt to the model.
            continue

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Dexter: Goodbye! Take care.")
            break

        chat_history += f"User: {user_input}\n"

        # NOTE(review): messages mentioning self-harm or suicide go through the
        # same TF-IDF lookup as any other question; consider routing them to
        # vetted crisis resources before retrieval.
        response = retrieve_response(user_input)

        if response:
            chat_history += f"Dexter: {response}\n"
            print(f"Dexter: {response}")
        else:
            try:
                # chat_history already ends with the current user turn, so only
                # the speaker cue is added; repeating the turn would duplicate
                # it in the prompt.
                prompt = chat_history + "Dexter: "
                generated_response = generate_response(prompt, model, tokenizer)
                chat_history += f"Dexter: {generated_response}\n"
                print(f"Dexter: {generated_response}")
            except Exception as e:
                # Report the failure, but keep it out of chat_history so the
                # error text is not fed back to the model as a previous reply.
                error_message = f"Sorry, I couldn't process that. Please try again. (Error: {str(e)})"
                print(f"Dexter: {error_message}")

# Start the interactive chat loop; it runs until the user types 'exit', 'quit' or 'bye'.
chat_with_dexter()


Hi, I'm Dexter, your mental health awareness chatbot. How can I help you today?


You:  hello


Dexter: Hi! Dexter is here to assist you with any mental health concerns. How can I help today?


You:  what is depression


Dexter: : Depression is a mood disorder that causes persistent feelings of sadness, loss of interest in activities once enjoyed, and a range of physical and emotional symptoms that can interfere with daily life.


You:  what is narcotics


Dexter: Narcotics, also known as opioids, are drugs that relieve pain and can produce euphoria. They include prescription medications like morphine and oxycodone, as well as illegal drugs like heroin. Narcotics can be highly addictive and have serious health risks.


You:  my friend is having suicide thoughts can you help him


Dexter: Promote mental health awareness, reduce stigma surrounding mental illness, advocate for accessible mental health services, and educate others about suicide prevention.


KeyboardInterrupt: Interrupted by user

In [None]:
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the fine-tuned model and tokenizer
model = AutoModelForCausalLM.from_pretrained('./fine_tuned2')
tokenizer = AutoTokenizer.from_pretrained('./fine_tuned2')

# Load your mental awareness dataset (JSON text is UTF-8; do not rely on the
# platform default encoding)
with open('output.json', 'r', encoding='utf-8') as f:
    mental_awareness_data = json.load(f)

questions = [item['question'] for item in mental_awareness_data]
answers = [item['answer'] for item in mental_awareness_data]

# Fit the TF-IDF vectorizer once and keep the corpus matrix: it does not depend
# on the query, so rebuilding it per message would repeat the same work.
vectorizer = TfidfVectorizer()
question_matrix = vectorizer.fit_transform(questions)

def retrieve_response(query, threshold=0.3, debug=False):
    """Return the stored answer whose question is most similar to ``query``.

    Args:
        query: The user's message.
        threshold: Minimum TF-IDF cosine similarity (exclusive) for the best
            match to be accepted.
        debug: When true, print the query, the full similarity vector and the
            index of the accepted match. Off by default so the chat output is
            not flooded with one score per dataset entry on every message.

    Returns:
        The answer paired with the best-matching question, or ``None`` when
        the query is blank or no question scores above ``threshold``.
    """
    if not query or not query.strip():
        return None

    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, question_matrix).flatten()

    if debug:
        print("Query:", query)
        print("Similarities:", similarities)

    best_match_idx = similarities.argmax()
    if similarities[best_match_idx] > threshold:
        if debug:
            print("Best match index:", best_match_idx)
        return answers[best_match_idx]
    return None

def generate_response(prompt, model, tokenizer, max_new_tokens=100, max_prompt_tokens=512):
    """Sample a reply to ``prompt`` and return only the newly generated text.

    Args:
        prompt: Text the model should continue.
        model: Causal LM exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Upper bound on generated tokens; bounding the new
            tokens rather than the total length keeps room for a reply
            regardless of prompt length.
        max_prompt_tokens: Only the most recent tokens of the prompt are kept.
            NOTE(review): 512 assumes a context of at least
            ``512 + max_new_tokens`` positions - confirm for the model in use.

    Returns:
        The decoded continuation, without the prompt and without special tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids[:, -max_prompt_tokens:]
    attention_mask = inputs.attention_mask[:, -max_prompt_tokens:]
    prompt_length = input_ids.shape[-1]

    # generate() needs an explicit pad id; fall back to eos when none is set.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        pad_token_id=pad_token_id,
    )
    # The output sequence starts with the prompt tokens; decode only what follows.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

def chat_with_dexter():
    """Run a stateless console chat until the user types exit, quit or bye.

    Each message is first looked up with ``retrieve_response``; when no stored
    answer matches, ``generate_response`` is called on that message alone (no
    conversation history is kept in this variant).
    """
    print("Hi, I'm Dexter, your mental health awareness chatbot. How can I help you today?")
    while True:
        try:
            user_input = input("You: ").strip()
        except EOFError:
            # stdin was closed (e.g. a non-interactive run): end the session cleanly.
            print("Dexter: Goodbye! Take care.")
            break

        if not user_input:
            # Nothing to answer; ask again instead of sending an empty prompt to the model.
            continue

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Dexter: Goodbye! Take care.")
            break

        # Try to retrieve a response from the dataset
        response = retrieve_response(user_input)

        if response:
            print(f"Dexter: {response}")
        else:
            # Generate a response if no good match is found
            try:
                generated_response = generate_response(user_input, model, tokenizer)
                print(f"Dexter: {generated_response}")
            except Exception as e:
                print(f"Dexter: Sorry, I couldn't process that. Please try again. (Error: {str(e)})")

# Start the interactive chat loop; it runs until the user types 'exit', 'quit' or 'bye'.
chat_with_dexter()


In [26]:
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import display, HTML
import ipywidgets as widgets


# Restore the fine-tuned model and tokenizer from the './fine_tuned2' directory.
model = AutoModelForCausalLM.from_pretrained('./fine_tuned2')
tokenizer = AutoTokenizer.from_pretrained('./fine_tuned2')


# JSON text is UTF-8; do not rely on the platform default encoding.
with open('output.json', 'r', encoding='utf-8') as f:
    mental_awareness_data = json.load(f)

questions = [item['question'] for item in mental_awareness_data]
answers = [item['answer'] for item in mental_awareness_data]


# Fit TF-IDF once and keep the corpus matrix: it does not depend on the query,
# so rebuilding it inside retrieve_response() would repeat the same work for
# every chat message.
vectorizer = TfidfVectorizer()
question_matrix = vectorizer.fit_transform(questions)

def retrieve_response(query, threshold=0.2):
    """Return the stored answer whose question is most similar to ``query``.

    Args:
        query: The user's message.
        threshold: Minimum TF-IDF cosine similarity (exclusive) for the best
            match to be accepted.

    Returns:
        The answer paired with the best-matching question, or ``None`` when
        the query is blank or no question scores above ``threshold``.
    """
    if not query or not query.strip():
        return None

    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, question_matrix).flatten()

    best_match_idx = similarities.argmax()
    if similarities[best_match_idx] > threshold:
        return answers[best_match_idx]
    return None

def generate_response(prompt, model, tokenizer, max_new_tokens=100, max_prompt_tokens=512):
    """Sample a reply to ``prompt`` and return only the newly generated text.

    Args:
        prompt: Text the model should continue.
        model: Causal LM exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Upper bound on generated tokens; bounding the new
            tokens rather than the total length keeps room for a reply even
            when the accumulated chat history is long.
        max_prompt_tokens: Only the most recent tokens of the prompt are kept.
            NOTE(review): 512 assumes a context of at least
            ``512 + max_new_tokens`` positions - confirm for the model in use.

    Returns:
        The decoded continuation, without the prompt and without special tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids[:, -max_prompt_tokens:]
    attention_mask = inputs.attention_mask[:, -max_prompt_tokens:]
    prompt_length = input_ids.shape[-1]

    # generate() needs an explicit pad id; fall back to eos when none is set.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        pad_token_id=pad_token_id,
    )
    # The output sequence starts with the prompt tokens; decode only what follows.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()


# Transcript of the conversation so far. Module-level, so it carries over
# between calls to chat_with_dexter() within the same kernel session.
chat_history = ""

def chat_with_dexter():
    """Run an interactive console chat until the user types exit, quit or bye.

    Each message is first looked up with ``retrieve_response``; when no stored
    answer matches, ``generate_response`` is called on the accumulated
    module-level ``chat_history``.
    """
    global chat_history
    print("Hi, I'm Dexter, your mental health awareness chatbot. How can I help you today?")

    while True:
        try:
            user_input = input("You: ").strip()
        except EOFError:
            # stdin was closed (e.g. a non-interactive run): end the session cleanly.
            print("Dexter: Goodbye! Take care.")
            break

        if not user_input:
            # Nothing to answer; ask again instead of sending an empty prompt to the model.
            continue

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Dexter: Goodbye! Take care.")
            break

        chat_history += f"User: {user_input}\n"

        response = retrieve_response(user_input)

        if response:
            chat_history += f"Dexter: {response}\n"
            print(f"Dexter: {response}")
        else:
            try:
                # chat_history already ends with the current user turn, so only
                # the speaker cue is added; repeating the turn would duplicate
                # it in the prompt.
                prompt = chat_history + "Dexter: "
                generated_response = generate_response(prompt, model, tokenizer)
                chat_history += f"Dexter: {generated_response}\n"
                print(f"Dexter: {generated_response}")
            except Exception as e:
                # Report the failure, but keep it out of chat_history so the
                # error text is not fed back to the model as a previous reply.
                error_message = f"Sorry, I couldn't process that. Please try again. (Error: {str(e)})"
                print(f"Dexter: {error_message}")


# Entry point: the guard is true when the cell is executed in the notebook
# (the recorded output of this cell shows the chat starting).
if __name__ == "__main__":

    # Render the local 'index.html' file in the cell output before the chat starts.
    display(HTML(filename='index.html'))
    
 
    # Enter the interactive console chat loop.
    chat_with_dexter()


Hi, I'm Dexter, your mental health awareness chatbot. How can I help you today?


You:  hi


Dexter: Hello! Dexter is here to support you. How are you feeling today?


You:  i am feeling sad


Dexter: Im sorry to hear that. Dexter is here to support you. How can I help you through this?


You:  can you tell me what are the symptoms of depression


Dexter: Symptoms of depression include persistent sadness, loss of interest in activities, changes in appetite, and sleep disturbances. Individuals may also experience fatigue, feelings of worthlessness, and difficulty concentrating. Severe cases may lead to thoughts of selfharm or suicide.


You:  also how can i build resilience to stress


Dexter: Building resilience involves developing strong social connections, maintaining a positive outlook, practicing selfcare, setting realistic goals, and adapting to change.


You:  how can i stay motivated to keep a new habit


Dexter: Keeping the end goal in mind, tracking progress, and rewarding yourself for small achievements can boost motivation. Dexter advises finding what personally motivates you and sticking to it.


You:  how do i choose which habit to start with


Dexter: Start with a habit that aligns with your values and goals. Dexter recommends selecting a habit that is realistic and has a meaningful impact on your life.


You:  can you tell me what are the symptoms of paranoia


Dexter: Symptoms of paranoia include constant suspicion, mistrust of others' intentions, feeling persecuted or threatened, hypervigilance, and difficulty relaxing or trusting even close friends or family.


You:  what is narcotics


Dexter: Narcotics, also known as opioids, are drugs that relieve pain and can produce euphoria. They include prescription medications like morphine and oxycodone, as well as illegal drugs like heroin. Narcotics can be highly addictive and have serious health risks.


You:  what are the symptoms of narcotic withdrawal


Dexter: Symptoms of narcotic withdrawal include anxiety, muscle aches, sweating, nausea, and vomiting. Severe withdrawal can also cause agitation, insomnia, and high blood pressure. Medical supervision is often recommended during withdrawal to manage symptoms.


You:  what is national institute of mental health sciences


Dexter: The National Institute of Mental Health Sciences (NIMHS) is a premier institution dedicated to mental health research. It focuses on understanding mental disorders and developing effective treatments. NIMHS also provides educational resources to enhance public mental health.


You:  bye


Dexter: Goodbye! Take care.
