In [176]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [177]:
# Install all the requirements
!pip install langchain-groq
!pip install chromadb
!pip install langchain_community



In [None]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    groq_api_key = '',
    model_name = "llama-3.3-70b-versatile",
    temperature=0
)

#LLM Model Response Test
response = llm.invoke("Hey there, which model are you")
print(response.content)

I'm an AI designed by Meta AI, and I'm a type of large language model. My knowledge was built from a massive corpus of text data, and I'm constantly learning and improving my responses. I don't have a personal name, but I'm often referred to as a "LLaMA" model, which stands for "Large Language Model Meta AI."


In [182]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load the CSV data for Langchain
loader = CSVLoader(file_path="/content/drive/My Drive/train.csv")
data = loader.load()
# Print the number of documents
print(len(data))

3512


In [183]:
#Iterate and store the output data in a sperate list as dictionaries
output_data = []
# Loop through each document in the loaded data
for doc in data:
    # Get the page content (which contains both context and response)
    page_content = doc.page_content

    # Split the page content into context and response
    if "Response:" in page_content:
        context = page_content.split("Response:")[0].replace("Context:", "").strip()
        response = page_content.split("Response:")[1].strip()

        # Append the extracted context and response to the output_data list
        output_data.append({
            "context": context,
            "response": response
        })
    else:
        print("Response not found in page content.")

In [184]:
import pandas as pd
df = pd.DataFrame(output_data)
print(df.head())

                                             context  \
0  I'm going through some things with my feelings...   
1  I'm going through some things with my feelings...   
2  I'm going through some things with my feelings...   
3  I'm going through some things with my feelings...   
4  I'm going through some things with my feelings...   

                                            response  
0  If everyone thinks you're worthless, then mayb...  
1  Hello, and thank you for your question and see...  
2  First thing I'd suggest is getting the sleep y...  
3  Therapy is essential for those that are feelin...  
4  I first want to let you know that you are not ...  


In [185]:
#Unique Queries in the dataset
unique_contexts = df['context'].drop_duplicates()
# To view the unique contexts
print(len(unique_contexts))

995


In [186]:
#Uploading the Dataset as collections into chromadb if it's not present already
import uuid
import chromadb

client = chromadb.PersistentClient(path="/content/drive/My Drive/chromadb")
collection = client.get_or_create_collection("queries")

if not collection.count():
  for _, row in df.iterrows():
    collection.add(
        documents=[row["context"]],
        metadatas=[{"response": row["response"]}],
        ids=[str(uuid.uuid4())]
    )

In [187]:
from langchain.prompts import PromptTemplate

#Building the Prompt Template
prompt_extract = PromptTemplate.from_template(
    """
    ### This is a data from mental health counselling dataset
    {responses}
    ### Instruction:
    You are a mental health therapist
    This data represents a collection of mental health counselling records, including both the context (question) and the response.
    Use this data context and response to provide a summary answer to the question asked.
    Keep the answer short
    If the question is irrelevant just tell it's an irrelevant context
    ### Question:
    {question}
    ### Response:
    """
)

#Chain Extract
chain_extract = prompt_extract | llm

In [193]:
#Question
question = "give me some motivation"

responses = collection.query(
    query_texts=[question],
    n_results=5
).get('metadatas',[])


res = chain_extract.invoke(input={"responses": responses, "question": question})

#Printed Response
print(f"Response: \n{res.content}")

Response: 
To find motivation, start by establishing a self-care routine: eat balanced meals, sleep 6-8 hours, and exercise 30 minutes a day. Break tasks into smaller steps and practice self-compassion. Celebrate small achievements to build momentum and confidence.
