In [None]:
import pandas as pd
import numpy as np
import zipfile
import os
import opendatasets as od


In [None]:
# Fetch the Reddit mental-health dataset from Kaggle via opendatasets.
od.download("https://www.kaggle.com/datasets/entenam/reddit-mental-health-dataset")

In [None]:
import os

# Sanity check: record the working directory and list its entries.
cwd = os.getcwd()  # absolute path of the current working directory
files = os.listdir(cwd)  # names of all entries directly inside cwd
print("Files in %r: %s" % (cwd, files))

In [None]:
# Display the current working directory as the cell output.
os.getcwd()


This dataset, which includes a subset of 800 manually annotated posts, is structured differently to provide focused insights into the mental health discussions. The columns in Part B are as follows:
Score: The net score (upvotes minus downvotes) of the post.
Selftext: The main text content of the post.
Subreddit: The subreddit from which the post was sourced.
Title: The title of the Reddit post.
Label: The label assigned during our annotation process, indicating the identified root cause of the mental health
issues. The possible values are: Drug and Alcohol, Early Life, Personality, Trauma and Stress.

This annotation process brings additional depth to the dataset, allowing researchers to explore the underlying factors contributing to mental health issues.

In [None]:
# Unpack dataset/archive.zip into the mental_health_data/ folder.
with zipfile.ZipFile("dataset/archive.zip", mode="r") as archive:
    archive.extractall(path="mental_health_data")

In [None]:
# Load one labelled CSV on its own and preview its first rows.
df_1 = pd.read_csv("mental_health_data/Original Reddit Data/Labelled Data/LD DA 1.csv")
df_1.head()

In [None]:
# Show the (rows, columns) shape of df_1.
df_1.shape

In [None]:
# Read the four labelled CSV files and stack them into a single frame
# with a fresh 0..n-1 index, then preview the first rows.
labelled_frames = [
    pd.read_csv(csv_path)
    for csv_path in (
        'mental_health_data/Original Reddit Data/Labelled Data/LD DA 1.csv',
        'mental_health_data/Original Reddit Data/Labelled Data/LD EL1.csv',
        'mental_health_data/Original Reddit Data/Labelled Data/LD PF1.csv',
        'mental_health_data/Original Reddit Data/Labelled Data/LD TS 1.csv',
    )
]
df = pd.concat(labelled_frames, ignore_index=True)
df.head()

In [None]:
# drop_duplicates() returns a new frame rather than modifying `df` in place,
# so the result must be assigned back for the duplicate rows to actually be
# removed before the later cleaning and export steps.
df = df.drop_duplicates()
df

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Remove the 'score' and 'CAT 1' columns from the frame.
df = df.drop(columns=['score', 'CAT 1'])

In [None]:
# Display the current state of df.
df

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how="any")

In [None]:
# Write df to combined_data.csv, leaving out the index column.
df.to_csv("combined_data.csv",index = False)

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader
# Load combined_data.csv through CSVLoader without any custom csv_args.
loader = CSVLoader(file_path="combined_data.csv")

health_data = loader.load()

In [None]:
# Inspect the first loaded document.
print(health_data[0])

In [None]:
# Reload the CSV with explicit delimiter/quote settings.
#
# "fieldnames" is deliberately NOT passed: combined_data.csv is written with a
# header row, and supplying explicit fieldnames to the underlying
# csv.DictReader makes it treat that header line as an ordinary data record.
# That record would become a bogus first document and later be chunked and
# embedded. Without "fieldnames" the column names come from the header itself.
loader = CSVLoader(
    file_path="combined_data.csv",
    csv_args={
        "delimiter": ",",
        "quotechar": '"',
    },
)

mental_health_data = loader.load()

In [None]:
# Preview the documents at indices 1 through 4 of the loaded list.
print(mental_health_data[1:5])

In [None]:
# Chunk the data

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Break every loaded document into overlapping pieces and wrap each piece
# in its own Document (only the text is carried over).
chunks = [
    Document(page_content=piece)
    for source_doc in mental_health_data
    for piece in text_splitter.split_text(source_doc.page_content)
]

In [None]:

# Preview the first five chunks.
chunks[0:5]

In [None]:
# Embed the data and build the vector index

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.schema import Document

# Embedding model: the all-MiniLM-L6-v2 SentenceTransformer checkpoint behind
# LangChain's HuggingFaceEmbeddings wrapper.
model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Collect the raw text of every chunk, and a fresh Document per text.
texts = []
documents = []
for chunk in chunks:
    texts.append(chunk.page_content)
    documents.append(Document(page_content=chunk.page_content))

# Embed the texts and index them in a FAISS vector store.
db = FAISS.from_texts(texts=texts, embedding=model)

# Report how many vectors ended up in the index.
print(f"Number of vectors in the FAISS index: {db.index.ntotal}")

# # Step 5: Perform a similarity search using FAISS
# query = "What causes depression?"
# similar_docs = db.similarity_search(query)

# # Step 6: Print the retrieved documents
# for doc in similar_docs:
#     print(doc.page_content)



In [None]:
# Wrap the FAISS store in a retriever (default settings) and display it.
retriever = db.as_retriever()
retriever

In [None]:
# Wikipedia
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.pydantic_v1 import BaseModel, Field


class WikiInputs(BaseModel):
    """Inputs to the wikipedia tool."""

    # The only argument the tool accepts: a short search query.
    query: str = Field(description="query to look up in Wikipedia, should be 3 or less words")


# Wikipedia client limited to the single top result, with content capped at 500 characters.
api_wrapper = WikipediaAPIWrapper(doc_content_chars_max=500, top_k_results=1)

# Tool wrapper around the client, using WikiInputs as its argument schema.
wiki_tool = WikipediaQueryRun(
    api_wrapper=api_wrapper,
    args_schema=WikiInputs,
    name="wiki-tool",
    description="look up things in wikipedia",
    return_direct=True,
)

In [None]:
# Smoke-test the Wikipedia tool with a sample query.
wiki_tool.run("langchain")

In [None]:
#youtube
from langchain_community.tools import YouTubeSearchTool

# Create the YouTube search tool and smoke-test it with a sample query.
youtube_tool = YouTubeSearchTool()
youtube_tool.run("depression exercise")

In [None]:
# pubmed
from langchain_community.tools.pubmed.tool import PubmedQueryRun
# Create the PubMed query tool and smoke-test it with a sample question.
pubmed_tool = PubmedQueryRun()
pubmed_tool.invoke("What causes depression")



In [None]:
# Google scholar
from langchain_community.tools.google_scholar import GoogleScholarQueryRun
from langchain_community.utilities.google_scholar import GoogleScholarAPIWrapper
# API keys must never be committed in a notebook. Read the SerpAPI key from the
# environment and, if it has not been exported, prompt for it once without
# echoing it. Any key that was previously embedded in this cell should be
# treated as leaked and rotated.
if not os.environ.get("SERP_API_KEY"):
    from getpass import getpass

    os.environ["SERP_API_KEY"] = getpass("SerpAPI key: ")

# The wrapper is constructed without an explicit key, so it relies on the
# SERP_API_KEY environment variable set above.
goo_sch_tool = GoogleScholarQueryRun(api_wrapper=GoogleScholarAPIWrapper())
goo_sch_tool.run("Depression causes")



In [None]:
# Arxiv
from langchain_community.utilities import ArxivAPIWrapper
arxiv_tool = ArxivAPIWrapper()

# Query through the wrapper bound above. The name must be `arxiv_tool`:
# no variable called `arxiv` is defined in this notebook, so calling
# `arxiv.run` raises NameError on a fresh kernel.
docs = arxiv_tool.run("1605.08386")
docs

In [76]:
from langchain.tools.retriever import create_retriever_tool
# Expose the FAISS retriever as a tool; the second and third arguments are
# the tool's name and its description.
# NOTE(review): the tool name starts with a space (' Mental Health') — confirm
# whether that is intentional.
retrieval_tool = create_retriever_tool(retriever, ' Mental Health',
                      "Search for the information about mental health from custom dataset" )
retrieval_tool

Tool(name=' Mental Health', description='Search for the information about mental health from custom dataset', args_schema=<class 'langchain_core.tools.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x7f13d4564820>, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f13d6f50280>), document_prompt=PromptTemplate(input_variables=['page_content'], template='{page_content}'), document_separator='\n\n'), coroutine=functools.partial(<function _aget_relevant_documents at 0x7f13d45648b0>, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f13d6f50280>), document_prompt=PromptTemplate(input_variables=['page_content'], template='{page_content}'), document_separator='\n\n'))

In [77]:
# Collect the search helpers in one list. The Arxiv wrapper is bound to
# `arxiv_tool`; `arxiv` is not defined anywhere and would raise NameError.
tools = [wiki_tool, pubmed_tool, youtube_tool, retrieval_tool, arxiv_tool]

In [99]:
import os
from getpass import getpass

from langchain.agents import AgentType, Tool, initialize_agent
from langchain.llms import Cohere

# API keys must never be committed in a notebook. Read the Cohere key from the
# environment and, if it has not been exported, prompt for it once without
# echoing it. Any key that was previously embedded in this cell should be
# treated as leaked and rotated.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass("Cohere API key: ")

# Initialize the Cohere LLM
llm = Cohere(model="command-xlarge-nightly")

# Wrap each search helper's `run` method in a named Tool so the agent can pick
# one from its description.
tools = [
    Tool(name="Wikipedia", func=wiki_tool.run, description="Search information from Wikipedia"),
    Tool(name="PubMed", func=pubmed_tool.run, description="Search for scientific papers in PubMed"),
    Tool(name="YouTube", func=youtube_tool.run, description="Fetch video information from YouTube"),
    Tool(name="Vector Retrieval", func=retrieval_tool.run, description="Retrieve documents from the vector store"),
    # The Arxiv wrapper is bound to `arxiv_tool`; `arxiv` is undefined.
    Tool(name="Arxiv", func=arxiv_tool.run, description="Fetch research papers from Arxiv"),
]

# Initialize the agent with the Cohere LLM. The agent kind is selected through
# the `agent` parameter; `agent_type` is not a parameter of initialize_agent.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)




In [104]:
def format_response(
    wiki_data,
    pubmed_data,
    arxiv_data,
    youtube_data,
    retrieval_data=None,
    heading="Solutions for Overcoming Depression",
):
    """Assemble the per-source results into one Markdown report.

    Args:
        wiki_data: Text returned by the Wikipedia tool.
        pubmed_data: Text returned by the PubMed tool.
        arxiv_data: Text returned by the Arxiv wrapper.
        youtube_data: Text returned by the YouTube tool.
        retrieval_data: Text returned by the vector-store retrieval tool.
            The section is omitted when this is None. It is an explicit
            argument so the function does not depend on a notebook global.
        heading: Title placed on the first line of the report.

    Returns:
        A Markdown string with a level-2 heading followed by one level-3
        section per source.
    """
    sections = []
    if retrieval_data is not None:
        sections.append(("retrieved from database :", retrieval_data))
    sections.extend(
        [
            ("General Information (Wikipedia):", wiki_data),
            # PubMed results are accepted as an argument, so they get a section too.
            ("Scientific Papers (PubMed):", pubmed_data),
            ("Research Papers (Arxiv):", arxiv_data),
            ("Exercise Videos (YouTube):", youtube_data),
        ]
    )

    response = f"## {heading}\n\n"
    for section_title, section_body in sections:
        response += f"### {section_title}\n{section_body}\n\n"
    return response


query = "How to overcome anxiety and give some tips and exercise video "

# Fetch data from all tools for the same query.
retrieval_data = retrieval_tool.run(query)
wiki_data = wiki_tool.run(query)
pubmed_data = pubmed_tool.run(query)
# The Arxiv wrapper is bound to `arxiv_tool`; `arxiv` is undefined.
arxiv_data = arxiv_tool.run(query)
youtube_data = youtube_tool.run(query)

# Format the final response; the heading matches the topic of the query.
formatted_response = format_response(
    wiki_data,
    pubmed_data,
    arxiv_data,
    youtube_data,
    retrieval_data=retrieval_data,
    heading="Solutions for Overcoming Anxiety",
)
print(formatted_response)


## Solutions for Overcoming Depression

### retrieved from database :
selftext: I have started a new sales job a few months ago. And has started triggering my anxiety pretty bad. I exercise and run most days of the week and it is still not helping enough. I have been meditating too and does not seem to help yet either. I have noticed after meditating I am more prone to feel my anxiety physical sensation with tension in head and stomach. It is best to accept these sensations and just keep on meditating. Or is it better to just try to make my self feel courage when I experience anxiety and these feelings? Advice is much appreciated
subreddit: Anxiety
title: Need Help With Best Way to Deal with My Anxiety
Label: Trauma and Stress

Has anyone figured out a way to balance not going in about every little thing and making sure you’re being respected and heard?

Also, what are some things y’all do to ground yourself during an anxiety or panic attack? Bc the techniques I have ain’t doing a gd t