In [11]:
!pip install langchain




In [22]:
!pip install faiss-cpu

Collecting faiss-cpu
  Obtaining dependency information for faiss-cpu from https://files.pythonhosted.org/packages/17/d2/c90a810b594d11f66b0162a08415029b7e056243a5023a7d5c719353a057/faiss_cpu-1.8.0.post1-cp311-cp311-macosx_10_14_x86_64.whl.metadata
  Downloading faiss_cpu-1.8.0.post1-cp311-cp311-macosx_10_14_x86_64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp311-cp311-macosx_10_14_x86_64.whl (7.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1


In [1]:
import pandas as pd

In [3]:
# Read the symptom/disease dataset and preview its first five rows.
df = pd.read_csv("Symptom2Disease.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,label,text
0,0,Psoriasis,I have been experiencing a skin rash on my arm...
1,1,Psoriasis,"My skin has been peeling, especially on my kne..."
2,2,Psoriasis,I have been experiencing joint pain in my fing...
3,3,Psoriasis,"There is a silver like dusting on my skin, esp..."
4,4,Psoriasis,"My nails have small dents or pits in them, and..."


In [4]:
# Drop the leftover index column that was saved into the CSV.
# Reassigning (instead of inplace=True) and passing errors="ignore" keeps the
# cell idempotent: re-running it after the column is gone no longer raises
# KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [10]:
# Print the first record as "<label> : <text>" to see one full, untruncated sample.
print(f"{df.at[0, 'label']} : {df.at[0, 'text']}")

Psoriasis : I have been experiencing a skin rash on my arms, legs, and torso for the past few weeks. It is red, itchy, and covered in dry, scaly patches.


In [16]:
# CSVLoader is imported from langchain_community, matching the other
# integrations in this notebook; the old `langchain.document_loaders` path is a
# deprecated re-export.
from langchain_community.document_loaders import CSVLoader
from langchain.text_splitter import CharacterTextSplitter

# Load the CSV: each row becomes one Document whose page_content lists the
# row's "column: value" pairs and whose metadata records the source file and
# row number.
loader = CSVLoader(file_path="Symptom2Disease.csv")
documents = loader.load()

# Split documents into smaller chunks (optional).
text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=0)

# `docs` is the list of Document chunks that gets embedded and indexed later.
docs = text_splitter.split_documents(documents)

In [26]:
# Inspect the first chunk to see how a CSV row is rendered as a Document.
docs[0]

Document(metadata={'source': 'Symptom2Disease.csv', 'row': 0}, page_content=': 0\nlabel: Psoriasis\ntext: I have been experiencing a skin rash on my arms, legs, and torso for the past few weeks. It is red, itchy, and covered in dry, scaly patches.')

In [28]:
from langchain_community.embeddings import OllamaEmbeddings

# Embedding client for an Ollama-served model.
# Model tags noted by the author: llama3.2:1b, llama3.1
embeddings = OllamaEmbeddings(model="llama3.2:1b")
# Refrence doc : https://ollama.com/blog/embedding-models

In [29]:
# Display the embedding client's configuration (server URL, model tag, options).
embeddings

OllamaEmbeddings(base_url='http://localhost:11434', model='llama3.2:1b', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

## FAISS

In [25]:
from langchain_community.vectorstores import FAISS

In [None]:
# Embed every chunk in `docs` and build the FAISS vector store from the result.
# Takes 5-10 minutes.
db = FAISS.from_documents(documents=docs, embedding=embeddings)
db

In [None]:
### Querying
query = "My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensation."

# Keep the hits in their own variable. Rebinding `docs` here would overwrite
# the chunk list that the index-building cell reads, so re-running that cell
# would index only these few search results.
query_hits = db.similarity_search(query)
query_hits[0].page_content

#### As a Retriever
We can also wrap the vector store in a retriever object. This makes it easy to plug into other LangChain components, most of which work with retrievers.

In [None]:
# Wrap the vector store in a retriever and run the same query through it.
retriever = db.as_retriever()

# Store the results under a dedicated name so the chunk list in `docs`, which
# the index-building cell depends on, is not overwritten.
retrieved_docs = retriever.invoke(query)
retrieved_docs[0].page_content

#### Similarity Search with score
There are some FAISS-specific methods. One of them is `similarity_search_with_score`, which returns not only the documents but also the distance score between the query and each document. The returned score is an L2 distance, so a lower score means a closer match.

In [None]:
# Run the same search, this time asking for the score alongside each document.
docs_and_score = db.similarity_search_with_score(query=query)
docs_and_score

In [None]:
# Embed the query text ourselves so the raw vector can be used for searching.
embedding_vector = embeddings.embed_query(query)

# Show the dimensionality and a short preview instead of dumping every
# component of the vector into the cell output.
len(embedding_vector), embedding_vector[:5]

In [None]:
# Search the index with the precomputed query embedding instead of the raw text.
# NOTE(review): despite the name `docs_score`, this call presumably returns
# documents without scores - confirm against the FAISS vector store API.
docs_score=db.similarity_search_by_vector(embedding_vector)
docs_score

In [None]:
### Saving and loading
# Persist the vector store under the local path "faiss_index".
db.save_local(folder_path="faiss_index")

In [None]:
# Reload the index saved under "faiss_index", passing the same embeddings
# object, then repeat the similarity search against the reloaded store.
# NOTE(review): allow_dangerous_deserialization=True opts in to a load path the
# library itself labels dangerous (presumably pickle-based - confirm in the
# LangChain docs). Only load index folders you created yourself.
new_db=FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
docs=new_db.similarity_search(query)