# Mini Project using Embeddings Vector (Documents Similarity)

# Install Libraries

In [27]:
!pip install langchain-huggingface



# Import Libraries

In [28]:
from langchain_huggingface import HuggingFaceEmbeddings

In [29]:
from sklearn.metrics.pairwise import cosine_similarity

In [30]:
import numpy as np


# Create Embeddings model

In [31]:
embeddings_model = HuggingFaceEmbeddings(
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
)

In [32]:
doc = [
    "Bangladesh is a vibrant South Asian nation with resilient people.",
    "Its fertile deltaic lands are shaped by many powerful rivers.",
    "Dhaka remains a bustling capital filled with constant urban activity.",
    "The garment industry significantly drives Bangladeshâ€™s ongoing economic growth.",
    "Climate challenges threaten communities despite the country's rapid development progress."
]

In [33]:
doc_embeddings = embeddings_model.embed_documents(texts=doc)

# Test Data

In [34]:
text = "What is capital of Bangladesh"
text_embeddings = embeddings_model.embed_query(text=text)

In [35]:
similarity_score = cosine_similarity([text_embeddings], doc_embeddings)[0]
similarity_score

array([0.58355797, 0.16624041, 0.62075787, 0.5011194 , 0.10604694])

In [36]:
max_index, max_score = sorted(list(enumerate(similarity_score)), key = lambda x:x[1])[-2]
max_score

np.float64(0.5835579745401382)

In [37]:
doc[max_index]

'Bangladesh is a vibrant South Asian nation with resilient people.'