## Get API key 

In [1]:
import os
import getpass

# Ask for the Cohere API key with a hidden-input prompt and keep it in this
# process's environment so later cells can read it back. The prompt is skipped
# when the variable is already set, so re-running the cell does not ask again.
if not os.environ.get("Cohere_API_KEY"):
    os.environ["Cohere_API_KEY"] = getpass.getpass("Cohere API key: ")

In [4]:
# Read the key back from the environment for the Cohere clients created below.
cohere_api_key = os.environ["Cohere_API_KEY"]

# Never print the secret itself: cell outputs are saved with the notebook and
# would leak the key to anyone who can read the file. Confirm presence only.
print("Cohere API key loaded:", bool(cohere_api_key))

[REDACTED]


## Load Cohere chat model

In [5]:
from langchain_cohere import ChatCohere

# Chat client for the "command-r-plus" model, with temperature set to 0.2.
chat_model = ChatCohere(
    model="command-r-plus",
    temperature=0.2,
    cohere_api_key=cohere_api_key,
)

## Load embedding models

In [8]:
from langchain_cohere import CohereEmbeddings

# Embedding client for the "embed-multilingual-light-v3.0" model.
# NOTE(review): request_timeout is presumably in seconds — confirm against
# the langchain_cohere documentation.
embeddings = CohereEmbeddings(
    model="embed-multilingual-light-v3.0",
    cohere_api_key=cohere_api_key,
    request_timeout=20,
    max_retries=3,
)

In [10]:
# Three Persian sample passages: a kamancheh player, percussion instruments
# in world music, and football.
documents = [
    "کیهان کلهر یکی از برجسته‌ترین نوازندگان کمانچه است که با تکنیک‌های خاص خود، این ساز را به سطح جدیدی از شهرت رسانده است.",
    "سازهای کوبه‌ای در موسیقی جهانی به عنوان ابزاری برای ایجاد ریتم و تنوع در آهنگ‌ها شناخته می‌شوند.",
    "فوتبال (به انگلیسی: Football) یک ورزش تیمی و محبوب‌ترین ورزش در بیشتر کشورهای جهان است.",
]

# Embed every passage with the Cohere embedding client.
doc_embed = embeddings.embed_documents(documents)

# Show only the first 10 components of each vector to keep the output short.
for doc_vector in doc_embed:
    print(doc_vector[:10])

[-0.016464233, 0.094177246, 0.026947021, -0.07098389, -0.061157227, -0.028930664, 0.034454346, -0.066833496, 0.091674805, 0.09765625]
[0.019454956, 0.060821533, 0.00308609, -0.10998535, 0.060028076, 0.007068634, 0.042907715, -0.006958008, 0.051757812, 0.072753906]
[-0.081848145, 0.16064453, -0.048187256, -0.062683105, 0.07159424, 0.010070801, -0.057434082, 0.013404846, 0.051818848, 0.009689331]


In [13]:
# Persian query: "Who are the famous kamancheh players in Iran?"
query = "نوازندگان مشهور کمانچه در ایران چه کسانی هستند؟"
query_embed = embeddings.embed_query(query)

# Preview the first 10 components, then the length of the vector.
print(query_embed[:10], len(query_embed), sep="\n")

[0.02671814, 0.14953613, 0.014564514, -0.08947754, -0.08135986, -0.009742737, 0.061462402, -0.014282227, 0.04888916, 0.0413208]
384


In [14]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.5/13.5 MB 3.9 MB/s eta 0:00:00
Downloading joblib-1.4.2-py3-none-any.whl (301 kB)
Downloading scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.6/37.6 MB[0m [31m3.4 MB/s[0m eta [36m

In [22]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# The query vector is the same for every document, so build its single-row
# 2-D array once up front.
query_row = np.array(query_embed).reshape(1, -1)

for doc_vector in doc_embed:
    doc_row = np.array(doc_vector).reshape(1, -1)
    # Both inputs have one row, so the result is a 1x1 matrix; [0][0] is the score.
    score = cosine_similarity(doc_row, query_row)[0][0]
    print("Cosine similarity between doc and query: ", score)

Cosine similarity between doc and query:  0.6328394806663404
Cosine similarity between doc and query:  0.3489902512899745
Cosine similarity between doc and query:  0.17526300179371856


## Hugging Face

In [23]:
!pip install langchain-huggingface==0.0.3

Collecting langchain-huggingface==0.0.3
  Downloading langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Collecting langchain-core<0.3,>=0.1.52 (from langchain-huggingface==0.0.3)
  Downloading langchain_core-0.2.43-py3-none-any.whl.metadata (6.2 kB)
Collecting sentence-transformers>=2.6.0 (from langchain-huggingface==0.0.3)
  Downloading sentence_transformers-4.0.1-py3-none-any.whl.metadata (13 kB)
Collecting transformers>=4.39.0 (from langchain-huggingface==0.0.3)
  Downloading transformers-4.50.1-py3-none-any.whl.metadata (39 kB)
Collecting langsmith<0.2.0,>=0.1.112 (from langchain-core<0.3,>=0.1.52->langchain-huggingface==0.0.3)
  Downloading langsmith-0.1.147-py3-none-any.whl.metadata (14 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain-core<0.3,>=0.1.52->langchain-huggingface==0.0.3)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting torch>=1.11.0 (from sentence-transformers>=2.6.0->langchain-huggingface==0.0.3)
  Downloading torch

In [26]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model loaded through langchain_huggingface, selected by model name.
hf_embedding = HuggingFaceEmbeddings(
    model_name="paraphrase-multilingual-MiniLM-L12-v2",
)


  from .autonotebook import tqdm as notebook_tqdm


In [27]:
# Embed the same passages with the Hugging Face model.
# Note: this rebinds `doc_embed`, so any earlier value of that name is replaced.
doc_embed = hf_embedding.embed_documents(documents)

# Show only the first 10 components of each vector to keep the output short.
for doc_vector in doc_embed:
    print(doc_vector[:10])

[-0.048902641981840134, 0.18372699618339539, -0.14715974032878876, -0.07166250050067902, -0.46291738748550415, 0.07692016661167145, 0.2535056173801422, 0.07882627844810486, -0.08856236934661865, -0.2515318989753723]
[-0.11052107810974121, -0.34898847341537476, -0.04259101301431656, 0.07889197021722794, -0.5047726631164551, -0.050711799412965775, -0.012866770848631859, -0.11027435213327408, 0.09799669682979584, -0.1287292242050171]
[-0.026968447491526604, -0.08886303007602692, -0.20010356605052948, -0.3204899728298187, 0.19665610790252686, -0.014215723611414433, 0.27654486894607544, 0.235019251704216, 0.17608347535133362, 0.3276195824146271]


In [28]:
# Embed the same query with the Hugging Face model (rebinds `query_embed`).
query_embed = hf_embedding.embed_query(query)

# Preview the first 10 components only.
query_preview = query_embed[:10]
print(query_preview)

[0.07243703305721283, 0.03182647377252579, -0.14401094615459442, -0.15100939571857452, -0.2456386238336563, 0.3141781985759735, 0.39073362946510315, 0.10063851624727249, 0.03672315925359726, -0.03940468281507492]


In [29]:
# Score each document vector against the query vector currently held in
# `query_embed`. The query row is identical for every document, so it is
# built once before the loop.
query_row = np.array(query_embed).reshape(1, -1)

for doc_vector in doc_embed:
    doc_row = np.array(doc_vector).reshape(1, -1)
    # Both inputs have one row, so the result is a 1x1 matrix; [0][0] is the score.
    score = cosine_similarity(doc_row, query_row)[0][0]
    print("Cosine similarity between doc and query: ", score)

Cosine similarity between doc and query:  0.46105746659269636
Cosine similarity between doc and query:  0.4342183331678682
Cosine similarity between doc and query:  0.10393263240002416
