In [1]:
from langchain_community.retrievers import ArxivRetriever

# Build the arXiv retriever; top_k_results=3 asks for the three top results per query.
retriever = ArxivRetriever(top_k_results=3)

In [2]:
# Free-text description used as the arXiv search query.
query = "AudioCLIP adds audio as an additional modality into a CLIP framework, enabling zero-shot audio classification."
docs = retriever.invoke(query)

# Print every retrieved document, each followed by a 50-dash rule.
for doc in docs:
    print(doc, "-" * 50, sep="\n")

page_content='In the past, the rapidly evolving field of sound classification greatly
benefited from the application of methods from other domains. Today, we observe
the trend to fuse domain-specific tasks and approaches together, which provides
the community with new outstanding models.
  In this work, we present an extension of the CLIP model that handles audio in
addition to text and images. Our proposed model incorporates the ESResNeXt
audio-model into the CLIP framework using the AudioSet dataset. Such a
combination enables the proposed model to perform bimodal and unimodal
classification and querying, while keeping CLIP's ability to generalize to
unseen datasets in a zero-shot inference fashion.
  AudioCLIP achieves new state-of-the-art results in the Environmental Sound
Classification (ESC) task, out-performing other approaches by reaching
accuracies of 90.07% on the UrbanSound8K and 97.15% on the ESC-50 datasets.
Further it sets new baselines in the zero-shot ESC-task on the sa

In [3]:
from langchain_openai import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

# Shared embedding client (model "text-embedding-3-large"); calculate_similarity reads it as a global.
# NOTE(review): presumably needs OpenAI credentials configured in the environment — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")


def calculate_similarity(page_content: str, query: str) -> float:
    """Return the cosine similarity between two embedded texts.

    Both ``page_content`` and ``query`` are embedded with the module-level
    ``embeddings`` client via ``embed_query`` (document first, then query),
    and the single cosine-similarity value is returned as a plain float.
    """
    doc_vec, query_vec = (
        embeddings.embed_query(text) for text in (page_content, query)
    )
    # cosine_similarity works on 2-D inputs, so wrap each vector as one row;
    # the 1x1 result is unwrapped with .item().
    return cosine_similarity([doc_vec], [query_vec]).item()


# Score each retrieved document's page_content against the query.
# Note: calculate_similarity embeds `query` again on every call.
for doc in docs:
    print(calculate_similarity(doc.page_content, query))

0.711873196241966
0.5347228688692579
0.5064669280999857


In [11]:
import re


def is_arxiv_identifier(query: str) -> bool:
    """Return True when every whitespace-separated token of ``query`` is an arXiv ID.

    Only the first 300 characters of ``query`` are inspected. A token is
    accepted when the *whole* token matches one of the pattern's alternatives:

    * two digits, a month ``01``-``12``, a dot, four or five digits and an
      optional ``vN`` version suffix (e.g. ``2106.13043v1``), or
    * seven digits followed by any characters.

    An empty or whitespace-only query contains no identifier, so it yields
    False.
    """
    pattern = r"\d{2}(0[1-9]|1[0-2])\.\d{4,5}(v\d+|)|\d{7}.*"
    items = query[:300].split()
    # all() is vacuously True for an empty list, so require at least one token.
    # fullmatch replaces the double re.match + group(0) comparison.
    return bool(items) and all(re.fullmatch(pattern, item) for item in items)


# The free-text query is not a list of arXiv identifiers, so this evaluates to False.
is_arxiv_identifier(query)

False

In [12]:
# Turn each result's "Entry ID" URL from its "/abs/" form into the "/pdf/" form.
links = [doc.metadata["Entry ID"].replace("/abs/", "/pdf/") for doc in docs]
print(links)

['http://arxiv.org/pdf/2106.13043v1', 'http://arxiv.org/pdf/2210.05060v1', 'http://arxiv.org/pdf/2404.01751v2']


In [13]:
from Tools import webloader

# Load every PDF link through the project's webloader helper, keeping the order of `links`.
# NOTE(review): webloader is assumed to return the document text as a string — confirm in Tools.
content_list = [webloader(pdf_url) for pdf_url in links]

# Score each loaded document against the original query.
for content in content_list:
    score = calculate_similarity(content, query)
    print(score)

Could not load the custom kernel for multi-scale deformable attention: /home/teron/.cache/torch_extensions/py312_cu124/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /home/teron/.cache/torch_extensions/py312_cu124/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /home/teron/.cache/torch_extensions/py312_cu124/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /home/teron/.cache/torch_extensions/py312_cu124/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for

0.5819485057638034
0.5515498948932065
0.4616582145934405


In [None]:
from Tools import webloader

# Ad-hoc check of webloader on a single PDF URL. Note: this rebinds `content`,
# which the previous cell used as its loop variable.
content = webloader("http://arxiv.org/pdf/2309.03905v2")
print(content)