In [1]:
from tools import fetch_arxiv_papers

# Fetch papers for the "Language Models" query through the project's fetch_arxiv_papers helper.
# NOTE(review): the second argument (50) is presumably the maximum number of results — confirm against tools.fetch_arxiv_papers.
papers = fetch_arxiv_papers("Language Models", 50)

In [2]:
# Quick sanity check: list the title of every fetched paper.
[entry["title"] for entry in papers]

['TextRegion: Text-Aligned Region Tokens from Frozen Image-Text Models',
 'Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought',
 'From Chat Logs to Collective Insights: Aggregative Question Answering',
 'MMSI-Bench: A Benchmark for Multi-Image Spatial Intelligence',
 'ZeroGUI: Automating Online GUI Learning at Zero Human Cost',
 'Sketch Down the FLOPs: Towards Efficient Networks for Human Sketch',
 'Differential Information: An Information-Theoretic Perspective on Preference Optimization',
 'Model Immunization from a Condition Number Perspective',
 "Puzzled by Puzzles: When Vision-Language Models Can't Take a Hint",
 'Impromptu VLA: Open Weights and Open Data for Driving Vision-Language-Action Models',
 'LoRAShop: Training-Free Multi-Concept Image Generation and Editing with Rectified Flow Transformers',
 'DeepTheorem: Advancing LLM Reasoning for Theorem Proving Through Natural Language and Reinforcement Learning',
 'ThinkGeo: Evaluating Tool-Augmented Agents for Remote Sen

In [6]:
from llama_index.core import Document

def create_documents_from_papers(papers):
    """Turn paper records into ``Document`` objects, one per paper.

    Every paper is rendered as a sequence of ``Label: value`` lines, each
    terminated by a newline, and that text becomes the document's ``text``.

    Args:
        papers: Iterable of mappings providing the keys ``title``,
            ``authors``, ``summary``, ``published``, ``journal_ref``,
            ``doi``, ``primary_category``, ``categories``, ``pdf_url`` and
            ``arxiv_url``. ``authors`` and ``categories`` must be iterables
            of strings because they are joined with ``", "``.

    Returns:
        list: One ``Document`` per paper, in the same order as ``papers``.

    Raises:
        KeyError: If a paper is missing any of the keys listed above.
    """

    def render(entry):
        # Field order here defines the line order of the rendered text.
        labelled_fields = [
            ("Title", entry["title"]),
            ("Authors", ", ".join(entry["authors"])),
            ("Summary", entry["summary"]),
            ("Published", entry["published"]),
            ("Journal Reference", entry["journal_ref"]),
            ("DOI", entry["doi"]),
            ("Primary Category", entry["primary_category"]),
            ("Categories", ", ".join(entry["categories"])),
            ("PDF URL", entry["pdf_url"]),
            ("arXiv URL", entry["arxiv_url"]),
        ]
        return "".join(f"{label}: {value}\n" for label, value in labelled_fields)

    return [Document(text=render(entry)) for entry in papers]

In [7]:
# Build one Document per fetched paper using create_documents_from_papers.
documents = create_documents_from_papers(papers)

In [8]:
# Display the full list of Document objects to spot-check the generated text (the repr is long).
documents

[Document(id_='ba8b21c0-4175-4c37-906b-db64d1e900ae', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Title: TextRegion: Text-Aligned Region Tokens from Frozen Image-Text Models\nAuthors: Yao Xiao, Qiqian Fu, Heyi Tao, Yuqun Wu, Zhen Zhu, Derek Hoiem\nSummary: Image-text models excel at image-level tasks but struggle with detailed\nvisual understanding. While these models provide strong visual-language\nalignment, segmentation models like SAM2 offer precise spatial boundaries for\nobjects. To this end, we propose TextRegion, a simple, effective, and\ntraining-free framework that combines the strengths of image-text models and\nSAM2 to generate powerful text-aligned region tokens. These tokens enable\ndetailed visual understanding while preserving open-vocabulary capabilities.\nThey can be directly appli

In [9]:
from llama_index.core import Settings, VectorStoreIndex
from constants import embed_model

# Configure the global llama_index Settings before the index is built.
# NOTE(review): these presumably control how documents are split into chunks
# (size and overlap, likely in tokens) — confirm against the llama_index Settings docs.
Settings.chunk_size = 1024
Settings.chunk_overlap = 50

# Build a vector index over all documents, using the project's embed_model from constants.
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

In [11]:
# Persist the index's storage context under the relative "index/" path so it can be reloaded later.
index.storage_context.persist("index/")