In [1]:
!pip install rake_nltk




[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import json
import uuid
import time
import openai
import PyPDF2
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance
from langchain.embeddings.openai import OpenAIEmbeddings
from rake_nltk import Rake

# Load configuration
# The API key is read from a local config.json rather than written in the notebook.
with open("config.json", "r") as config_file:
    config = json.load(config_file)
OPENAI_API_KEY = config["OPENAI_API_KEY"]

# The chat-completion call in test_rag_hybrid_qdrant goes through the
# module-level `openai` client, which does not see the key handed to
# OpenAIEmbeddings below. Register it here so both paths use the same credential.
openai.api_key = OPENAI_API_KEY

# Qdrant and embedding settings
collection_name = "semantic-rake-LLM-data_collection"
qdrant_client = QdrantClient(host="localhost", port=6333)
embedding_model_name = "text-embedding-3-large"
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY, model=embedding_model_name)

# Initialize RAKE for keyword extraction
rake_extractor = Rake()

# Example SemanticChunking class (your own implementation)
# NOTE(review): SemanticChunking is neither defined nor imported in the visible
# part of this notebook; it has to be made available before this cell runs.
chunker = SemanticChunking()

# Read the PDF and collapse it into a single document string.
pdf_path = "Foundations of LLM.pdf"
with open(pdf_path, "rb") as pdf_file:
    reader = PyPDF2.PdfReader(pdf_file)
    # Text has to be pulled while the file handle is still open.
    page_texts = [page.extract_text() for page in reader.pages]

# Pages that yielded no text are dropped; the rest are joined with newlines.
documents = ["\n".join(text for text in page_texts if text)]

# Run every document through the chunker and flatten the results into one list.
all_chunks = [
    chunk
    for doc in documents
    for chunk in chunker.create_documents([doc])
]

# Embed the chunks BEFORE creating the collection so the collection's vector
# size is taken from the vectors the model actually returns. A hard-coded 1536
# does not match "text-embedding-3-large" (3072 dimensions by default), and
# Qdrant rejects points whose vector length differs from the collection's.
EMBED_BATCH_SIZE = 32  # chunks sent per embeddings request

chunk_vectors = []
for batch_start in range(0, len(all_chunks), EMBED_BATCH_SIZE):
    batch = all_chunks[batch_start:batch_start + EMBED_BATCH_SIZE]
    # One request per batch instead of one request per chunk.
    chunk_vectors.extend(embeddings.embed_documents(batch))

if not chunk_vectors:
    raise ValueError("No chunks to index: the chunker produced nothing from the extracted PDF text.")

vector_size = len(chunk_vectors[0])

# If the collection already exists, delete and recreate it
existing_collections = [c.name for c in qdrant_client.get_collections().collections]
if collection_name in existing_collections:
    qdrant_client.delete_collection(collection_name)
    print(f"{collection_name} collection deleted, recreating...")

qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
)
print(f"{collection_name} collection successfully created.")

# While adding chunks to Qdrant, also add RAKE keywords to the payload
points = []
for chunk, vector in zip(all_chunks, chunk_vectors):
    # Extract keywords from the chunk content with RAKE
    rake_extractor.extract_keywords_from_text(chunk)
    chunk_keywords = rake_extractor.get_ranked_phrases()[:5]  # Top 5 keywords

    points.append({
        "id": str(uuid.uuid4()),
        "vector": vector,
        "payload": {
            "text": chunk,
            "keywords": chunk_keywords
        }
    })

qdrant_client.upsert(
    collection_name=collection_name,
    points=points
)
print(f"{len(points)} semantic chunks successfully added to Qdrant collection.")

def test_rag_hybrid_qdrant(qdrant_client, collection_name, query):
    print(f"\nQuery: {query}")
    start_time = time.time()

    # Compute the query embedding
    query_vector = embeddings.embed_query(query)

    # Extract keywords from the query using RAKE
    rake_extractor.extract_keywords_from_text(query)
    query_keywords = rake_extractor.get_ranked_phrases()[:3]  # Top 3 keywords

    # Retrieve 10 candidate results from Qdrant based on cosine similarity
    search_results = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=10,
        with_payload=True
    )

    # Post-processing: calculate keyword matching score for each document
    re_ranked = []
    for result in search_results:
        cosine_score = result.score  # Cosine similarity score from Qdrant
        payload = result.payload

        # Get the document's keyword list (if any)
        doc_keywords = payload.get("keywords", [])

        # Calculate match ratio between query keywords and document keywords (0-1)
        match_count = 0
        for kw in query_keywords:
            for doc_kw in doc_keywords:
                if kw.lower() in doc_kw.lower():
                    match_count += 1
                    break
        keyword_score = match_count / len(query_keywords) if query_keywords else 0

        # Hybrid score: 0.7 cosine + 0.3 keyword match
        final_score = 0.7 * cosine_score + 0.3 * keyword_score
        re_ranked.append((final_score, payload["text"]))

    # Sort results by final_score in descending order
    re_ranked.sort(key=lambda x: x[0], reverse=True)
    top_results = re_ranked[:5]

    if not top_results:
        print("No matching document found in Qdrant.")
        return

    # Build context from the best results
    context = "\n".join([doc for score, doc in top_results])
    print("\nSources used from Qdrant (hybrid ranking):")
    for idx, (final_score, doc) in enumerate(top_results, start=1):
        print(f"{idx}. (Hybrid Score: {final_score:.3f}, Length: {len(doc)}) {doc[:100]}...")

    input_text = f"Context: {context}\nQuestion: {query}\nAnswer: "
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI assistant that provides precise answers based on the given context."},
            {"role": "user", "content": input_text}
        ],
        temperature=0.5,
        max_tokens=300
    )

    end_time = time.time()
    print(f"\nInference time: {round(end_time - start_time, 3)} seconds")
    print(f"\nModel's answer:\n{response.choices[0].message.content}")


semantic-rake-LLM-data_collection koleksiyonu silindi, yeniden oluşturuluyor...
semantic-rake-LLM-data_collection koleksiyonu başarıyla oluşturuldu.
136 adet semantic chunk başarıyla Qdrant koleksiyonuna eklendi.


In [4]:
# Transformer-architecture question, asked before the custom texts are added.
query7 = "What are the key features of the Transformer architecture?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query7,
)


Sorgu: What are the key features of the Transformer architecture?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.554, Uzunluk: 6730) verge at certain points during optimization. The training o f LLMs is generally inﬂuenced by many fa...
2. (Hybrid Skor: 0.549, Uzunluk: 3153) This can be expressed as Output = Merge(head 1,...,head τ)Whead(2.70) where head j∈Rdhis computed us...
3. (Hybrid Skor: 0.547, Uzunluk: 4147) An example 74 Generative Models of this approach is compressive Transformer [ Rae et al. ,2019 ]. It...
4. (Hybrid Skor: 0.545, Uzunluk: 1907) [Elsken et al., 2019] Thomas Elsken, Jan Hendrik Metzen, and Frank Hutter. Neural architecture searc...
5. (Hybrid Skor: 0.544, Uzunluk: 4824) So they cannot cover functions with inﬂection points, such as double descent cur ves. In response, r...

Inference zamanı: 6.923 saniye

Modelin cevabı:
The key features of the Transformer architecture include:

1. **Layer Normalization**: Used to stabilize training

In [5]:
# Positional-encoding question, asked before the custom texts are added.
query10 = "What is positional encoding?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query10,
)


Sorgu: What is positional encoding?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.594, Uzunluk: 5334) In this case, the embedding at position ican be expressed as ei=xi+ PE(i) (2.74) where xi∈Rddenotes ...
2. (Hybrid Skor: 0.571, Uzunluk: 2879) positions are represented as combinations of sine and cosin e functions with different frequencies. ...
3. (Hybrid Skor: 0.568, Uzunluk: 1108) However, Press et al. [2022 ] found that setting βto values decreasing geometrically by a factor of1...
4. (Hybrid Skor: 0.561, Uzunluk: 7737) is the rotation matrix. If two or more rotations are performe d on the same vector, we can rotate th...
5. (Hybrid Skor: 0.557, Uzunluk: 3860) i+1for short). Suppose we have the gold- standard distribution at the same position, denoted by pgol...

Inference zamanı: 6.634 saniye

Modelin cevabı:
Positional encoding is a technique used in neural network models, particularly in transformer architectures, to inject information about the po

In [6]:
# Semantic-chunking question, asked before the custom texts are added.
query12 = "What is semantic chunking and how does it improve search efficiency?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query12,
)


Sorgu: What is semantic chunking and how does it improve search efficiency?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.546, Uzunluk: 3190) Parameter-efﬁcient transfer learning for NLP. In Proceedings of the 36th International Conference on...
2. (Hybrid Skor: 0.544, Uzunluk: 3393) Long Papers) , pages 86–96, 2016. [Seo et al., 2017] Minjoon Seo, Aniruddha Kembhavi, Ali Farh adi, ...
3. (Hybrid Skor: 0.543, Uzunluk: 5572) This moti vates researchers to develop new evaluation benchmarks and metrics for long-context LLMs. ...
4. (Hybrid Skor: 0.541, Uzunluk: 3900) it remains challenging to effectively prompt LLMs. Note tha t if we face a very difﬁcult classiﬁca- ...
5. (Hybrid Skor: 0.540, Uzunluk: 5692) to operate. To address these challenges, various optimizat ion strategies, such as pruning, quantiza...

Inference zamanı: 11.257 saniye

Modelin cevabı:
Semantic chunking is a natural language processing technique that involves breaking down text into mea

In [7]:
# Define three standalone texts
# (`uuid` comes from the notebook's import cell; no per-cell import is needed.)
custom_texts = [
    "The Transformer architecture is a fundamental building block in language models. Its self-attention mechanism enables the model to capture relationships between words.",
    "Positional encoding assists models in understanding the order of words, ensuring that the sentence structure is maintained.",
    "Semantic chunking splits long texts into meaningful segments, facilitating efficient search and information retrieval. This method helps in grouping similar content together."
]

points = []
for text in custom_texts:
    # Compute the embedding for this text
    vector = embeddings.embed_query(text)

    # Extract keywords from the text with RAKE
    rake_extractor.extract_keywords_from_text(text)
    custom_keywords = rake_extractor.get_ranked_phrases()[:5]

    # Build one point per text. The ID is derived from the text itself, so
    # re-running this cell overwrites the same three points instead of adding
    # duplicates that would occupy several of the top results.
    point = {
        "id": str(uuid.uuid5(uuid.NAMESPACE_URL, text)),
        "vector": vector,
        "payload": {
            "text": text,
            "keywords": custom_keywords
        }
    }
    points.append(point)

# Add all points to the Qdrant collection
qdrant_client.upsert(
    collection_name=collection_name,
    points=points
)

print("Your custom points have been successfully added to the collection!")


Your custom points have been successfully added to the collection!


In [8]:
# Transformer-architecture question again, now that the custom texts are in the collection.
# NOTE(review): this string ends in "??" while query7 ends in "?" — confirm the
# difference is intentional, since it changes the embedded query text.
query8 = "What are the key features of the Transformer architecture??"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query8,
)




Sorgu: What are the key features of the Transformer architecture??

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.756, Uzunluk: 167) The Transformer architecture is a fundamental building block in language models. Its self-attention ...
2. (Hybrid Skor: 0.556, Uzunluk: 6730) verge at certain points during optimization. The training o f LLMs is generally inﬂuenced by many fa...
3. (Hybrid Skor: 0.547, Uzunluk: 3153) This can be expressed as Output = Merge(head 1,...,head τ)Whead(2.70) where head j∈Rdhis computed us...
4. (Hybrid Skor: 0.546, Uzunluk: 4147) An example 74 Generative Models of this approach is compressive Transformer [ Rae et al. ,2019 ]. It...
5. (Hybrid Skor: 0.546, Uzunluk: 1925) [Kahneman, 2011] Daniel Kahneman. Thinking, fast and slow . macmillan, 2011. [Kaplan et al., 2020] J...

Inference zamanı: 9.71 saniye

Modelin cevabı:
The key features of the Transformer architecture include:

1. **Self-Attention Mechanism**: This allows the model 

In [9]:
# Positional-encoding question again, now that the custom texts are in the collection.
query9 = "What is positional encoding?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query9,
)


Sorgu: What is positional encoding?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.918, Uzunluk: 123) Positional encoding assists models in understanding the order of words, ensuring that the sentence s...
2. (Hybrid Skor: 0.594, Uzunluk: 5334) In this case, the embedding at position ican be expressed as ei=xi+ PE(i) (2.74) where xi∈Rddenotes ...
3. (Hybrid Skor: 0.571, Uzunluk: 2879) positions are represented as combinations of sine and cosin e functions with different frequencies. ...
4. (Hybrid Skor: 0.568, Uzunluk: 1108) However, Press et al. [2022 ] found that setting βto values decreasing geometrically by a factor of1...
5. (Hybrid Skor: 0.561, Uzunluk: 7737) is the rotation matrix. If two or more rotations are performe d on the same vector, we can rotate th...

Inference zamanı: 6.697 saniye

Modelin cevabı:
Positional encoding is a technique used in models, particularly in natural language processing, to help them understand the order of words in a 

In [10]:
# Semantic-chunking question again, now that the custom texts are in the collection.
query11 = "What is semantic chunking and how does it improve search efficiency?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query11,
)


Sorgu: What is semantic chunking and how does it improve search efficiency?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.783, Uzunluk: 174) Semantic chunking splits long texts into meaningful segments, facilitating efficient search and info...
2. (Hybrid Skor: 0.564, Uzunluk: 123) Positional encoding assists models in understanding the order of words, ensuring that the sentence s...
3. (Hybrid Skor: 0.546, Uzunluk: 3190) Parameter-efﬁcient transfer learning for NLP. In Proceedings of the 36th International Conference on...
4. (Hybrid Skor: 0.544, Uzunluk: 3393) Long Papers) , pages 86–96, 2016. [Seo et al., 2017] Minjoon Seo, Aniruddha Kembhavi, Ali Farh adi, ...
5. (Hybrid Skor: 0.543, Uzunluk: 5572) This moti vates researchers to develop new evaluation benchmarks and metrics for long-context LLMs. ...

Inference zamanı: 2.453 saniye

Modelin cevabı:
Semantic chunking is a method that splits long texts into meaningful segments or chunks, which facilitate

In [11]:
# Definition-style question about pre-training.
query1 = "What is pre-training?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query1,
)


Sorgu: What is pre-training?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.870, Uzunluk: 965) 1Here we assume that tokens are basic units of text that are sep arated through tokenization. Someti...
2. (Hybrid Skor: 0.857, Uzunluk: 3860) i+1for short). Suppose we have the gold- standard distribution at the same position, denoted by pgol...
3. (Hybrid Skor: 0.725, Uzunluk: 2710) forgetting problem in continual training, where a neural network forge ts previously learned in- for...
4. (Hybrid Skor: 0.722, Uzunluk: 5110) The training objective can be deﬁned as (ˆθ,ˆω) = arg max θ,ωLoss(Model θ,ω(xnoise),x) (1.16) Here t...
5. (Hybrid Skor: 0.721, Uzunluk: 4839) The ﬁne-tuned model is then employed to classify new sequences for this task. An advantage of superv...

Inference zamanı: 3.179 saniye

Modelin cevabı:
Pre-training is a process in natural language processing (NLP) where models are initially trained on a large amount of unlabeled data to learn general 

In [12]:
# Question about the model types used for NLP pre-training.
query2 = "Which types of models are widely used in NLP pre-training?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query2,
)


Sorgu: Which types of models are widely used in NLP pre-training?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.682, Uzunluk: 9996) D dataset used for training or ﬁne-tuning a model ∂L ∂θgradient of the loss function Lwith respect t...
2. (Hybrid Skor: 0.604, Uzunluk: 965) 1Here we assume that tokens are basic units of text that are sep arated through tokenization. Someti...
3. (Hybrid Skor: 0.592, Uzunluk: 3190) Parameter-efﬁcient transfer learning for NLP. In Proceedings of the 36th International Conference on...
4. (Hybrid Skor: 0.590, Uzunluk: 5918) the discussion of these topics to the following chapters. CHAPTER 2 Generative Models One of the mos...
5. (Hybrid Skor: 0.589, Uzunluk: 3861) The use of these ex- amples does not distinguish between models, but we mark the m odel architecture...

Inference zamanı: 6.824 saniye

Modelin cevabı:
The types of models widely used in NLP pre-training include:

1. **Encoder-only Models**: Such as BERT, which is t

In [13]:
# How-to question about permuted language modelling.
query3 = "How do we implement permuted language modelling?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query3,
)


Sorgu: How do we implement permuted language modelling?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.582, Uzunluk: 5918) the discussion of these topics to the following chapters. CHAPTER 2 Generative Models One of the mos...
2. (Hybrid Skor: 0.582, Uzunluk: 8453) The approach described above provides a new framework of uni versal language understanding and gener...
3. (Hybrid Skor: 0.581, Uzunluk: 7725) sense to predict any of the tokens in this sequence. 1.2.2.1 Masked Language Modeling One of the mos...
4. (Hybrid Skor: 0.578, Uzunluk: 5839) architecture to adapt LLMs to large-scale training. In Sect ion2.2we will present more discussions o...
5. (Hybrid Skor: 0.577, Uzunluk: 5843) ↓ Is Next or Not? 1.2 Self-supervised Pre-training Tasks 13 x0 x0x1 x1x2 x2x3 x3x4 x4Pr(x0) = 1 Pr(x...

Inference zamanı: 9.092 saniye

Modelin cevabı:
Permuted language modeling involves making sequential predictions of tokens in a different order than their natural sequenc

In [14]:
# Question about large-scale pre-training as covered in the document.
query4 = "What is the large-scale pre-training of the document?"
test_rag_hybrid_qdrant(
    qdrant_client=qdrant_client,
    collection_name=collection_name,
    query=query4,
)


Sorgu: What is the large-scale pre-training of the document?

Qdrant'tan Kullanılan Kaynaklar (hybrid sıralama ile):
1. (Hybrid Skor: 0.683, Uzunluk: 965) 1Here we assume that tokens are basic units of text that are sep arated through tokenization. Someti...
2. (Hybrid Skor: 0.682, Uzunluk: 4869) scaling laws for LLMs, which help us understand their traini ng efﬁciency and effectiveness. 2.2.1 D...
3. (Hybrid Skor: 0.677, Uzunluk: 9182) example, in He et al. [2021 ]’s work, a 1.5 billion-parameter BERT-like model is built b y increasin...
4. (Hybrid Skor: 0.674, Uzunluk: 3190) Parameter-efﬁcient transfer learning for NLP. In Proceedings of the 36th International Conference on...
5. (Hybrid Skor: 0.673, Uzunluk: 9992) [Kojima et al., 2022] Takeshi Kojima, Shixiang Shane Gu, Mac hel Reid, Yutaka Matsuo, and Yusuke Iwa...

Inference zamanı: 5.763 saniye

Modelin cevabı:
The large-scale pre-training of the document refers to the process of training large language models (LLMs) on extensiv