<a href="https://colab.research.google.com/github/shosseini811/llm-examples/blob/main/TransfomersRag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and causal language model for the chosen checkpoint
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Query (x) and candidate documents (z)
query = "What is the capital of France?"
documents = [
    "The capital of France is Paris.",
    "France is a country in Europe.",
    "Paris is a major city."
]
# Hand-assigned retrieval prior p(z|x): one weight per entry of `documents`, same order
doc_probs = [0.7, 0.2, 0.1]

# The code below pairs documents with weights via zip(), which silently drops
# unmatched entries, and treats the weights as a probability distribution.
# Fail loudly here if either assumption is broken by a later edit.
assert len(doc_probs) == len(documents), "doc_probs must have one entry per document"
assert abs(sum(doc_probs) - 1.0) < 1e-9, "doc_probs must sum to 1"

# Function to get token probabilities for a response given a document
def get_token_probs(query, document, response):
    """Score `response` token by token, conditioned on `query` and `document`.

    The prompt is ``f"{query} {document}"``. The response token ids are appended
    to the prompt ids and the module-level ``model`` is run once over the whole
    sequence; the probability of each response token is read from the logits at
    the position just before it. For a causal LM (``model`` is loaded with
    ``AutoModelForCausalLM`` in this notebook) this gives the same conditional
    probabilities as re-running the model on every growing prefix.

    Prints the tokenized response, one line per token (its probability and the
    exact text that precedes it in the model input), and the final product.

    Args:
        query: Question text (x).
        document: Retrieved passage text (z).
        response: Candidate answer text (y).

    Returns:
        tuple: ``(sequence_prob, token_probs)`` where ``token_probs`` is the list
        of per-token probabilities in response order and ``sequence_prob`` is
        their product as a Python float. An empty response yields ``(1.0, [])``.
    """
    input_text = f"{query} {document}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Tokenize the response and get token strings for display.
    # NOTE: the response is tokenized on its own and its ids are concatenated
    # directly after the prompt ids; no separator is inserted between them.
    response_ids = tokenizer.encode(response, add_special_tokens=False)
    response_tokens = tokenizer.convert_ids_to_tokens(response_ids)

    print(f"\nTokenized response: {response_tokens}")

    token_probs = []
    if response_ids:
        prompt_len = input_ids.shape[1]
        response_tensor = torch.tensor(
            [response_ids], dtype=input_ids.dtype, device=input_ids.device
        )
        full_ids = torch.cat([input_ids, response_tensor], dim=1)

        # One forward pass over prompt + response instead of one pass per token
        with torch.no_grad():
            logits = model(full_ids).logits

        # The logits at position t are the prediction for token t + 1, so the
        # rows that predict the response start at the last prompt position and
        # stop one before the end of the sequence.
        step_probs = torch.softmax(logits[0, prompt_len - 1:-1, :], dim=-1)
        targets = response_tensor[0].to(step_probs.device).unsqueeze(1)
        token_probs = step_probs.gather(1, targets).squeeze(1).tolist()

        for i, (token, token_prob) in enumerate(zip(response_tokens, token_probs)):
            # Decode exactly the ids that precede this token in the model input,
            # so the displayed context is what the prediction was conditioned on.
            context = tokenizer.decode(full_ids[0, :prompt_len + i])
            print(f"  Token: '{token}' | Probability: {token_prob:.6f} | Given context: '{context}'")

    # Calculate p(y|x,z) as product of token probabilities. Use float64: a
    # float32 product of many small probabilities underflows to exactly 0.
    sequence_prob = torch.tensor(token_probs, dtype=torch.float64).prod().item()
    print(f"  p(y|x,z) = {' × '.join([f'{p:.6f}' for p in token_probs])} = {sequence_prob:.10f}")

    return sequence_prob, token_probs

# Calculate RAG-Sequence probability: p(y|x) = sum over z of p(z|x) * p(y|x,z)
response = "The capital is Paris"
rag_sequence_prob = 0
all_doc_contributions = []

print(f"Query: '{query}'")
print(f"Response: '{response}'")

for i, (doc, doc_prob) in enumerate(zip(documents, doc_probs)):
    print(f"\n--- Document {i+1}: '{doc}' (p(z|x) = {doc_prob}) ---")

    # Calculate p(y|x,z) for this document
    response_prob, token_probs = get_token_probs(query, doc, response)

    # Calculate contribution to final probability
    contribution = doc_prob * response_prob
    all_doc_contributions.append(contribution)

    print(f"  Contribution to final probability: {doc_prob} × {response_prob:.10f} = {contribution:.10f}")

    # Add to weighted sum: p(z|x) * p(y|x,z)
    rag_sequence_prob += contribution

print("\n--- Final RAG-Sequence Probability ---")
print(f"p_RAG-Sequence(y|x) = {' + '.join([f'{c:.10f}' for c in all_doc_contributions])} = {rag_sequence_prob:.10f}")

# Additional analysis - which document contributed most?
if not all_doc_contributions:
    # max() on an empty list raises; nothing to rank when no document was scored
    print("\nNo documents were scored.")
else:
    max_contribution_idx = all_doc_contributions.index(max(all_doc_contributions))
    print(f"\nDocument with highest contribution: Document {max_contribution_idx+1} ('{documents[max_contribution_idx]}')")
    if rag_sequence_prob > 0:
        print(f"Percentage of final probability: {(all_doc_contributions[max_contribution_idx]/rag_sequence_prob)*100:.2f}%")
    else:
        # Every contribution is 0 (e.g. the sequence probabilities underflowed),
        # so a share of the total is undefined rather than a ZeroDivisionError.
        print("Percentage of final probability: undefined (total probability is 0)")

Query: 'What is the capital of France?'
Response: 'The capital is Paris'

--- Document 1: 'The capital of France is Paris.' (p(z|x) = 0.7) ---

Tokenized response: ['The', 'Ġcapital', 'Ġis', 'ĠParis']
  Token: 'The' | Probability: 0.000208 | Given context: 'What is the capital of France? The capital of France is Paris.'
  Token: 'Ġcapital' | Probability: 0.782313 | Given context: 'What is the capital of France? The capital of France is Paris. The'
  Token: 'Ġis' | Probability: 0.018124 | Given context: 'What is the capital of France? The capital of France is Paris. The capital'
  Token: 'ĠParis' | Probability: 0.362402 | Given context: 'What is the capital of France? The capital of France is Paris. The capital is'
  p(y|x,z) = 0.000208 × 0.782313 × 0.018124 × 0.362402 = 0.0000010703
  Contribution to final probability: 0.7 × 0.0000010703 = 0.0000007492

--- Document 2: 'France is a country in Europe.' (p(z|x) = 0.2) ---

Tokenized response: ['The', 'Ġcapital', 'Ġis', 'ĠParis']
  Token:

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and causal language model for the chosen checkpoint
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Query (x) and expanded set of candidate documents (z)
query = "What are some interesting facts about Paris, France?"
documents = [
    "Paris is the capital of France and is known as the City of Light.",
    "The Eiffel Tower in Paris was built for the 1889 World's Fair and stands 324 meters tall.",
    "France is a country in Western Europe with a population of about 67 million people.",
    "Paris hosts many famous museums including the Louvre, which houses the Mona Lisa.",
    "The Seine River flows through Paris and has 37 bridges within the city limits.",
    "French cuisine is famous worldwide, with Paris having over 70 Michelin-starred restaurants.",
    "The Notre-Dame Cathedral in Paris began construction in 1163 and was damaged by fire in 2019."
]
# Hand-assigned retrieval prior p(z|x): one weight per entry of `documents`, same order
doc_probs = [0.25, 0.20, 0.05, 0.18, 0.12, 0.08, 0.12]

# The code below pairs documents with weights via zip(), which silently drops
# unmatched entries, and treats the weights as a probability distribution.
# Fail loudly here if either assumption is broken by a later edit.
assert len(doc_probs) == len(documents), "doc_probs must have one entry per document"
assert abs(sum(doc_probs) - 1.0) < 1e-9, "doc_probs must sum to 1"

# Function to get token probabilities for a response given a document
def get_token_probs(query, document, response):
    """Score `response` token by token, conditioned on `query` and `document`.

    The prompt is ``f"{query} {document}"``. The response token ids are appended
    to the prompt ids and the module-level ``model`` is run once over the whole
    sequence; the probability of each response token is read from the logits at
    the position just before it. For a causal LM (``model`` is loaded with
    ``AutoModelForCausalLM`` in this notebook) this gives the same conditional
    probabilities as re-running the model on every growing prefix.

    Prints the tokenized response, one line per token (its probability and the
    tail of the text that precedes it in the model input), and the final
    product, abbreviated when the response has more than 8 tokens.

    Args:
        query: Question text (x).
        document: Retrieved passage text (z).
        response: Candidate answer text (y).

    Returns:
        tuple: ``(sequence_prob, token_probs)`` where ``token_probs`` is the list
        of per-token probabilities in response order and ``sequence_prob`` is
        their product as a Python float. An empty response yields ``(1.0, [])``.
    """
    input_text = f"{query} {document}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Tokenize the response and get token strings for display.
    # NOTE: the response is tokenized on its own and its ids are concatenated
    # directly after the prompt ids; no separator is inserted between them.
    response_ids = tokenizer.encode(response, add_special_tokens=False)
    response_tokens = tokenizer.convert_ids_to_tokens(response_ids)

    print(f"\nTokenized response ({len(response_tokens)} tokens): {response_tokens}")

    token_probs = []
    if response_ids:
        prompt_len = input_ids.shape[1]
        response_tensor = torch.tensor(
            [response_ids], dtype=input_ids.dtype, device=input_ids.device
        )
        full_ids = torch.cat([input_ids, response_tensor], dim=1)

        # One forward pass over prompt + response instead of one pass per token
        with torch.no_grad():
            logits = model(full_ids).logits

        # The logits at position t are the prediction for token t + 1, so the
        # rows that predict the response start at the last prompt position and
        # stop one before the end of the sequence.
        step_probs = torch.softmax(logits[0, prompt_len - 1:-1, :], dim=-1)
        targets = response_tensor[0].to(step_probs.device).unsqueeze(1)
        token_probs = step_probs.gather(1, targets).squeeze(1).tolist()

        for i, (token, token_prob) in enumerate(zip(response_tokens, token_probs)):
            # Decode exactly the ids that precede this token in the model input
            context = tokenizer.decode(full_ids[0, :prompt_len + i])

            # For brevity, only show the last 50 chars of context: the tail is
            # the part that changes from token to token (the head is always the
            # same query text). A single leading ellipsis marks the truncation.
            short_context = "..." + context[-50:] if len(context) > 50 else context
            print(f"  Token: '{token}' | Probability: {token_prob:.6f} | Given context: '{short_context}'")

    # Calculate p(y|x,z) as product of token probabilities. Use float64: a
    # float32 product of many small probabilities underflows to exactly 0.
    sequence_prob = torch.tensor(token_probs, dtype=torch.float64).prod().item()

    # For readability, only show first few and last few token probabilities if there are many
    if len(token_probs) > 8:
        prob_str = f"{' × '.join([f'{p:.6f}' for p in token_probs[:3]])} × ... × {' × '.join([f'{p:.6f}' for p in token_probs[-3:]])}"
    else:
        prob_str = f"{' × '.join([f'{p:.6f}' for p in token_probs])}"

    print(f"  p(y|x,z) = {prob_str} = {sequence_prob:.10f}")

    return sequence_prob, token_probs

# More complex response
response = "Paris is the capital of France and is famous for the Eiffel Tower, which was built in 1889. The city is known for its art museums like the Louvre and its beautiful architecture."

rag_sequence_prob = 0
all_doc_contributions = []
# Per-document list of per-token probabilities, kept so the token analysis
# below can reuse them instead of running the model again.
all_doc_token_probs = []

print(f"Query: '{query}'")
print(f"Response: '{response}'")

# Calculate document contributions
for i, (doc, doc_prob) in enumerate(zip(documents, doc_probs)):
    print(f"\n--- Document {i+1}: '{doc}' (p(z|x) = {doc_prob}) ---")

    # Calculate p(y|x,z) for this document
    response_prob, token_probs = get_token_probs(query, doc, response)
    all_doc_token_probs.append(token_probs)

    # Calculate contribution to final probability
    contribution = doc_prob * response_prob
    all_doc_contributions.append(contribution)

    print(f"  Contribution to final probability: {doc_prob} × {response_prob:.10f} = {contribution:.10f}")

    # Add to weighted sum: p(z|x) * p(y|x,z)
    rag_sequence_prob += contribution

print("\n--- Final RAG-Sequence Probability ---")
print(f"p_RAG-Sequence(y|x) = {' + '.join([f'{c:.10e}' for c in all_doc_contributions])} = {rag_sequence_prob:.10e}")

# Additional analysis
print("\n--- Document Contribution Analysis ---")
# Sort documents by contribution, largest first; keep the original index for display
sorted_contributions = sorted([(i, doc, prob, contrib)
                              for i, (doc, prob, contrib) in
                              enumerate(zip(documents, doc_probs, all_doc_contributions))],
                             key=lambda x: x[3], reverse=True)

# Show contribution breakdown
for i, doc, prob, contrib in sorted_contributions:
    if rag_sequence_prob > 0:
        percentage = (contrib/rag_sequence_prob)*100
        print(f"Document {i+1}: {percentage:.2f}% of final probability")
    else:
        # The total can be exactly 0 when every sequence probability underflows;
        # a share of it is undefined, so report that instead of dividing by zero.
        print(f"Document {i+1}: share undefined (final probability is 0)")
    print(f"  - Content: '{doc}'")
    print(f"  - p(z|x): {prob}")
    print(f"  - Contribution: {contrib:.10e}\n")

# Token analysis - which documents gave highest probabilities for key tokens
print("--- Token Probability Analysis ---")
# For demonstration, analyze a few key tokens in the response
key_tokens = ["Paris", "Eiffel", "Tower", "Louvre"]
# Same tokenizer call as inside get_token_probs, so position `pos` here lines
# up with index `pos` of each per-document probability list collected above.
tokenized_response = tokenizer.encode(response, add_special_tokens=False)
response_tokens = tokenizer.convert_ids_to_tokens(tokenized_response)

for key_token in key_tokens:
    print(f"\nAnalyzing token: '{key_token}'")
    # Find positions of this token in the response.
    # NOTE: this is a case-insensitive substring match against single tokens, so
    # a word the tokenizer splits into several pieces (e.g. "Eiffel" or "Louvre"
    # with this tokenizer) is reported as not found.
    positions = [i for i, token in enumerate(response_tokens) if key_token.lower() in token.lower()]

    if positions:
        for pos in positions:
            print(f"  Position {pos} ('{response_tokens[pos]}')")
            # Compare probabilities across documents, reusing what
            # get_token_probs already computed for each document.
            token_probs_across_docs = [
                (i, doc_token_probs[pos])
                for i, doc_token_probs in enumerate(all_doc_token_probs)
            ]

            # Sort and display
            token_probs_across_docs.sort(key=lambda x: x[1], reverse=True)
            for i, prob in token_probs_across_docs[:3]:  # Show top 3
                print(f"    Doc {i+1}: {prob:.6f} - '{documents[i][:50]}...'")
    else:
        print("  Token not found directly in response")

Query: 'What are some interesting facts about Paris, France?'
Response: 'Paris is the capital of France and is famous for the Eiffel Tower, which was built in 1889. The city is known for its art museums like the Louvre and its beautiful architecture.'

--- Document 1: 'Paris is the capital of France and is known as the City of Light.' (p(z|x) = 0.25) ---

Tokenized response (39 tokens): ['Paris', 'Ġis', 'Ġthe', 'Ġcapital', 'Ġof', 'ĠFrance', 'Ġand', 'Ġis', 'Ġfamous', 'Ġfor', 'Ġthe', 'ĠE', 'iff', 'el', 'ĠTower', ',', 'Ġwhich', 'Ġwas', 'Ġbuilt', 'Ġin', 'Ġ1889', '.', 'ĠThe', 'Ġcity', 'Ġis', 'Ġknown', 'Ġfor', 'Ġits', 'Ġart', 'Ġmuseums', 'Ġlike', 'Ġthe', 'ĠLou', 'vre', 'Ġand', 'Ġits', 'Ġbeautiful', 'Ġarchitecture', '.']
  Token: 'Paris' | Probability: 0.000183 | Given context: 'What are some interesting facts about Paris, Franc......'
  Token: 'Ġis' | Probability: 0.657991 | Given context: 'What are some interesting facts about Paris, Franc......'
  Token: 'Ġthe' | Probability: 0.160227 | Gi