# Guidelines and Examples Embeddings Setup

This notebook prepares guidelines and case examples for use in case generation by:
1. Loading guidelines and splitting them into chunks
2. Loading example cases and generating summaries
3. Creating embeddings for all content
4. Storing in a Qdrant collection for similarity search during case generation

In [1]:
# Import required libraries
import sys
import os
import json
import time
import uuid
import re
from pathlib import Path
from tqdm.notebook import tqdm
import pandas as pd
from dotenv import load_dotenv
import google.generativeai as genai
from IPython.display import display, Markdown
from qdrant_client import QdrantClient
from qdrant_client.http import models

sys.path.append("..")
from utils.qdrant_client import get_qdrant_client, get_embedding

# Load environment variables
load_dotenv()

# Configure Google Generative AI
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))

## Test Connection and Setup

In [2]:
# Test connection to Qdrant
client = get_qdrant_client()
try:
    collections = client.get_collections().collections
    print(f"✅ Successfully connected to Qdrant Cloud")
    print(f"Available collections: {[c.name for c in collections]}")
except Exception as e:
    print(f"❌ Failed to connect to Qdrant Cloud: {e}")
    
# Test embedding generation
test_embedding = get_embedding("This is a test for embedding generation")
print(f"Test embedding dimension: {len(test_embedding)}")

✅ Successfully connected to Qdrant Cloud
Available collections: ['logistics_datapoints']
Test embedding dimension: 768


## Create New Collection for Guidelines and Examples

In [3]:
# Create collection for guidelines and examples
COLLECTION_NAME = "case_generation_references"

# Check if collection already exists
collections = client.get_collections().collections
if any(c.name == COLLECTION_NAME for c in collections):
    print(f"Collection '{COLLECTION_NAME}' already exists")
    recreate = input("Do you want to recreate the collection? (y/n): ")
    if recreate.lower() == 'y':
        client.delete_collection(COLLECTION_NAME)
        print(f"Deleted existing collection '{COLLECTION_NAME}'")
    else:
        print("Keeping existing collection")

# Create collection if it doesn't exist or was deleted
collections = client.get_collections().collections
if not any(c.name == COLLECTION_NAME for c in collections):
    # Create collection with Google's embedding dimension (768)
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=models.VectorParams(
            size=768,  # Google text-embedding-004 dimension
            distance=models.Distance.COSINE
        ),
        on_disk_payload=True,  # Store payload on disk for larger datasets
    )
    print(f"Created collection '{COLLECTION_NAME}'")
    
    # Create indices for efficient filtering
    index_fields = [
        "content_type",  # guideline or example
        "guideline_type",  # general, bishou, maritime, container
        "chunk_id",
        "filename",
        "title"
    ]
    
    for field in index_fields:
        client.create_payload_index(
            collection_name=COLLECTION_NAME,
            field_name=field,
            field_schema=models.PayloadSchemaType.KEYWORD
        )
    print(f"Created indices for fields: {', '.join(index_fields)}")

Created collection 'case_generation_references'
Created indices for fields: content_type, guideline_type, chunk_id, filename, title


## Generate Text with Gemini

In [4]:
def generate_with_llm(prompt, model="gemini-2.0-flash-exp", max_retries=3):
    """Generate text using Gemini with rate limiting and retries"""
    for attempt in range(max_retries):
        try:
            model_instance = genai.GenerativeModel(model_name=model)
            response = model_instance.generate_content(prompt)
            return response.text
        except Exception as e:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt + 1  # Exponential backoff
                print(f"Generation attempt {attempt+1} failed: {e}. Retrying after {wait_time}s...")
                time.sleep(wait_time)
            else:
                print(f"Failed after {max_retries} attempts: {e}")
                return "Failed to generate content"

# Test the generation function
test_response = generate_with_llm("Summarize the key principles of maritime logistics in one sentence.")
print(f"Test generation response: {test_response}")

Test generation response: Maritime logistics is about efficiently and effectively managing the flow of goods, information, and resources across the sea to ensure timely, secure, and cost-effective delivery from origin to destination.



## Process Guidelines

In [6]:
# Required imports for markdown processing
import re
from langchain.text_splitter import MarkdownHeaderTextSplitter

In [8]:
def split_markdown(text):
    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
        ("####", "Header 4"),
        ("#####", "Header 5"),
        ("######", "Header 6"),
    ]

    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    md_elements = markdown_splitter.split_text(text)

    chunks = []
    current_chunk = ""
    current_chunk_len = 0

    for element in md_elements:
        element_text = element.page_content
        element_len = len(element_text)

        if current_chunk_len + element_len > 2000:
            if current_chunk:
                chunks.append(current_chunk.strip())
                current_chunk = ""
                current_chunk_len = 0

            # If the element itself is longer than 2000 characters, split it
            while element_len > 2000:
                chunks.append(element_text[:2000].strip())
                element_text = element_text[2000:]
                element_len -= 2000

            current_chunk = element_text
            current_chunk_len = element_len
        else:
            current_chunk += "\n\n" + element_text if current_chunk else element_text
            current_chunk_len += element_len

    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks

In [9]:
def process_markdown_with_metadata(text, guideline_type, file_path):
    """Process markdown with enhanced metadata"""
    chunks = split_markdown(text)
    results = []

    for i, chunk in enumerate(chunks):
        title = extract_title(chunk)
        # Calculate chunk overlap with next chunk if available
        overlap = ""
        if i < len(chunks) - 1:
            next_chunk = chunks[i + 1]
            next_lines = next_chunk.split("\n")
            # Get header of next section for context
            for line in next_lines:
                if re.match(r'^#+\s+', line):
                    overlap = line
                    break
                    
        # Get header level for better hierarchy understanding
        header_level = 1
        lines = chunk.strip().split('\n')
        for line in lines:
            header_match = re.match(r'^(#+)\s+', line)
            if header_match:
                header_level = len(header_match.group(1))
                break
                
        results.append({
            "content_type": "guideline",
            "guideline_type": guideline_type,
            "chunk_id": f"{guideline_type}_{i}",
            "title": title,
            "content": chunk,
            "header_level": header_level,
            "next_section": overlap,
            "filename": file_path.split("/")[-1],
            "position": i
        })

    return results

In [11]:
def extract_title(chunk):
    """Extract title from a markdown chunk"""
    lines = chunk.strip().split('\n')
    for line in lines:
        # Look for a markdown header
        if re.match(r'^#+\s+', line):
            return re.sub(r'^#+\s+', '', line).strip()
    # If no header is found, use the first non-empty line
    for line in lines:
        if line.strip():
            return line[:50].strip() + '...' if len(line) > 50 else line.strip()
    return "Untitled Section"

In [22]:
def load_and_chunk_guidelines(file_path=None, guideline_type=None):
    """
    Load and chunk a single guideline file or all guidelines
    
    Args:
        file_path (str, optional): Path to a specific guideline file. If None, processes all guideline files.
        guideline_type (str, optional): Type of guideline. If None, tries to infer from filename.
        
    Returns:
        list: List of guideline chunks
    """
    # Default paths for all guidelines
    all_guideline_paths = {
        "general": "../Docs/Guidelines/3_Case_Generation_Guideline.md",
        "bishou": "../Docs/Guidelines/3_Case_Generation_Bishou.md", 
        "maritime": "../Docs/Guidelines/3_Case_Generation_Maritime_Logistics.md",
        "ocean": "../Docs/Guidelines/3_Case_Generation_Ocean_Container.md"
    }
    
    guideline_chunks = []
    
    # If a specific file is provided, process only that file
    if file_path:
        paths_to_process = {}
        
        # If guideline type is not provided, try to infer from filename
        if guideline_type is None:
            filename = os.path.basename(file_path)
            if "Guideline" in filename:
                guideline_type = "general"
            elif "Bishou" in filename:
                guideline_type = "bishou"
            elif "Maritime" in filename:
                guideline_type = "maritime"
            elif "Ocean" in filename or "Container" in filename:
                guideline_type = "ocean"
            else:
                guideline_type = "custom"
                
        paths_to_process[guideline_type] = file_path
    else:
        # Process all default guideline files
        paths_to_process = all_guideline_paths
    
    # Process each file
    for guideline_type, path in paths_to_process.items():
        try:
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
                content_length = len(content)
                
                print(f"Processing {path}: {content_length} characters")
                
                # Try header-based splitting first
                chunks = re.split(r'(?=#+\s)', content)
                chunk_method = "header"
                print(f"  - Header-based chunks for {guideline_type}: {len(chunks)}")
                
                # If only one chunk, skip to size-based splitting with overlap
                if len(chunks) <= 1:
                    chunks = []
                    chunk_size = 1800
                    overlap = 100
                    
                    # Generate chunks with overlap
                    for i in range(0, len(content), chunk_size - overlap):
                        if i > 0:  # Not the first chunk
                            start_pos = i
                        else:
                            start_pos = 0
                            
                        chunk = content[start_pos:start_pos + chunk_size]
                        if len(chunk.strip()) > 50:  # Skip very small chunks
                            chunks.append(chunk)
                    
                    chunk_method = "size"
                    print(f"  - Size-based chunks with {overlap} char overlap: {len(chunks)}")
                
                file_chunks = 0
                for i, chunk in enumerate(chunks):
                    if len(chunk.strip()) > 50:  # Skip very small chunks
                        title = extract_title(chunk) or f"{guideline_type} section {i+1}"
                        guideline_chunks.append({
                            "content_type": "guideline",
                            "guideline_type": guideline_type,
                            "chunk_id": f"{guideline_type}_{i}",
                            "title": title,
                            "content": chunk,
                            "filename": path.split("/")[-1],
                            "chunk_method": chunk_method,
                            "chunk_index": i,
                            "total_chunks": len(chunks)
                        })
                        file_chunks += 1
                
                print(f"  - Final chunks for {guideline_type}: {file_chunks}")
        except Exception as e:
            print(f"Error loading guideline {path}: {e}")
    
    print(f"Created {len(guideline_chunks)} chunks from {len(paths_to_process)} guideline files")
    return guideline_chunks

# Example usage:
# Process a single file
# guideline_chunks = load_and_chunk_guidelines("../Docs/Guidelines/3_Case_Generation_Guideline.md")

# Process all default files
# guideline_chunks = load_and_chunk_guidelines()

In [23]:
# Load and chunk guidelines
guideline_chunks = load_and_chunk_guidelines()

Processing ../Docs/Guidelines/3_Case_Generation_Guideline.md: 16838 characters
  - Header-based chunks for general: 1
  - Size-based chunks with 100 char overlap: 10
  - Final chunks for general: 10
Processing ../Docs/Guidelines/3_Case_Generation_Bishou.md: 12466 characters
  - Header-based chunks for bishou: 1
  - Size-based chunks with 100 char overlap: 8
  - Final chunks for bishou: 8
Processing ../Docs/Guidelines/3_Case_Generation_Maritime_Logistics.md: 16838 characters
  - Header-based chunks for maritime: 1
  - Size-based chunks with 100 char overlap: 10
  - Final chunks for maritime: 10
Processing ../Docs/Guidelines/3_Case_Generation_Ocean_Container.md: 14223 characters
  - Header-based chunks for ocean: 1
  - Size-based chunks with 100 char overlap: 9
  - Final chunks for ocean: 9
Created 37 chunks from 4 guideline files


In [30]:
len(guideline_chunks)

37

## Process Example Cases

In [31]:
def load_and_summarize_examples():
    """Load example cases and generate summaries using the same approach as guidelines"""
    example_dir = "../Data/Cases/"
    example_items = []
    
    # Check if the directory exists
    if not os.path.exists(example_dir):
        print(f"Warning: Example directory {example_dir} not found")
        return []
    
    # Get list of markdown files first
    md_files = [f for f in os.listdir(example_dir) if f.endswith(".md")]
    print(f"Found {len(md_files)} example case files")
    
    # Process each file
    for i, file in enumerate(tqdm(md_files, desc="Processing example cases")):
        try:
            with open(os.path.join(example_dir, file), 'r', encoding='utf-8') as f:
                content = f.read()
                
                # Extract title from content or filename
                title = extract_title(content) or file.replace(".md", "").replace("_", " ")
                
                # Create a summary for embedding
                summary_prompt = f"""
                Read the following case example and create a concise summary that captures:
                - The main scenario
                - Key entities involved
                - Core regulatory issues
                - Problem to be solved
                
                EXAMPLE CASE:
                {content[:7000]}
                
                SUMMARY (focus only on the key aspects, be concise):
                """
                
                print(f"Generating summary for {file} ({i+1}/{len(md_files)})...")
                
                # Generate the summary
                try:
                    summary = generate_with_llm(summary_prompt)
                    print(f"  ✓ Summary generated: {len(summary)} characters")
                    time.sleep(6)  # Rate limit
                except Exception as e:
                    print(f"  ✗ Error generating summary: {e}")
                    summary = None
                
                # Add the example to our list (similar structure to guidelines)
                example_items.append({
                    "content_type": "example",
                    "example_type": "case_study",
                    "chunk_id": f"example_{file}",
                    "title": title,
                    "content": content,
                    "summary": summary,
                    "filename": file,
                    "word_count": len(content.split())
                })
                
        except Exception as e:
            print(f"Error processing file {file}: {e}")
    
    print(f"Successfully processed {len(example_items)} example cases")
    return example_items


In [32]:
# Process the examples
example_items = load_and_summarize_examples()

Found 9 example case files


Processing example cases:   0%|          | 0/9 [00:00<?, ?it/s]

Generating summary for Case_Global_Trust.md (1/9)...
  ✓ Summary generated: 573 characters


Processing example cases:  11%|█         | 1/9 [00:07<00:58,  7.29s/it]

Generating summary for GPT_Case_Jamestown_Engine.md (2/9)...
  ✓ Summary generated: 812 characters


Processing example cases:  22%|██▏       | 2/9 [00:14<00:50,  7.27s/it]

Generating summary for GPT_Case_Global_Semiconductor.md (3/9)...
  ✓ Summary generated: 735 characters


Processing example cases:  33%|███▎      | 3/9 [00:21<00:43,  7.22s/it]

Generating summary for Case_Famine_Relief.md (4/9)...
  ✓ Summary generated: 957 characters


Processing example cases:  44%|████▍     | 4/9 [00:29<00:36,  7.30s/it]

Generating summary for Case_Manwell_Toy.md (5/9)...
  ✓ Summary generated: 809 characters


Processing example cases:  56%|█████▌    | 5/9 [00:36<00:29,  7.38s/it]

Generating summary for GPT_Case_Nicholas_Vroom.md (6/9)...
  ✓ Summary generated: 827 characters


Processing example cases:  67%|██████▋   | 6/9 [00:43<00:22,  7.35s/it]

Generating summary for Case_Fedco_Fasteners.md (7/9)...
  ✓ Summary generated: 657 characters


Processing example cases:  78%|███████▊  | 7/9 [00:51<00:14,  7.33s/it]

Generating summary for Case_Great_Bite.md (8/9)...
  ✓ Summary generated: 839 characters


Processing example cases:  89%|████████▉ | 8/9 [00:58<00:07,  7.40s/it]

Generating summary for Case_Barbara_Blouse.md (9/9)...
  ✓ Summary generated: 578 characters


Processing example cases: 100%|██████████| 9/9 [01:06<00:00,  7.34s/it]

Successfully processed 9 example cases





In [33]:
# Display results
if example_items:
    print("\nExample summaries:")
    for i, item in enumerate(example_items[:2]):  # Show first 2
        print(f"\nExample {i+1}: {item['title']}")
        if item['summary']:
            print(f"Summary: {item['summary'][:150]}...")
        else:
            print("Summary: [Failed to generate]")
            
    # Save to JSON for backup
    import json
    with open("../Data/examples_with_summaries.json", "w", encoding="utf-8") as f:
        json.dump(example_items, f, ensure_ascii=False, indent=2)
    print("\nSaved examples to '../Data/examples_with_summaries.json'")


Example summaries:

Example 1: CASE: Global Trust Company
Summary: *   **Scenario:** A commercial loan officer at an international bank is tasked with assessing the bank's potential investments in the transportation a...

Example 2: CASE: Global Trust Company
Summary: *   **Scenario:** Global Trust Company, an international bank, is evaluating loan and investment opportunities in the transportation and logistics sec...

Saved examples to '../Data/examples_with_summaries.json'


## Generate Embeddings and Add to Qdrant

In [35]:
def add_embeddings_to_qdrant(items, collection_name=COLLECTION_NAME):
    """Generate embeddings and add to Qdrant collection"""
    client = get_qdrant_client()
    points = []
    
    print(f"Generating embeddings for {len(items)} items...")
    for item in tqdm(items):
        # For guidelines, embed the content
        # For examples, embed the summary
        text_to_embed = item['summary'] if 'summary' in item else item['content']
        
        # Generate embedding
        embedding = get_embedding(text_to_embed)
        if embedding is None:
            print(f"Warning: Failed to generate embedding for {item.get('chunk_id', item.get('title', 'unknown item'))}")
            continue
                
        # Generate a unique ID
        unique_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, item['chunk_id']))
                
        # Add point to batch
        points.append(
            models.PointStruct(
                id=unique_id,
                vector=embedding,
                payload=item
            )
        )
    
    # Upload points
    if points:
        client.upsert(
            collection_name=collection_name,
            points=points
        )
        print(f"✅ Added {len(points)} items to collection '{collection_name}'")
    else:
        print("No valid points to add")

# Combine all items and add to Qdrant
all_items = guideline_chunks + example_items
add_embeddings_to_qdrant(all_items)

Generating embeddings for 46 items...


100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


✅ Added 46 items to collection 'case_generation_references'


## Test Similarity Search

In [36]:
def find_similar_references(query, content_type=None, limit=3, collection_name=COLLECTION_NAME):
    """Find similar guidelines or examples based on the query"""
    client = get_qdrant_client()
    
    # Generate embedding for query
    query_embedding = get_embedding(query)
    if query_embedding is None:
        print("Failed to generate embedding for query")
        return []
    
    # Build search filter
    search_filter = None
    if content_type:
        search_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="content_type",
                    match=models.MatchValue(value=content_type)
                )
            ]
        )
    
    # Execute search
    results = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        query_filter=search_filter,
        limit=limit
    )
    
    return results

def display_reference_results(results, query=None):
    """Display search results in a readable way"""
    if query:
        display(Markdown(f"## Search results for: '{query}'"))
    
    if not results:
        display(Markdown("*No results found*"))
        return
    
    for i, result in enumerate(results, 1):
        content_type = result.payload.get('content_type', 'unknown')
        guideline_type = result.payload.get('guideline_type', 'unknown')
        title = result.payload.get('title', 'Untitled')
        
        display(Markdown(f"### Result {i} - Score: {result.score:.4f}"))
        display(Markdown(f"**Type:** {content_type} ({guideline_type})  \n**Title:** {title}"))
        
        # Show content preview
        content = result.payload.get('content', '')
        if content:
            preview = content[:300] + '...' if len(content) > 300 else content
            display(Markdown(f"**Preview:**  \n{preview}"))
        
        # Show summary for examples
        if 'summary' in result.payload:
            display(Markdown(f"**Summary:**  \n{result.payload['summary']}"))
            
        display(Markdown("---"))

## Try Some Test Queries

In [37]:
# Test query against guidelines
guideline_query = "Creating realistic logistics scenarios for container shipping"
guideline_results = find_similar_references(guideline_query, content_type="guideline", limit=2)
display_reference_results(guideline_results, guideline_query)

  results = client.search(


## Search results for: 'Creating realistic logistics scenarios for container shipping'

### Result 1 - Score: 0.7450

**Type:** guideline (ocean)  
**Title:** nue using the Refined Port Arrival Document Checkl...

**Preview:**  
nue using the Refined Port Arrival Document Checklist Template (Version 3 - Security & Efficiency Focused) as the basis for creating "ideal" checklists for your scenarios.

This **Handbook of Ocean Container Transport Logistics Research Report** provides a focused and actionable guide for generating...

---

### Result 2 - Score: 0.7287

**Type:** guideline (bishou)  
**Title:** omplete or inaccurate data in the 24-hour manifest...

**Preview:**  
omplete or inaccurate data in the 24-hour manifest, leading to penalties and inspection delays.
        *   Scenario where a shipping line struggles to meet the 24-hour manifest cut-off time, requiring expedited documentation procedures and potentially incurring extra costs.
        *   Scenario hig...

---

In [38]:
# Test query against examples
example_query = "Customs documentation requirements for imports"
example_results = find_similar_references(example_query, content_type="example", limit=2)
display_reference_results(example_results, example_query)

  results = client.search(


## Search results for: 'Customs documentation requirements for imports'

### Result 1 - Score: 0.5946

**Type:** example (unknown)  
**Title:** CASE • Manwell Toy Importers

**Preview:**  
# CASE • Manwell Toy Importers

Headquartered in Winnipeg, Manitoba, Manwell Toy Company operated a large toy store on Portage Avenue, near the location where the company had been founded nearly seventy years ago. Above the toy store were offices from which the firm managed its chain of fifteen reta...

**Summary:**  
**Scenario:** Manwell Toy Importers, a Canadian toy retailer, is evaluating its Asian supply chain. Currently, toys are consolidated in Hong Kong and shipped to Vancouver.

**Entities:**

*   Manwell Toy Importers (Importer/Retailer)
*   Asian Toy Manufacturers
*   Consolidators (Hong Kong)
*   Port of Vancouver
*   Port of Halifax (potential)

**Regulatory Issues:**  The case does not mention any specific regulatory issues.

**Problem:** Manwell needs to optimize its inbound logistics to reduce costs and better serve different Canadian markets, specifically exploring the feasibility of shipping via Singapore to Halifax versus the current Hong Kong to Vancouver route. They also need to consider how to cater to the French-speaking Quebec market and the impact of e-commerce on warehousing locations.


---

### Result 2 - Score: 0.5937

**Type:** example (unknown)  
**Title:** CASE • Barbara’s Blouses

**Preview:**  
# CASE • Barbara’s Blouses

Barbara Linse buys blouses for a chain of ladies’ wear stores in major U.S. cities west of the Mississippi River. They sell clothing made in both the United States and Asia. Asia supplies an increasing amount and percentage of blouses that the chain sells. Twice yearly Ba...

**Summary:**  
*   **Scenario:** Barbara Linse, a blouse buyer for a US ladies' wear chain, primarily sources from Asia. She makes biannual trips to place orders and coordinate with other buyers.
*   **Key Entities:** Barbara Linse (buyer), Ladies' wear chain (employer), Asian blouse manufacturers.
*   **Core Regulatory Issues:** Ethical sourcing (child/prison labor), import regulations, international trade (tariffs).
*   **Problem to be solved:** How to effectively and ethically manage the overseas blouse sourcing process, considering costs, logistics, quality control, and compliance.


---

In [39]:
# Query across all content types
general_query = "Maritime logistics challenges in international shipping"
all_results = find_similar_references(general_query, limit=3)
display_reference_results(all_results, general_query)

  results = client.search(


## Search results for: 'Maritime logistics challenges in international shipping'

### Result 1 - Score: 0.7137

**Type:** example (unknown)  
**Title:** CASE: Global Trust Company

**Preview:**  
# CASE: Global Trust Company

Betsy Bertram had worked in the commercial loan office of the Farmers and Merchants’ Bank in Chicago, which recently had become part of an international banking conglomerate headquartered in Amsterdam. She supervised the section that handled loans to carriers, firms tha...

**Summary:**  
*   **Scenario:** Global Trust Company, an international bank, is evaluating loan and investment opportunities in the transportation and logistics sector.
*   **Key Entities:** Betsy Bertram (loan officer), new boss (from IT), potential borrowers/investees in shipping, supply chain, and transportation.
*   **Core Regulatory Issues:** Environmental regulations and compliance costs, particularly concerning "green" initiatives, and international maritime regulations.
*   **Problem to be solved:** How should the bank assess the risks and potential of different investment opportunities in the transportation and logistics industry, considering factors like vessel size optimization, market demand for new services, environmental impacts, infrastructure investments, internet growth and even space exploration.


---

### Result 2 - Score: 0.7062

**Type:** guideline (general)  
**Title:** ations.

**Preview:**  
ations.
    *   Example Scenario Ideas: (Build upon previous ideas, now emphasizing workflow and comprehensive document coverage)
        *   Typical container vessel arrival at a major European port (Rotterdam, Hamburg, Antwerp-Bruges, Riga) requiring a *full checklist* covering all essential comme...

---

### Result 3 - Score: 0.7062

**Type:** guideline (maritime)  
**Title:** ations.

**Preview:**  
ations.
    *   Example Scenario Ideas: (Build upon previous ideas, now emphasizing workflow and comprehensive document coverage)
        *   Typical container vessel arrival at a major European port (Rotterdam, Hamburg, Antwerp-Bruges, Riga) requiring a *full checklist* covering all essential comme...

---

## Summary

In [40]:
# Count items by type
client = get_qdrant_client()
total_count = client.count(collection_name=COLLECTION_NAME).count

# Get all points to analyze
points, _ = client.scroll(
    collection_name=COLLECTION_NAME,
    limit=total_count,
    with_payload=["content_type", "guideline_type"],
    with_vectors=False
)

# Count by type
content_types = {}
guideline_types = {}

for point in points:
    c_type = point.payload.get("content_type", "unknown")
    g_type = point.payload.get("guideline_type", "unknown")
    
    content_types[c_type] = content_types.get(c_type, 0) + 1
    guideline_types[g_type] = guideline_types.get(g_type, 0) + 1

# Display summary
display(Markdown(f"## Collection Summary"))
display(Markdown(f"Total items in collection: **{total_count}**"))

display(Markdown(f"### Content Types"))
for ctype, count in content_types.items():
    display(Markdown(f"- **{ctype}**: {count}"))

display(Markdown(f"### Guideline Types"))
for gtype, count in guideline_types.items():
    if gtype != "example":
        display(Markdown(f"- **{gtype}**: {count}"))

## Collection Summary

Total items in collection: **46**

### Content Types

- **guideline**: 37

- **example**: 9

### Guideline Types

- **general**: 10

- **maritime**: 10

- **unknown**: 9

- **ocean**: 9

- **bishou**: 8