In [2]:
!pip install transformers sentencepiece langchain_community

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m


In [12]:
#!/usr/bin/env python3
import os
import sys

# For DeepSeek, we use Hugging Face's AutoTokenizer.
from transformers import AutoTokenizer

def count_deepseek_tokens(text):
    # Initialize tokenizer for DeepSeek-R1 (adjust the model name if needed)
    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-70B")
    tokens = tokenizer.encode(text)
    return len(tokens)

# For Ollama, we use the LangChain community wrapper.
try:
    from langchain_community.llms.ollama import Ollama
except ImportError:
    print("Please install langchain_community (e.g. pip install langchain_community) to use Ollama functions.")
    sys.exit(1)

def count_ollama_tokens(text):
    # Instantiate the Ollama LLM with your preferred model and base_url.
    # Make sure Ollama is running locally (default port: 11434).
    ollama_llm = Ollama(model="llama3.2", base_url="http://localhost:11434")
    return ollama_llm.get_num_tokens(text)

def process_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    except Exception as e:
        print(f"Skipping {file_path}: {e}")
        return None

    text_length = len(text)
    deepseek_count = count_deepseek_tokens(text)
    ollama_count = count_ollama_tokens(text)
    return file_path, text_length, deepseek_count, ollama_count

def main():

    root_dir = "/Users/reva/Documents/geek_projects/dungbeetle"

    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            result = process_file(file_path)
            if result:
                file_path, text_length, deepseek_count, ollama_count = result
                print(f"File: {file_path}")
                print(f"Text length: {text_length} characters")
                print(f"DeepSeek token count: {deepseek_count}")
                print(f"Ollama token count: {ollama_count}")
                print("-" * 40)

if __name__ == "__main__":
    main()

File: /Users/reva/Documents/geek_projects/dungbeetle/go.mod
Text length: 2176 characters
DeepSeek token count: 868
Ollama token count: 1046
----------------------------------------
File: /Users/reva/Documents/geek_projects/dungbeetle/LICENSE
Text length: 1110 characters
DeepSeek token count: 234
Ollama token count: 286
----------------------------------------
File: /Users/reva/Documents/geek_projects/dungbeetle/Dockerfile
Text length: 494 characters
DeepSeek token count: 165
Ollama token count: 212
----------------------------------------
File: /Users/reva/Documents/geek_projects/dungbeetle/Makefile
Text length: 968 characters
DeepSeek token count: 306
Ollama token count: 388
----------------------------------------
File: /Users/reva/Documents/geek_projects/dungbeetle/config.toml.sample
Text length: 1126 characters
DeepSeek token count: 388
Ollama token count: 475
----------------------------------------
File: /Users/reva/Documents/geek_projects/dungbeetle/go.sum
Text length: 12062 cha

In [7]:
import os
import ollama
from langchain_community.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
import time
import sys
from prompts import (
    GENERAL_SUMMARY_PROMPT_TEMPLATE,
    FILE_SUMMARY_PROMPT_TEMPLATE,
    FOLDER_SUMMARY_PROMPT_TEMPLATE,
    ROOT_PROJECT_SUMMARY_PROMPT_TEMPLATE
)

# Initialize LangChain's Ollama wrapper for LLaMA 3.2
llm = ChatOllama(model="llama3.2")

def generate_summary(content, context="code"):
    """
    Uses LangChain with Ollama to summarize the given content.
    """
    prompt = GENERAL_SUMMARY_PROMPT_TEMPLATE.format(context=context, content=content)
    return llm.predict(prompt).strip()

def recursive_summarize_text(text, context="code", max_tokens=131072):
    """
    Recursively summarizes the text using LangChain's RecursiveCharacterTextSplitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=max_tokens, 
        chunk_overlap=200  # Ensures smooth transitions between chunks
    )

    chunks = splitter.split_text(text)
    
    if len(chunks) == 1:
        return generate_summary(text, context)
    
    # Summarize each chunk and combine results
    partial_summaries = [generate_summary(chunk, context) for chunk in chunks]
    combined_summary = "\n".join(partial_summaries)

    # Recurse until the combined summary fits within the token limit
    return recursive_summarize_text(combined_summary, context, max_tokens)


def main():
    file_path = "/Users/reva/Documents/geek_projects/dungbeetle/client/client.go"
    with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    
    text_length = len(text)
    foo = recursive_summarize_text(text)
    print(foo)

if __name__ == "__main__":
    main()

  return llm.predict(prompt).strip()


This is a Go package that provides an API client for interacting with the DungBeetle SQL Jobber service. The client allows users to:

1. Post new jobs to the job queue.
2. Retrieve the status of existing jobs.
3. Delete jobs and groups.
4. Fetch pending jobs from a specific queue.
5. Create a group and post multiple jobs under it.

The client uses HTTP requests to interact with the DungBeetle service, which is likely running on a remote server. The `doHTTPReq` function is a reusable method that makes an HTTP request with the given parameters and unmarshals the JSON response into a Go struct.

Here's a high-level overview of how the client works:

1. The user creates an instance of the `Client` struct, passing in an `Opt` object that contains configuration settings such as the root URL and HTTP client.
2. The client provides various methods for performing different operations, such as `PostJob`, `GetJobStatus`, and `DeleteJob`.
3. Each method calls the `doHTTPReq` function to make an HT