# Welcome to the ScaleDown Compression Workshop!

In this workshop, you'll compete to compress prompts by hand to different target percentages,
then use the ScaleDown API to achieve even better compression ratios.

## Workshop Structure:
1. Setup and installation
2. Manual compression challenges (3 rounds)
3. ScaleDown API compression comparison
4. Final leaderboard and analysis

## Competition Rules:
- First, try to compress each prompt to its target percentage manually
- Then use ScaleDown to see how it compares
- Token counts are measured with tiktoken for precise measurement

In [None]:
!pip install tiktoken -q

In [None]:
import tiktoken
import requests
import json
import os
from typing import Dict, List, Tuple
import pandas as pd
from datetime import datetime

In [None]:
# Initialize the tokenizer (using GPT-4 encoding as standard).
# Kept at module level so count_tokens() reuses this one encoder for every
# measurement in the notebook.
encoding = tiktoken.encoding_for_model("gpt-4")

In [None]:
def count_tokens(text: str) -> int:
    """Return how many tokens the module-level ``encoding`` produces for ``text``."""
    token_ids = encoding.encode(text)
    return len(token_ids)

def calculate_compression_ratio(original: str, compressed: str) -> float:
    """Return the compressed token count as a percentage of the original's.

    A result of 70.0 means ``compressed`` has 70% as many tokens as
    ``original``. An original with zero tokens yields 0.0 instead of
    dividing by zero.
    """
    baseline = count_tokens(original)
    reduced = count_tokens(compressed)
    return (reduced / baseline) * 100 if baseline else 0.0

def display_comparison(original: str, manual: str, scaledown_compressed: str, target: float):
    """Print a comparison table of manual vs ScaleDown compression.

    Args:
        original: The uncompressed prompt.
        manual: The hand-compressed prompt.
        scaledown_compressed: The prompt produced by ScaleDown.
        target: Target compression as the percentage of tokens to REMOVE
            (30.0 means the result should keep about 70% of the tokens).

    The "Ratio" column is the percentage of original tokens that were kept
    (the value returned by calculate_compression_ratio). "Accuracy" is 100
    minus the distance, in percentage points, between that ratio and the
    kept-token goal ``100 - target``.
    """

    orig_tokens = count_tokens(original)
    manual_tokens = count_tokens(manual)
    scaledown_tokens = count_tokens(scaledown_compressed)

    manual_ratio = calculate_compression_ratio(original, manual)
    scaledown_ratio = calculate_compression_ratio(original, scaledown_compressed)

    # `target` is "% removed" while the ratios are "% kept"; convert to the
    # same scale before measuring how close each attempt came.
    target_ratio = 100 - target

    print(f"🎯 TARGET: {target}% compression")
    print(f"{'='*80}")
    print("📊 RESULTS COMPARISON:")
    print(f"{'Method':<15}{'Tokens':<10}{'Ratio':<12}{'Accuracy':<12}{'Token Savings'}")
    print(f"{'-'*65}")
    print(f"{'Original':<15}{orig_tokens:<10}{100.0:<12.1f}{'-':<12}{'-'}")

    rows = (
        ("Your Manual", manual_tokens, manual_ratio),
        ("ScaleDown", scaledown_tokens, scaledown_ratio),
    )
    for label, tokens, ratio in rows:
        accuracy = 100 - abs(ratio - target_ratio)
        print(f"{label:<15}{tokens:<10}{ratio:<12.1f}{accuracy:<12.1f}{orig_tokens - tokens}")
    print(f"{'='*80}")

    # Show the compressed texts
    print("\n📝 YOUR COMPRESSION:")
    print(f"{manual}\n")
    print("🤖 SCALEDOWN COMPRESSION:")
    print(f"{scaledown_compressed}\n")

## ScaleDown API Setup

In [None]:
# ScaleDown API credentials.
# The SCALEDOWN_API_KEY environment variable takes precedence so the key does
# not have to be saved inside the notebook; if it is unset, the placeholder
# below is used and must be replaced by hand.
SCALEDOWN_API_KEY = os.environ.get("SCALEDOWN_API_KEY", "your key")  # 🔑 Add your API key here!

# ScaleDown API configuration
SCALEDOWN_URL = "https://api.scaledown.xyz/compress/raw"

In [None]:
def compress_with_scaledown(prompt: str, compression_rate: float, model: str = "gpt-4-turbo",
                            timeout: float = 60.0) -> Dict:
    """
    Compress a prompt using ScaleDown API.

    Args:
        prompt: The text to compress
        compression_rate: Value sent as ``scaledown.rate`` (0.1 = 10%, 0.9 = 90%)
        model: AI model to use
        timeout: Seconds to wait for the HTTP request before giving up

    Returns:
        Dictionary that always contains "success", "compressed_prompt",
        "original_tokens", "compressed_tokens" and "compression_ratio".
        On success it also contains "api_response" (the decoded JSON body);
        on failure it contains "error" and "compressed_prompt" is "".
        Network errors, HTTP error statuses and malformed response bodies
        are reported through the failure dictionary rather than raised.
    """
    headers = {
        'x-api-key': SCALEDOWN_API_KEY,
        'Content-Type': 'application/json'
    }

    payload = {
        "prompt": prompt,
        "model": model,
        "scaledown": {
            "rate": compression_rate
        }
    }

    original_tokens = count_tokens(prompt)

    try:
        # Without a timeout a stalled connection would hang the notebook cell.
        response = requests.post(
            SCALEDOWN_URL,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
        if not isinstance(result, dict):
            raise ValueError(f"Unexpected response type: {type(result).__name__}")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers non-JSON bodies on requests versions whose JSON
        # decode error is not a RequestException, plus the shape check above.
        return {
            "success": False,
            "error": str(e),
            # Same key as the success path so callers can index it safely.
            "compressed_prompt": "",
            "original_tokens": original_tokens,
            "compressed_tokens": 0,
            "compression_ratio": 0
        }

    # Treat a missing or null field as an empty compression result.
    compressed_prompt = result.get("compressed_prompt") or ""
    return {
        "success": True,
        "compressed_prompt": compressed_prompt,
        "original_tokens": original_tokens,
        "compressed_tokens": count_tokens(compressed_prompt),
        "compression_ratio": calculate_compression_ratio(prompt, compressed_prompt),
        "api_response": result
    }

## Level 1: 30% Compression

In [None]:
# Original prompt for Challenge 1 (Bronze level): compress to ~70% of its token count.
PROMPT_1 = """You are a helpful AI assistant designed to provide comprehensive and detailed responses to user queries. Your primary objective is to understand the user's intent, analyze their request thoroughly, and deliver accurate, well-structured, and informative answers. You should always strive to be clear, concise, and helpful while maintaining a professional yet friendly tone. When responding to questions, please consider multiple perspectives, provide relevant examples when appropriate, and ensure that your explanations are accessible to users with varying levels of expertise on the topic. If you encounter ambiguous requests, feel free to ask clarifying questions to better understand what the user is looking for. Additionally, always fact-check your responses and acknowledge when you're uncertain about specific details or when information might be subject to change."""

In [None]:
# Announce Challenge 1: show the original prompt, its token count, and the
# token budget (70% of the original count, truncated to an int).
prompt_1_tokens = count_tokens(PROMPT_1)
target_1_tokens = int(prompt_1_tokens * 0.7)

print("🥉 CHALLENGE 1: BRONZE LEVEL - 30% COMPRESSION")
print("=" * 60)
print(f"Original prompt ({prompt_1_tokens} tokens):")
print(f'"{PROMPT_1}"')
print("\n🎯 YOUR TASK: Compress this prompt to approximately 70% of its original length")
print(f"Target tokens: ~{target_1_tokens} tokens")

In [None]:
# 👇 EDIT THIS: Replace with your compressed version of PROMPT_1
# (goal: roughly 70% of the original token count).
YOUR_COMPRESSION_1 = """You are a helpful AI assistant that provides comprehensive, detailed responses to user queries. Your objective is to understand user intent, analyze requests thoroughly, and deliver accurate, well-structured answers. Be clear, concise, and helpful while maintaining a professional yet friendly tone. Consider multiple perspectives, provide relevant examples, and ensure explanations are accessible to users with varying expertise levels. Ask clarifying questions for ambiguous requests, fact-check responses, and acknowledge uncertainty when appropriate."""


In [None]:
# Compare with ScaleDown
scaledown_result_1 = compress_with_scaledown(PROMPT_1, 0.3)  # 0.3 rate = ~30% compression
if not scaledown_result_1["success"]:
    # A failed call reports "error" and may not carry "compressed_prompt";
    # surface the error instead of crashing with a KeyError.
    print(f"⚠️ ScaleDown request failed: {scaledown_result_1.get('error')}")
# Fall back to an empty string so the manual result is still displayed.
display_comparison(PROMPT_1, YOUR_COMPRESSION_1, scaledown_result_1.get("compressed_prompt", ""), 30.0)

## Level 2: 50% Compression

In [None]:
# Original prompt for Challenge 2 (Silver level): compress to ~50% of its token count.
PROMPT_2 = """As an expert data analyst and business intelligence specialist, your role involves examining complex datasets, identifying meaningful patterns and trends, generating actionable insights, and presenting findings in a clear and compelling manner to stakeholders across different organizational levels. You should be proficient in statistical analysis, data visualization techniques, and various analytical methodologies including regression analysis, clustering, time series analysis, and predictive modeling. When working with data, always ensure you validate your assumptions, check for data quality issues such as missing values or outliers, and consider the broader business context when interpreting results. Your analysis should be thorough yet accessible, providing both high-level summaries for executives and detailed technical explanations for analysts and data scientists. Additionally, you should be prepared to recommend specific actions based on your findings and help organizations make data-driven decisions that align with their strategic objectives and operational requirements."""

In [None]:
# Announce Challenge 2: show the original prompt, its token count, and the
# token budget (50% of the original count, truncated to an int).
prompt_2_tokens = count_tokens(PROMPT_2)
target_2_tokens = int(prompt_2_tokens * 0.5)

print(f"\n{'=' * 80}")
print("🥈 CHALLENGE 2: SILVER LEVEL - 50% COMPRESSION")
print("=" * 60)
print(f"Original prompt ({prompt_2_tokens} tokens):")
print(f'"{PROMPT_2}"')
print("\n🎯 YOUR TASK: Compress this prompt to approximately 50% of its original length")
print(f"Target tokens: ~{target_2_tokens} tokens")


In [None]:
# 👇 EDIT THIS: Replace with your compressed version of PROMPT_2
# (goal: roughly 50% of the original token count).
YOUR_COMPRESSION_2 = """As an expert data analyst, examine datasets, identify patterns, generate insights, and present findings clearly to stakeholders. Be proficient in statistical analysis, visualization, regression, clustering, time series, and predictive modeling. Validate assumptions, check data quality, consider business context. Provide high-level summaries for executives and technical details for analysts. Recommend actions and help organizations make data-driven decisions."""


In [None]:
# Compare with ScaleDown
scaledown_result_2 = compress_with_scaledown(PROMPT_2, 0.5)  # 0.5 rate = ~50% compression
if not scaledown_result_2["success"]:
    # A failed call reports "error" and may not carry "compressed_prompt";
    # surface the error instead of crashing with a KeyError.
    print(f"⚠️ ScaleDown request failed: {scaledown_result_2.get('error')}")
# Fall back to an empty string so the manual result is still displayed.
display_comparison(PROMPT_2, YOUR_COMPRESSION_2, scaledown_result_2.get("compressed_prompt", ""), 50.0)

## Level 3: 70% Compression

In [None]:
# Original prompt for Challenge 3 (Gold level): compress to ~30% of its token count.
PROMPT_3 = """You are an advanced artificial intelligence system specifically designed to function as a comprehensive research assistant and knowledge synthesis expert. Your capabilities encompass a wide range of academic and professional domains, including but not limited to scientific research, literary analysis, historical investigation, technological innovation, business strategy development, and creative problem-solving methodologies. When approached with complex research queries or analytical tasks, you should demonstrate the ability to break down multifaceted problems into manageable components, conduct thorough examination of available information from multiple authoritative sources, synthesize findings into coherent and well-structured presentations, and provide nuanced insights that account for various perspectives and potential limitations in the available data. Your responses should reflect not only factual accuracy but also critical thinking skills, methodological rigor, and an understanding of the broader implications of the research findings. Furthermore, you should be capable of adapting your communication style to match the needs and expertise level of your audience, whether they are undergraduate students seeking foundational understanding, graduate researchers requiring detailed technical analysis, or industry professionals looking for practical applications of theoretical concepts. In all interactions, maintain the highest standards of intellectual integrity, acknowledge uncertainties and limitations in available knowledge, and provide clear citations or references when drawing upon specific sources of information."""


In [None]:
# Announce Challenge 3: show the original prompt, its token count, and the
# token budget (30% of the original count, truncated to an int).
prompt_3_tokens = count_tokens(PROMPT_3)
target_3_tokens = int(prompt_3_tokens * 0.3)

print(f"\n{'=' * 80}")
print("🥇 CHALLENGE 3: GOLD LEVEL - 70% COMPRESSION")
print("=" * 60)
print(f"Original prompt ({prompt_3_tokens} tokens):")
print(f'"{PROMPT_3}"')
print("\n🎯 YOUR TASK: Compress this prompt to approximately 30% of its original length")
print(f"Target tokens: ~{target_3_tokens} tokens")

In [None]:
# 👇 EDIT THIS: Replace with your compressed version of PROMPT_3
# (goal: roughly 30% of the original token count).
YOUR_COMPRESSION_3 = """Advanced AI research assistant and knowledge synthesis expert across academic and professional domains. Break down complex problems, examine authoritative sources, synthesize findings, provide nuanced insights. Demonstrate critical thinking, methodological rigor, adapt communication to audience expertise level. Maintain intellectual integrity, acknowledge limitations, provide citations."""


In [None]:
# Compare with ScaleDown
scaledown_result_3 = compress_with_scaledown(PROMPT_3, 0.7)  # 0.7 rate = ~70% compression
if not scaledown_result_3["success"]:
    # A failed call reports "error" and may not carry "compressed_prompt";
    # surface the error instead of crashing with a KeyError.
    print(f"⚠️ ScaleDown request failed: {scaledown_result_3.get('error')}")
# Fall back to an empty string so the manual result is still displayed.
display_comparison(PROMPT_3, YOUR_COMPRESSION_3, scaledown_result_3.get("compressed_prompt", ""), 70.0)

## Results!

In [None]:
print("\n" + "🎉 WORKSHOP COMPLETE! 🎉")
print("="*50)
print("📊 YOUR PERFORMANCE SUMMARY:")

# Each entry: (label, original prompt, manual compression, target ratio).
# The target ratio is the percentage of original tokens to KEEP, i.e. the
# same scale calculate_compression_ratio returns, so the "30%" compression
# challenge has a target of 70.0.
challenges = [
    ("Challenge 1 (30%)", PROMPT_1, YOUR_COMPRESSION_1, 70.0),
    ("Challenge 2 (50%)", PROMPT_2, YOUR_COMPRESSION_2, 50.0),
    ("Challenge 3 (70%)", PROMPT_3, YOUR_COMPRESSION_3, 30.0)
]

total_accuracy = 0
for name, original, compressed, target in challenges:
    actual = calculate_compression_ratio(original, compressed)
    # Accuracy: 100 minus the distance (in percentage points) from the target.
    accuracy = 100 - abs(actual - target)
    total_accuracy += accuracy
    print(f"{name}: {actual:.1f}% of original tokens (target {target:.0f}%, accuracy {accuracy:.1f})")