In [1]:
import json
import pandas as pd
import re
import os
import urllib



## LLM service

In [2]:
# Ollama default: local service
OLLAMA_BASE_URL = "http://localhost:11434"

# OpenAI-compatible defaults:
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [3]:
def set_openai_credentials(base_url=None, api_key=None):
    """
    Optionally update the OpenAI connection info at runtime.
    """
    global OPENAI_BASE_URL, OPENAI_API_KEY
    if base_url:
        OPENAI_BASE_URL = base_url.rstrip("/")
    if api_key is not None:
        OPENAI_API_KEY = api_key

def verify_ollama_connection(base_url=None, timeout=5):
    """
    Verify Ollama by calling /api/tags.
    """
    base = (base_url or OLLAMA_BASE_URL).rstrip("/")
    url = f"{base}/api/tags"
    try:
        req = urllib.request.Request(url, method="GET")
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return {"ok": True, "status": resp.status, "provider": "ollama"}
    except urllib.error.URLError as e:
        return {"ok": False, "error": str(e), "provider": "ollama"}

def verify_openai_connection(base_url=None, api_key=None, timeout=5):
    """
    Verify OpenAI-compatible endpoint by calling /v1/models.
    Requires a valid API key on endpoints that enforce auth.
    """
    base = (base_url or OPENAI_BASE_URL).rstrip("/")
    key = OPENAI_API_KEY if api_key is None else api_key
    url = f"{base}/v1/models"
    try:
        req = urllib.request.Request(url, method="GET", headers={
            "Authorization": f"Bearer {key}" if key else "",
            "Content-Type": "application/json",
        })
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return {"ok": True, "status": resp.status, "provider": "openai"}
    except urllib.error.URLError as e:
        return {"ok": False, "error": str(e), "provider": "openai"}

# simple connect check
def verify_ollama_connection(base_url="http://localhost:11434", timeout=5):
    url = base_url.rstrip("/") + "/api/tags"
    try:
        req = urllib.request.Request(url, method="GET")
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return {"ok": True, "status": resp.status}
    except urllib.error.URLError as e:
        return {"ok": False, "error": str(e)}

def set_openai_credentials(base_url=None, api_key=None):
    """
    Optionally update the OpenAI connection info at runtime.
    """
    global OPENAI_BASE_URL, OPENAI_API_KEY
    if base_url:
        OPENAI_BASE_URL = base_url.rstrip("/")
    if api_key is not None:
        OPENAI_API_KEY = api_key
        

# Start a structure to hold model metadata
def initialize_model_registry(model_names):
    """
    Given a list of model names, return a dict where each
    model is a key and its value is an empty dict (to be filled later).
    """
    return {name: {} for name in model_names}


def build_model_registry(base_url="http://localhost:11434", timeout=5):
    """
    Query Ollama for local models and return a dict with useful metadata.
    Keys are model names; values are plain JSON-serializable dicts.
    """
    base = base_url.rstrip("/")
    # 1) Get local models
    with urllib.request.urlopen(f"{base}/api/tags", timeout=timeout) as resp:
        tags = json.loads(resp.read().decode("utf-8"))
    models = tags.get("models", [])

    registry = {}

    def _post_json(url, payload):
        data = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read().decode("utf-8"))

    for m in models:
        name = (m or {}).get("name")
        if not name:
            continue

        # Base fields from /api/tags
        details = (m or {}).get("details") or {}
        entry = {
            "name": name,
            "modified_at": m.get("modified_at"),
            "size_bytes": m.get("size"),
            "digest": m.get("digest"),
            "format": details.get("format"),
            "family": details.get("family"),
            "families": details.get("families"),
            "parameter_size": details.get("parameter_size"),       # e.g., "7B", "13B"
            "quantization": details.get("quantization_level"),     # e.g., "Q4_0"
        }

        # Enrich via /api/show
        try:
            shown = _post_json(f"{base}/api/show", {"model": name})
        except urllib.error.URLError:
            registry[name] = entry
            continue

        model_info = shown.get("model_info") or {}
        parameters_blob = shown.get("parameters") or ""

        # Parameter count
        param_count = model_info.get("general.parameter_count")
        if isinstance(param_count, int):
            entry["parameter_count"] = param_count

        # Context length
        ctx = None
        for k, v in model_info.items():
            if isinstance(k, str) and k.endswith(".context_length") and isinstance(v, int):
                ctx = v
                break
        if ctx is None and isinstance(parameters_blob, str):
            m_ctx = re.search(r"\bnum_ctx\s+(\d+)", parameters_blob)
            if m_ctx:
                ctx = int(m_ctx.group(1))
        entry["context_length"] = ctx

        registry[name] = entry

    return registry

def enrich_model_registry(registry):
    """
    Add extra fields useful for recommendations.
    Defaults are provided where info is missing so you can easily update later.
    """
    for name, entry in registry.items():
        # --- Defaults section ---
        # If the API does not return these, we set safe defaults.
        entry.setdefault("instruction_tuned", True)   # assume instruction tuned unless you know it's a base model
        entry.setdefault("task_specialty", ["general"])  # can update per model (e.g., ["code"], ["math"])
        entry.setdefault("release_date", "unknown")   # placeholder, fill with year/month if known
        entry.setdefault("publisher", "unknown")      # e.g., Meta, Mistral AI, etc.
        entry.setdefault("license", "unknown")        # e.g., Apache 2.0, commercial-restricted
        # --- End defaults section ---

        # Normalize parameter_count if it's missing but parameter_size is present
        if "parameter_count" not in entry and "parameter_size" in entry:
            size_str = entry["parameter_size"].lower()
            if size_str.endswith("b"):  # handle like "7b", "13b"
                try:
                    entry["parameter_count"] = int(float(size_str[:-1]) * 1e9)
                except ValueError:
                    pass

    return registry

def registry_to_dataframe(registry):
    """
    Convert the model registry dict into a Pandas DataFrame.
    Each key in the registry becomes a row.
    """
    df = pd.DataFrame.from_dict(registry, orient="index")
    df.index.name = "model"
    return df.reset_index()

In [4]:
# Run the check
verify_ollama_connection()
# verify_openai_connection()

{'ok': True, 'status': 200}

In [5]:
model_registry = build_model_registry()
model_registry

{'llama3.1:8b': {'name': 'llama3.1:8b',
  'modified_at': '2025-09-03T15:23:37.281393299-04:00',
  'size_bytes': 4920753328,
  'digest': '46e0c10c039e019119339687c3c1757cc81b9da49709a3b3924863ba87ca666e',
  'format': 'gguf',
  'family': 'llama',
  'families': ['llama'],
  'parameter_size': '8.0B',
  'quantization': 'Q4_K_M',
  'parameter_count': 8030261312,
  'context_length': 131072},
 'llama2-uncensored:latest': {'name': 'llama2-uncensored:latest',
  'modified_at': '2025-08-12T09:12:54.355064054-04:00',
  'size_bytes': 3825819449,
  'digest': '44040b9222331f7eacd27ec9254e42de585af28d2c5d1211cdaeb3ffa361fe3f',
  'format': 'gguf',
  'family': 'llama',
  'families': None,
  'parameter_size': '7B',
  'quantization': 'Q4_0',
  'parameter_count': 6738415616,
  'context_length': 2048},
 'whisper:latest': {'name': 'whisper:latest',
  'modified_at': '2025-07-22T19:00:46.141884565-04:00',
  'size_bytes': 44039248,
  'digest': '9aafc61ff108ca6baffe7131bbd34ad157dfaf30799f81b6dcc455efeea6b8b0',
 

In [6]:
# Convert and preview
df_models = registry_to_dataframe(model_registry)
df_models.head()

Unnamed: 0,model,name,modified_at,size_bytes,digest,format,family,families,parameter_size,quantization,parameter_count,context_length
0,llama3.1:8b,llama3.1:8b,2025-09-03T15:23:37.281393299-04:00,4920753328,46e0c10c039e019119339687c3c1757cc81b9da49709a3...,gguf,llama,[llama],8.0B,Q4_K_M,8030261312,131072.0
1,llama2-uncensored:latest,llama2-uncensored:latest,2025-08-12T09:12:54.355064054-04:00,3825819449,44040b9222331f7eacd27ec9254e42de585af28d2c5d12...,gguf,llama,,7B,Q4_0,6738415616,2048.0
2,whisper:latest,whisper:latest,2025-07-22T19:00:46.141884565-04:00,44039248,9aafc61ff108ca6baffe7131bbd34ad157dfaf30799f81...,gguf,unknown,[unknown],37.76M,unknown,37760640,
3,dimavz/whisper-tiny:latest,dimavz/whisper-tiny:latest,2025-07-22T18:50:09.444127962-04:00,44039248,9aafc61ff108ca6baffe7131bbd34ad157dfaf30799f81...,gguf,unknown,[unknown],37.76M,unknown,37760640,
4,openchat:latest,openchat:latest,2025-07-14T18:40:40.971119981-04:00,4109876386,537a4e03b649d93bf57381199a85f412bfc35912e46db1...,gguf,llama,[llama],7B,Q4_0,7241748480,8192.0


# Expanded model info

In [7]:
# Enrich the registry
model_registry = enrich_model_registry(model_registry)
model_registry


{'llama3.1:8b': {'name': 'llama3.1:8b',
  'modified_at': '2025-09-03T15:23:37.281393299-04:00',
  'size_bytes': 4920753328,
  'digest': '46e0c10c039e019119339687c3c1757cc81b9da49709a3b3924863ba87ca666e',
  'format': 'gguf',
  'family': 'llama',
  'families': ['llama'],
  'parameter_size': '8.0B',
  'quantization': 'Q4_K_M',
  'parameter_count': 8030261312,
  'context_length': 131072,
  'instruction_tuned': True,
  'task_specialty': ['general'],
  'release_date': 'unknown',
  'publisher': 'unknown',
  'license': 'unknown'},
 'llama2-uncensored:latest': {'name': 'llama2-uncensored:latest',
  'modified_at': '2025-08-12T09:12:54.355064054-04:00',
  'size_bytes': 3825819449,
  'digest': '44040b9222331f7eacd27ec9254e42de585af28d2c5d1211cdaeb3ffa361fe3f',
  'format': 'gguf',
  'family': 'llama',
  'families': None,
  'parameter_size': '7B',
  'quantization': 'Q4_0',
  'parameter_count': 6738415616,
  'context_length': 2048,
  'instruction_tuned': True,
  'task_specialty': ['general'],
  'rele

## Summaries

In [8]:
# Build DataFrame
df_models = registry_to_dataframe(model_registry)

# --- Useful summaries ---
print("Models per family:")
print(df_models["family"].value_counts(), "\n")

print("Models per task_specialty:")
# task_specialty is a list, so flatten it for counts
all_tasks = pd.Series([task for tasks in df_models["task_specialty"] for task in tasks])
print(all_tasks.value_counts(), "\n")

print("Average context length by family:")
print(df_models.groupby("family")["context_length"].mean(), "\n")

print("Largest models by parameter_count:")
print(df_models.sort_values("parameter_count", ascending=False).head())


Models per family:
family
llama        10
gemma3        3
unknown       2
phi3          1
deepseek2     1
Name: count, dtype: int64 

Models per task_specialty:
general    17
Name: count, dtype: int64 

Average context length by family:
family
deepseek2    163840.0
gemma3        98304.0
llama         48537.6
phi3         131072.0
unknown           NaN
Name: context_length, dtype: float64 

Largest models by parameter_count:
                       model                      name  \
6   deepseek-coder-v2:latest  deepseek-coder-v2:latest   
7                 llama2:13b                llama2:13b   
14                gemma3:12b                gemma3:12b   
0                llama3.1:8b               llama3.1:8b   
11           llama3.1:latest           llama3.1:latest   

                            modified_at  size_bytes  \
6   2025-07-09T18:49:47.931980663-04:00  8905126121   
7   2025-05-05T18:57:06.208582485-04:00  7366821294   
14  2025-04-20T15:24:14.131208347-04:00  8149190253   
0  

## recommender

In [9]:
# Minimal rule-based recommender (resource-agnostic)

# 1) Heuristic tagger: infer specialties/flags from model name
def _auto_tag_model(entry):
    """
    Adds/adjusts tags based on common naming patterns.
    Safe defaults stay in place if no matches are found.
    """
    name = entry.get("name", "").lower()

    # --- Defaults (explicit for visibility) ---
    # keep any existing values from your earlier enrichment
    entry.setdefault("instruction_tuned", True)
    entry.setdefault("task_specialty", ["general"])
    entry.setdefault("multilingual", True)  # adjust if you prefer conservative False
    # -----------------------------------------

    tags = set(entry.get("task_specialty", []))

    # Code-focused models
    if any(t in name for t in ["codellama", "code", "qwen2.5-coder", "deepseek-coder", "starcoder", "phind"]):
        tags.add("code")

    # Math / reasoning hints
    if any(t in name for t in ["math", "reason", "deepseek-r1", "r1:"]):
        tags.add("reasoning")

    # Instruct/chat indicators
    if any(t in name for t in ["instruct", "chat", ":it", "-it", "qwen2.5-instruct", "gemma-it"]):
        entry["instruction_tuned"] = True

    # Long-context hint (numeric check beats name)
    if isinstance(entry.get("context_length"), int) and entry["context_length"] >= 64_000:
        tags.add("long_context")

    # Multilingual hints (very rough)
    if any(t in name for t in ["qwen", "gemma", "mistral", "llama", "phi"]):
        entry["multilingual"] = True

    entry["task_specialty"] = sorted(tags) if tags else ["general"]
    return entry


# Apply auto-tagging to your in-memory registry
for _n, _e in model_registry.items():
    model_registry[_n] = _auto_tag_model(_e)

# Rebuild the DataFrame to reflect updates
df_models = pd.DataFrame.from_dict(model_registry, orient="index").reset_index(drop=True)


# 2) Recommendation function
def recommend_model(
    use_case: str,
    min_context: int | None = None,
    require_instruction_tuned: bool | None = None,
    prefer_newer: bool = True,
    top_k: int = 5
) -> pd.DataFrame:
    """
    Score and rank models for a given use_case.
    Resource constraints are intentionally ignored.

    Parameters
    ----------
    use_case : str
        e.g., "general_chat", "code_generation", "long_doc_summary", "reasoning_qa"
    min_context : int | None
        Minimum context length requirement (tokens). If None, no hard floor.
    require_instruction_tuned : bool | None
        If True, filter to instruction-tuned; if False, prefer non-instruction; if None, no hard requirement.
    prefer_newer : bool
        Use 'release_date' as a tie-break preference when available.
    top_k : int
        Number of top candidates to return.

    Returns
    -------
    pd.DataFrame with columns: model, family, parameter_count, context_length, instruction_tuned,
    task_specialty, score, reasons
    """
    if df_models.empty:
        return pd.DataFrame()

    # Map use_case to soft preferences
    uc = use_case.lower()
    wants = {
        "code": uc in ("code", "code_generation", "code_assist", "coding"),
        "long": uc in ("long_context", "long_doc_summary", "summarization_long"),
        "reason": uc in ("reasoning", "reasoning_qa", "math", "complex_qa"),
        "chat": uc in ("chat", "assistant", "general_chat", "qa", "customer_support"),
    }

    rows = []
    for _, row in df_models.iterrows():
        score = 0
        reasons = []

        # Basic fields
        model = row.get("name") or row.get("model") or "unknown"
        family = row.get("family")
        ctx = row.get("context_length")
        pc = row.get("parameter_count")
        instruct = bool(row.get("instruction_tuned"))
        specialty = set(row.get("task_specialty", []) or [])

        # Hard filters
        if min_context is not None and isinstance(ctx, int) and ctx < min_context:
            continue
        if require_instruction_tuned is True and not instruct:
            continue

        # Soft preferences
        # Instruction tuning for chat/assistant flows
        if wants["chat"]:
            if instruct:
                score += 3; reasons.append("instruction-tuned for chat")
            else:
                score -= 1; reasons.append("not instruction-tuned")

        # Code specialty
        if wants["code"]:
            if "code" in specialty:
                score += 3; reasons.append("code-specialized")
            else:
                score += 1; reasons.append("general model for code")

        # Reasoning/math specialty
        if wants["reason"]:
            if "reasoning" in specialty:
                score += 3; reasons.append("reasoning-specialized")
            else:
                score += 1; reasons.append("general reasoning")

        # Long context preference
        if wants["long"]:
            if isinstance(ctx, int):
                # reward proportionally up to 128k
                score += min(ctx, 128_000) / 32_000  # up to +4
                reasons.append(f"context_length={ctx}")
            else:
                score -= 1; reasons.append("unknown context_length")

        # General capability proxy via parameter count (diminishing returns)
        if isinstance(pc, (int, float)):
            # scale: 1e9 -> +1, 7e9 -> +2.5, 13e9 -> +3, 70e9 -> +4 (capped)
            cap_bonus = min(4.0, 1.0 + (pc / 7e9))  # simple, monotonic
            score += cap_bonus
            reasons.append(f"parameter_count≈{int(pc):,}")
        else:
            reasons.append("unknown parameter_count")

        # Quantization nudge (lighter quantization may reduce quality slightly)
        q = (row.get("quantization") or "").upper()
        if q.startswith("Q2"):
            score -= 1; reasons.append("very aggressive quantization")
        elif q.startswith("Q3"):
            score -= 0.5; reasons.append("aggressive quantization")
        elif q.startswith("Q4"):
            score -= 0.2; reasons.append("moderate quantization")
        elif q.startswith("Q5") or q.startswith("Q6") or q.startswith("F16") or q.startswith("BF16"):
            score += 0.2; reasons.append("higher quality quantization")

        # Prefer newer releases if available
        if prefer_newer:
            rd = row.get("release_date")
            if isinstance(rd, str) and rd not in ("", "unknown"):
                score += 0.2; reasons.append("newer release")

        rows.append({
            "model": model,
            "family": family,
            "parameter_count": pc,
            "context_length": ctx,
            "instruction_tuned": instruct,
            "task_specialty": sorted(list(specialty)) if specialty else ["general"],
            "quantization": row.get("quantization"),
            "score": round(float(score), 3),
            "reasons": "; ".join(reasons)
        })

    if not rows:
        return pd.DataFrame()

    recs = pd.DataFrame(rows)
    recs = recs.sort_values(["score", "parameter_count", "context_length"], ascending=[False, False, False])
    return recs.head(top_k).reset_index(drop=True)


# 3) Example calls (uncomment to try):
# recommend_model("general_chat", min_context=4096, require_instruction_tuned=True, top_k=5)
recommend_model("code_generation", min_context=8192, top_k=5)
# recommend_model("long_doc_summary", min_context=65536, top_k=5)
# recommend_model("reasoning_qa", top_k=5)


Unnamed: 0,model,family,parameter_count,context_length,instruction_tuned,task_specialty,quantization,score,reasons
0,deepseek-coder-v2:latest,deepseek2,15706484224,163840.0,True,"[code, general, long_context]",Q4_0,6.044,"code-specialized; parameter_count≈15,706,484,2..."
1,llama2:13b,llama,13015864320,4096.0,True,[general],Q4_0,3.659,"general model for code; parameter_count≈13,015..."
2,gemma3:12b,gemma3,12187079280,131072.0,True,"[general, long_context]",Q4_K_M,3.541,"general model for code; parameter_count≈12,187..."
3,llama3.1:8b,llama,8030261312,131072.0,True,"[general, long_context]",Q4_K_M,2.947,"general model for code; parameter_count≈8,030,..."
4,llama3.1:latest,llama,8030261312,131072.0,True,"[general, long_context]",Q4_K_M,2.947,"general model for code; parameter_count≈8,030,..."
