In [None]:
import ollama
import time
import psutil
import subprocess
from datetime import datetime

# Who ran the benchmark and on which machine; both values are copied into
# every result row.
TESTER_NAME = "Rami"
DEVICE = "Windows PC (RTX 5070 Ti, Ryzen 9 9950X, 32GB RAM)"

# One entry per benchmark case: the Ollama model tag and the prompt sent to it.
tests = [
    {"model": model_tag, "prompt": prompt_text}
    for model_tag, prompt_text in (
        ("gemma3:12b", "Write a Python function to solve a quadratic equation."),
        ("llama3.1:8b", "Explain the difference between AI and Machine Learning in simple terms."),
        ("nomic-embed-text", "Hello world"),
    )
]

def get_vram_usage():
    """Return the current (not peak) GPU memory usage reported by nvidia-smi.

    Returns:
        A single-line string such as "14029 MB". When nvidia-smi reports
        several GPUs the readings are joined as "100 MB / 200 MB" so the value
        never contains a line break (it is pasted into one spreadsheet cell).
        "N/A" is returned when nvidia-smi is missing, fails, times out or
        prints nothing.
    """
    try:
        raw = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits"],
            shell=False,
            stderr=subprocess.DEVNULL,
            timeout=10,  # never let a wedged driver tool hang the benchmark
        )
    except (OSError, subprocess.SubprocessError):
        # OSError: binary not found / not executable.
        # SubprocessError: non-zero exit status or timeout.
        return "N/A"

    # nvidia-smi prints one line per GPU.
    readings = [
        line.strip()
        for line in raw.decode(errors="replace").splitlines()
        if line.strip()
    ]
    if not readings:
        return "N/A"
    return " / ".join(f"{reading} MB" for reading in readings)

def test_model(test):
    """Run one benchmark case against Ollama and build its report row.

    Models whose name contains "embed" are called through the embeddings
    endpoint; every other model is called through the chat endpoint.

    Args:
        test: Mapping with the keys "model" (Ollama model tag) and "prompt".

    Returns:
        dict mapping report column names to display strings. Any exception
        raised by the model call is captured in "Errors/Warnings" instead of
        being propagated, so one failing model does not abort the other cases.
    """
    model = test["model"]
    prompt = test["prompt"]

    errors = ""
    output_sample = ""

    ram_before = psutil.virtual_memory().used / (1024 ** 3)
    # Prime psutil's CPU counter: the next interval=None call then reports the
    # average system utilisation since this point, i.e. while the request is
    # actually running (sampling after the call only measures an idle machine).
    psutil.cpu_percent(interval=None)
    # perf_counter is monotonic, so the duration is immune to clock changes.
    start_time = time.perf_counter()

    try:
        if "embed" in model:
            response = ollama.embeddings(model=model, prompt=prompt)
            output_sample = f"Embedding size: {len(response['embedding'])}"
        else:
            response = ollama.chat(
                model=model,
                messages=[{"role": "user", "content": prompt}]
            )
            output_sample = response["message"]["content"][:150]

    except Exception as e:
        errors = str(e)

    response_time = round(time.perf_counter() - start_time, 2)
    cpu_usage = psutil.cpu_percent(interval=None)
    # NOTE: this is the change in system-wide used RAM, so unrelated processes
    # influence it and the value can be negative.
    ram_after = psutil.virtual_memory().used / (1024 ** 3)

    return {
        "Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "Tester": TESTER_NAME,
        "Errors/Warnings": errors or "None",
        "Output Sample": output_sample,
        "Response Time": f"{response_time} sec",
        "VRAM Usage": get_vram_usage(),
        "RAM Usage": f"{round(ram_after - ram_before, 2)} GB",
        "CPU Usage": f"{cpu_usage} %",
        "Device Used": DEVICE,
        "Prompt Used": prompt,
        "Model Name": model,
    }

def _tsv_cell(value):
    """Render *value* as text that fits in one tab-separated cell.

    Model output routinely contains newlines and tabs; printed verbatim they
    split a single result row across several spreadsheet rows/columns. Every
    run of whitespace is therefore collapsed to one space.
    """
    return " ".join(str(value).split())


# Run every configured case, then print one tab-separated line per result so
# the block can be pasted straight into a spreadsheet.
results = [test_model(t) for t in tests]

print("\n=== COPY TO EXCEL ===\n")
for r in results:
    print("\t".join(_tsv_cell(v) for v in r.values()))



=== COPY TO EXCEL ===

2026-02-10 20:54:40	Rami	None	```python
import cmath

def solve_quadratic_equation(a, b, c):
  """
  Solves a quadratic equation of the form ax^2 + bx + c = 0.

  Args:
    a: The 	19.06 sec	14029 MB	-0.61 GB	2.9 %	Windows PC (RTX 5070 Ti, Ryzen 9 9950X, 16GB RAM)	Write a Python function to solve a quadratic equation.	gemma3:12b
2026-02-10 20:54:48	Rami	None	**Artificial Intelligence (AI)**: Think of AI as a super smart robot that can think, learn, and act like a human being. It's designed to perform tasks	7.67 sec	9778 MB	-1.08 GB	6.1 %	Windows PC (RTX 5070 Ti, Ryzen 9 9950X, 16GB RAM)	Explain the difference between AI and Machine Learning in simple terms.	llama3.1:8b
2026-02-10 20:54:49	Rami	None	Embedding size: 768	0.83 sec	10531 MB	0.5 GB	4.1 %	Windows PC (RTX 5070 Ti, Ryzen 9 9950X, 16GB RAM)	Hello world	nomic-embed-text


In [None]:
import ollama
import time
import psutil
import subprocess
import threading
from datetime import datetime
from pathlib import Path
import openpyxl

# =========================
# CONFIG
# =========================
REPORT_PATH = r"Models Report.xlsx"   # <-- put your Excel file in the same folder OR set full path here
TESTER_NAME = "Rami"
DEVICE = "Windows PC (RTX 5070 Ti, Ryzen 9 9950X, 32GB RAM)"

# Models to test
CHAT_MODEL = "gemma3:12b"
# Embedding model under test; set to "nomic-embed-text" to benchmark that model instead.
EMBED_MODEL = "bge-m3"

# Strong prompts to stress a chat model (reasoning + structure + instruction-following)
# Each case is (name, prompt lines); the lines are joined with "\n" and the
# prompt is terminated by one trailing newline.
CHAT_TESTS = [
    {"name": label, "prompt": "\n".join(lines) + "\n"}
    for label, lines in (
        (
            "Policy Reasoning + SOP",
            (
                "You are an enterprise AI assistant inside an air-gapped company.",
                "Task: Create an internal SOP for handling a suspected data leak incident.",
                "Requirements:",
                "1) Provide exactly 7 numbered steps.",
                "2) Add a short 'Decision Table' with 3 rows: Severity (Low/Med/High) -> Action.",
                "3) Keep tone professional, no dramatic language.",
            ),
        ),
        (
            "Deep reasoning (trade-offs)",
            (
                "We must choose between two options for internal knowledge:",
                "A) RAG with vector database (ChromaDB) and embeddings",
                "B) Fine-tuning using LoRA on internal chat logs",
                "",
                "Compare A vs B across: Security, Maintenance, Accuracy, Cost, Upgrade flexibility.",
                "Then recommend one for the first 3 months, and justify in 5 bullets.",
            ),
        ),
        (
            "Format obedience (JSON only)",
            (
                "Return ONLY valid JSON (no markdown, no extra text).",
                "Create a JSON object with keys:",
                "summary (string), risks (array of 4 strings), mitigations (array of 4 strings).",
                "Topic: Deploying local LLMs on an air-gapped network for 10+ users.",
            ),
        ),
    )
]

# Single short text used to check that the embedding model responds.
EMBED_TEST = dict(
    name="Embedding sanity",
    prompt="Company annual leave policy and approval workflow.",
)

# =========================
# METRICS HELPERS
# =========================
def _query_vram_mb():
    """Return current GPU VRAM used in MB (string->int). Works if nvidia-smi is available."""
    try:
        out = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits"],
            stderr=subprocess.DEVNULL
        )
        # If multiple GPUs, this returns multiple lines; take the max.
        vals = [int(x.strip()) for x in out.decode().strip().splitlines() if x.strip().isdigit()]
        return max(vals) if vals else None
    except Exception:
        return None

class PeakSampler:
    """Track peak VRAM, RAM and CPU usage in a background thread.

    Call start() right before the model request and stop() right after it;
    the peaks are then available as attributes:

    * peak_vram   - highest value returned by _query_vram_mb() (MB), or None
    * peak_ram_gb - highest system-wide used RAM in GB, or None
    * peak_cpu    - highest system-wide CPU utilisation in percent, or None

    A peak stays None when no sample of that kind was taken.
    """

    def __init__(self, interval=0.2):
        # Seconds to wait between two sampling rounds.
        self.interval = interval
        self._stop = threading.Event()
        # Created here so stop() is safe even if start() was never called.
        self._thread = None
        self.peak_vram = None
        self.peak_ram_gb = None
        self.peak_cpu = None

    def start(self):
        """Start the sampling thread (no-op if it is already running)."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def stop(self):
        """Signal the sampling thread to finish and wait up to 2 s for it."""
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=2)

    def _run(self):
        # CPU is measured system-wide: psutil.Process() would describe this
        # benchmark process only, and the `ollama` package is a client for a
        # separately running Ollama server, so the inference load would
        # presumably not show up there. The first interval=None call returns a
        # meaningless value and only primes the counter.
        psutil.cpu_percent(interval=None)
        while not self._stop.is_set():
            # VRAM
            v = _query_vram_mb()
            if v is not None:
                self.peak_vram = v if self.peak_vram is None else max(self.peak_vram, v)

            # RAM (system used)
            ram_gb = psutil.virtual_memory().used / (1024 ** 3)
            self.peak_ram_gb = ram_gb if self.peak_ram_gb is None else max(self.peak_ram_gb, ram_gb)

            # Wait on the event instead of sleeping so stop() takes effect
            # immediately rather than after a full interval.
            self._stop.wait(self.interval)

            # CPU (system %) averaged since the previous reading.
            cpu = psutil.cpu_percent(interval=None)
            self.peak_cpu = cpu if self.peak_cpu is None else max(self.peak_cpu, cpu)

def test_chat_model(model: str, prompt: str):
    """Send one chat prompt to an Ollama model and collect metrics.

    Args:
        model: Ollama model tag to query.
        prompt: Text sent as a single user message.

    Returns:
        dict with the report columns "Errors/Warnings", "Output Sample"
        (first 250 characters of the reply), "Response Time", "VRAM Usage",
        "RAM Usage" and "CPU Usage". Usage values are the peaks seen by
        PeakSampler, or "N/A" when no sample was taken. Exceptions raised by
        the request are captured in "Errors/Warnings", not propagated.
    """
    sampler = PeakSampler(interval=0.2)
    errors = ""
    output_sample = ""

    sampler.start()
    # perf_counter is monotonic, so the duration is immune to clock changes.
    start = time.perf_counter()
    try:
        resp = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])
        output_sample = resp["message"]["content"][:250]
    except Exception as e:
        errors = str(e)
    finally:
        # Take the time BEFORE stopping the sampler: stop() joins the sampling
        # thread, which would otherwise be billed to the model's response time.
        elapsed = round(time.perf_counter() - start, 2)
        sampler.stop()

    return {
        "Errors/Warnings": errors or "None",
        "Output Sample": output_sample,
        "Response Time": f"{elapsed} sec",
        "VRAM Usage": f"{sampler.peak_vram} MB" if sampler.peak_vram is not None else "N/A",
        "RAM Usage": f"{round(sampler.peak_ram_gb, 2)} GB" if sampler.peak_ram_gb is not None else "N/A",
        "CPU Usage": f"{round(sampler.peak_cpu, 1)} %" if sampler.peak_cpu is not None else "N/A",
    }

def test_embed_model(model: str, prompt: str):
    """Request one embedding from an Ollama model and collect metrics.

    Args:
        model: Ollama embedding model tag to query.
        prompt: Text to embed.

    Returns:
        dict with the report columns "Errors/Warnings", "Output Sample"
        (the embedding length), "Response Time", "VRAM Usage", "RAM Usage"
        and "CPU Usage". Usage values are the peaks seen by PeakSampler, or
        "N/A" when no sample was taken. Exceptions raised by the request are
        captured in "Errors/Warnings", not propagated.
    """
    sampler = PeakSampler(interval=0.2)
    errors = ""
    output_sample = ""

    sampler.start()
    # perf_counter is monotonic, so the duration is immune to clock changes.
    start = time.perf_counter()
    try:
        resp = ollama.embeddings(model=model, prompt=prompt)
        output_sample = f"Embedding size: {len(resp['embedding'])}"
    except Exception as e:
        errors = str(e)
    finally:
        # Take the time BEFORE stopping the sampler: stop() joins the sampling
        # thread, which would otherwise dominate a sub-second embedding call.
        elapsed = round(time.perf_counter() - start, 2)
        sampler.stop()

    return {
        "Errors/Warnings": errors or "None",
        "Output Sample": output_sample,
        "Response Time": f"{elapsed} sec",
        "VRAM Usage": f"{sampler.peak_vram} MB" if sampler.peak_vram is not None else "N/A",
        "RAM Usage": f"{round(sampler.peak_ram_gb, 2)} GB" if sampler.peak_ram_gb is not None else "N/A",
        "CPU Usage": f"{round(sampler.peak_cpu, 1)} %" if sampler.peak_cpu is not None else "N/A",
    }

# =========================
# EXCEL WRITER
# =========================
# Report columns, in the order they are written to the worksheet.
HEADERS = [
    "Date",
    "Tester",
    "Errors/Warnings",
    "Output Sample",
    "Response Time",
    "VRAM Usage",
    "RAM Usage",
    "CPU Usage",
    "Device Used",
    "Prompt Used",
    "Model Name",
]

def append_to_excel(report_path: str, rows: list[dict]):
    """Append result rows to the active sheet of an existing workbook.

    Args:
        report_path: Path of the .xlsx file to update; it must already exist.
        rows: One dict per result, keyed by the names in HEADERS. Missing
            keys are written as empty strings.

    Raises:
        FileNotFoundError: If report_path does not exist.
    """
    workbook_path = Path(report_path)
    if not workbook_path.exists():
        raise FileNotFoundError(f"Excel file not found: {workbook_path.resolve()}")

    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active

    # Compare the first row against the expected headers and (re)write the
    # headers when the sheet is empty or the leading columns differ.
    first_row = []
    for column in range(1, sheet.max_column + 1):
        first_row.append(sheet.cell(1, column).value)
    if first_row[:len(HEADERS)] != HEADERS:
        for column, title in enumerate(HEADERS, start=1):
            sheet.cell(1, column).value = title

    # New data goes directly below the last used row.
    target_row = sheet.max_row + 1
    for record in rows:
        for column, title in enumerate(HEADERS, start=1):
            sheet.cell(target_row, column).value = record.get(title, "")
        target_row += 1

    workbook.save(workbook_path)

# =========================
# RUN TESTS
# =========================
def _build_row(metrics: dict, test_name: str, prompt: str, model: str) -> dict:
    """Combine measured metrics with run metadata into one report row."""
    return {
        "Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "Tester": TESTER_NAME,
        "Errors/Warnings": metrics["Errors/Warnings"],
        # Prefix the sample with the test name so rows are identifiable.
        "Output Sample": f"[{test_name}] {metrics['Output Sample']}",
        "Response Time": metrics["Response Time"],
        "VRAM Usage": metrics["VRAM Usage"],
        "RAM Usage": metrics["RAM Usage"],
        "CPU Usage": metrics["CPU Usage"],
        "Device Used": DEVICE,
        "Prompt Used": prompt,
        "Model Name": model,
    }


def main():
    """Run all chat tests and the embedding test, then append them to the report."""
    # Chat tests (model selected by CHAT_MODEL)
    rows = [
        _build_row(
            test_chat_model(CHAT_MODEL, t["prompt"]),
            t["name"],
            t["prompt"],
            CHAT_MODEL,
        )
        for t in CHAT_TESTS
    ]

    # Embedding test (model selected by EMBED_MODEL)
    rows.append(
        _build_row(
            test_embed_model(EMBED_MODEL, EMBED_TEST["prompt"]),
            EMBED_TEST["name"],
            EMBED_TEST["prompt"],
            EMBED_MODEL,
        )
    )

    append_to_excel(REPORT_PATH, rows)
    print(f"Done. Appended {len(rows)} rows to: {Path(REPORT_PATH).resolve()}")

if __name__ == "__main__":
    main()


Done. Appended 4 rows to: C:\Users\omarch\Desktop\Rami\projects\offline_Ai\test_models\Models Report.xlsx


In [None]:
import ollama
import time
import psutil
import subprocess
import threading
from datetime import datetime
from pathlib import Path
import openpyxl

# =========================
# CONFIG
# =========================
# Workbook that results are appended to (relative to the working directory).
REPORT_PATH = r"Models Report.xlsx"
# Tester name and machine description written into every report row.
TESTER_NAME = "Rami"
DEVICE = "Windows PC (RTX 5070 Ti, Ryzen 9 9950X, 32GB RAM)"

# Ollama model tags under test.
CHAT_MODEL = "gemma3:12b"
# Alternative embedding model: "nomic-embed-text"
EMBED_MODEL = "bge-m3"


# =========================
# TEST PROMPTS (English + Arabic)
# =========================

# Each case is (name, prompt lines); the lines are joined with "\n". A trailing
# empty line marks a prompt that ends with a newline.
CHAT_TESTS = [
    {"name": label, "prompt": "\n".join(lines)}
    for label, lines in (
        (
            "Arabic SOP",
            (
                "أنت مساعد ذكاء اصطناعي يعمل داخل شركة تعتمد نظام ذكاء اصطناعي محلي.",
                "اكتب إجراء تشغيلي قياسي (SOP) للتعامل مع حادث تسريب بيانات محتمل.",
                "المتطلبات:",
                "1) سبع خطوات مرقمة فقط.",
                "2) جدول قرار مبسط يحتوي على (منخفض، متوسط، عالي).",
                "3) لغة رسمية واحترافية بدون مبالغة.",
                "",
            ),
        ),
        (
            "Arabic Reasoning Deep",
            (
                "لدينا خياران لإدارة المعرفة الداخلية:",
                "أ) استخدام RAG مع قاعدة بيانات متجهات.",
                "ب) استخدام Fine-Tuning عبر LoRA.",
                "",
                "قارن بين الخيارين من حيث:",
                "الأمان، الصيانة، الدقة، التكلفة، وسهولة الترقية.",
                "ثم قدم توصية واضحة مع تبرير منطقي من 5 نقاط.",
            ),
        ),
        (
            "Arabic JSON Strict",
            (
                "أعد النتيجة بصيغة JSON فقط بدون أي نص إضافي.",
                "المفاتيح المطلوبة:",
                "summary (string), risks (array of 4 strings), mitigations (array of 4 strings).",
                "الموضوع: نشر نموذج ذكاء اصطناعي محلي داخل بيئة Air-Gapped.",
            ),
        ),
        (
            "Mixed Language Stress",
            (
                "Explain in English the risks of deploying local LLMs in air-gapped networks, "
                "then summarize the explanation in Arabic in 5 bullet points.",
            ),
        ),
        (
            "Logical Structure Test",
            (
                "Create a decision tree in text format for choosing between:",
                "1) Small model (under 5GB)",
                "2) Medium model (8-12B)",
                "3) Large model (27B+)",
                "",
                "Consider latency, GPU memory, number of users, and reasoning quality.",
            ),
        ),
    )
]

# Single Arabic sentence sent to the embedding model.
EMBED_TEST = dict(
    name="Arabic Embedding",
    prompt="سياسة الإجازات السنوية وآلية الموافقة عليها داخل الشركة.",
)

# =========================
# METRIC SAMPLER
# =========================

def query_vram():
    """Return the GPU memory currently in use, in MB, as reported by nvidia-smi.

    Returns:
        The largest per-GPU reading as an int (nvidia-smi prints one line per
        GPU), or None when nvidia-smi is missing, fails, or prints no numeric
        reading.
    """
    try:
        out = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits"],
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        # Only "tool unavailable / tool failed" is treated as "no reading";
        # a bare except would also swallow KeyboardInterrupt and SystemExit.
        return None

    # Keep purely numeric lines only, so one non-numeric line (or an empty
    # output) does not discard the readings of the other GPUs.
    tokens = (line.strip() for line in out.decode(errors="replace").splitlines())
    vals = [int(token) for token in tokens if token.isdecimal()]
    return max(vals) if vals else None

class PeakSampler:
    """Track peak VRAM, RAM and CPU usage in a background thread.

    Call start() before the model request and stop() after it; the peaks are
    then available as attributes:

    * peak_vram - highest value returned by query_vram() (MB); 0 if none
    * peak_ram  - highest system-wide used RAM in GB
    * peak_cpu  - highest system-wide CPU utilisation in percent
    """

    def __init__(self):
        # An Event (instead of a plain bool) lets the sampling thread wake up
        # as soon as stop() is called.
        self._stop_event = threading.Event()
        self._thread = None
        self.peak_vram = 0
        self.peak_ram = 0
        self.peak_cpu = 0

    @property
    def stop_flag(self):
        """True once the sampler has been asked to stop."""
        return self._stop_event.is_set()

    @stop_flag.setter
    def stop_flag(self, value):
        # Kept writable so code that assigns the flag directly still works.
        if value:
            self._stop_event.set()
        else:
            self._stop_event.clear()

    def start(self):
        """Start sampling in a daemon thread."""
        self._thread = threading.Thread(target=self.sample, daemon=True)
        self._thread.start()

    def sample(self):
        """Sampling loop; runs until the stop flag is set."""
        # CPU is measured system-wide: psutil.Process() would describe this
        # benchmark process only, and the `ollama` package is a client for a
        # separately running Ollama server, so the inference load would
        # presumably not show up there. The first interval=None call returns a
        # meaningless value and only primes the counter.
        psutil.cpu_percent(interval=None)
        while not self._stop_event.is_set():
            vram = query_vram()
            if vram:
                self.peak_vram = max(self.peak_vram, vram)
            ram = psutil.virtual_memory().used / (1024**3)
            self.peak_ram = max(self.peak_ram, ram)
            # Wait on the event instead of sleeping so stop() takes effect
            # immediately rather than after the full pause.
            self._stop_event.wait(0.2)
            # Utilisation averaged since the previous reading.
            cpu = psutil.cpu_percent(interval=None)
            self.peak_cpu = max(self.peak_cpu, cpu)

    def stop(self):
        """Stop sampling and wait (up to 2 s) for the thread to finish.

        Joining guarantees the peak_* attributes are no longer being updated
        when the caller reads them.
        """
        self._stop_event.set()
        worker = self._thread
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=2)


# =========================
# TEST FUNCTIONS
# =========================

def test_chat(model, prompt):
    """Send one chat prompt to an Ollama model, print the reply, collect metrics.

    Args:
        model: Ollama model tag to query.
        prompt: Text sent as a single user message.

    Returns:
        dict with the report columns "Errors/Warnings", "Output Sample"
        (first 300 characters of the reply), "Response Time", "VRAM Usage",
        "RAM Usage" and "CPU Usage". Exceptions raised by the request or by
        printing are captured in "Errors/Warnings", not propagated.
    """
    sampler = PeakSampler()
    error = ""
    full_output = ""

    sampler.start()
    # perf_counter is monotonic, so the duration is immune to clock changes.
    start = time.perf_counter()
    try:
        response = ollama.chat(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        full_output = response["message"]["content"]
    except Exception as e:
        error = str(e)
    finally:
        # Measure before printing and before stopping the sampler, so neither
        # console output nor sampler shutdown is billed to the model.
        elapsed = round(time.perf_counter() - start, 2)
        sampler.stop()

    if not error:
        try:
            # Print full output to console
            print("\n" + "="*60)
            print(f"MODEL: {model}")
            print("PROMPT:\n", prompt)
            print("\nRESPONSE:\n", full_output)
            print("="*60 + "\n")
        except Exception as e:
            # A console problem (e.g. an encoding error) is still reported,
            # but the measurements above stay valid.
            error = str(e)

    return {
        "Errors/Warnings": error or "None",
        "Output Sample": full_output[:300],
        "Response Time": f"{elapsed} sec",
        # A peak of 0 means no VRAM reading was obtained at all.
        "VRAM Usage": f"{sampler.peak_vram} MB" if sampler.peak_vram else "N/A",
        "RAM Usage": f"{round(sampler.peak_ram,2)} GB",
        "CPU Usage": f"{round(sampler.peak_cpu,1)} %",
    }

def test_embed(model, prompt):
    """Request one embedding from an Ollama model and time the call.

    Args:
        model: Ollama embedding model tag to query.
        prompt: Text to embed.

    Returns:
        dict with the report columns "Errors/Warnings", "Output Sample"
        (the embedding length, empty on failure) and "Response Time".
        Resource usage is not sampled here, so the VRAM/RAM/CPU columns are
        always "N/A". Exceptions are captured in "Errors/Warnings".
    """
    started_at = time.time()
    failure = ""
    summary = ""
    try:
        vector = ollama.embeddings(model=model, prompt=prompt)["embedding"]
        dimension = len(vector)
        print(f"\nEmbedding vector size: {dimension}\n")
        summary = f"Embedding size: {dimension}"
    except Exception as exc:
        failure = str(exc)
    duration = round(time.time() - started_at, 2)

    report = {
        "Errors/Warnings": failure or "None",
        "Output Sample": summary,
        "Response Time": f"{duration} sec",
    }
    # Usage columns are placeholders for the embedding test.
    for column in ("VRAM Usage", "RAM Usage", "CPU Usage"):
        report[column] = "N/A"
    return report

# =========================
# SAVE TO EXCEL
# =========================

def append_to_excel(rows):
    """Append one worksheet row per result to the workbook at REPORT_PATH.

    Args:
        rows: Result dicts holding the metric columns; "Prompt Used" and
            "Model Name" are optional and default to an empty string.

    The date written is the time of saving, not the time the test ran.
    """
    workbook = openpyxl.load_workbook(REPORT_PATH)
    sheet = workbook.active

    metric_columns = (
        "Errors/Warnings",
        "Output Sample",
        "Response Time",
        "VRAM Usage",
        "RAM Usage",
        "CPU Usage",
    )
    for record in rows:
        line = [datetime.now().strftime("%Y-%m-%d %H:%M:%S"), TESTER_NAME]
        for column in metric_columns:
            line.append(record[column])
        line.append(DEVICE)
        line.append(record.get("Prompt Used",""))
        line.append(record.get("Model Name",""))
        sheet.append(line)

    workbook.save(REPORT_PATH)

# =========================
# RUN
# =========================

def main():
    """Run every chat test plus the embedding test and save the results."""
    rows = []

    # Chat prompts, all against CHAT_MODEL.
    for case in CHAT_TESTS:
        prompt = case["prompt"]
        entry = test_chat(CHAT_MODEL, prompt)
        entry.update({"Prompt Used": prompt, "Model Name": CHAT_MODEL})
        rows.append(entry)

    # Single embedding prompt against EMBED_MODEL.
    embed_prompt = EMBED_TEST["prompt"]
    embed_entry = test_embed(EMBED_MODEL, embed_prompt)
    embed_entry.update({"Prompt Used": embed_prompt, "Model Name": EMBED_MODEL})
    rows.append(embed_entry)

    append_to_excel(rows)
    print("Results saved to Excel.")

if __name__ == "__main__":
    main()



MODEL: gemma3:12b
PROMPT:
 أنت مساعد ذكاء اصطناعي يعمل داخل شركة تعتمد نظام ذكاء اصطناعي محلي.
اكتب إجراء تشغيلي قياسي (SOP) للتعامل مع حادث تسريب بيانات محتمل.
المتطلبات:
1) سبع خطوات مرقمة فقط.
2) جدول قرار مبسط يحتوي على (منخفض، متوسط، عالي).
3) لغة رسمية واحترافية بدون مبالغة.


RESPONSE:
 ## إجراء تشغيلي قياسي (SOP) للتعامل مع حادث تسريب بيانات محتمل

**الهدف:** توثيق الإجراءات اللازمة لتحديد وتقييم والاستجابة لحوادث تسريب البيانات المحتملة داخل الشركة، مع ضمان تقليل الضرر المحتمل والالتزام بالمتطلبات القانونية والتنظيمية.

**النطاق:** يغطي هذا الإجراء جميع موظفي الشركة وأي طرف ثالث له حق الوصول إلى بيانات الشركة.

**1. اكتشاف وإبلاغ:** عند الاشتباه في حدوث تسريب بيانات، يجب على الموظف المسؤول الإبلاغ الفوري إلى قسم تكنولوجيا المعلومات والأمن السيبراني عبر القناة المحددة (مثل البريد الإلكتروني المخصص أو رقم الهاتف). يجب تضمين أكبر قدر ممكن من التفاصيل حول الحادث المشتبه به (البيانات المتأثرة المحتملة، كيفية الاكتشاف، الخ).

**2. احتواء الحادث:** يقوم فريق تكنولوجيا المعلومات والأ