In [1]:
pip install paragon-py

/home/watson/.bashrc: line 50: /usr/bin/em-sdk-env.sh: No such file or directory
Setting up EMSDK environment (suppress these messages with EMSDK_QUIET=1)
Adding directories to PATH:
PATH += /home/watson/emsdk
PATH += /home/watson/emsdk/upstream/emscripten

Setting environment variables:
PATH = /home/watson/emsdk:/home/watson/emsdk/upstream/emscripten:/home/watson/.bun/bin:/home/watson/.npm-global/bin:/home/watson/miniconda3/bin:/home/watson/miniconda3/condabin:/home/watson/.local/bin:/home/watson/bin:/usr/lib64/ccache:/usr/local/bin:/usr/bin:/home/watson/.dotnet/tools:/usr/local/go/bin
EMSDK = /home/watson/emsdk
EMSDK_NODE = /home/watson/emsdk/node/22.16.0_64bit/bin/node
Defaulting to user installation because normal site-packages is not writeable
Collecting paragon-py
  Downloading paragon_py-0.0.3-py3-none-any.whl.metadata (5.5 kB)
Downloading paragon_py-0.0.3-py3-none-any.whl (31.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.6/31.6 MB[0m [31m11.2 MB/s[0m


# Paragon-Py: Quickstart + Training Demo

This notebook shows how to:
- Create a 3-layer network with ReLU activations
- Train on a simple 2-class dataset (nonlinear XOR-ish pattern)
- Evaluate accuracy
- Compute **confidence buckets** from softmax probabilities

> Install once: `pip install paragon-py` (already on PyPI).



In [22]:

import os, math, random, statistics
from typing import List, Tuple
import paragon_py as p

# Try GPU; if it fails, fall back to CPU later
use_gpu = True
os.environ.setdefault("DISPLAY", ":0")
os.environ.setdefault("WGPU_BACKEND", "gl")  # change to 'vulkan'/'metal'/'dx12' per platform

# Model: 4→8→8→2, ReLU, ReLU, Linear head
h = p.new_network(
    shapes=[(4,8),(8,8),(8,2)],
    activations=["relu","relu","linear"],
    trainable=[True,True,True],
    use_gpu=use_gpu,
)

gpu_ok = p.initialize_gpu(h)
print("GPU initialized:", gpu_ok)


GPU initialized: True



## Build a exampe dataset

We'll learn a 2‑class decision boundary on 2D points and pad to 4 features.
Targets are one‑hot vectors `[1,0]` or `[0,1]`.


In [3]:

random.seed(7)

def make_dataset(n:int=512):
    X, Y = [], []
    for _ in range(n):
        x1 = random.uniform(-1.0, 1.0)
        x2 = random.uniform(-1.0, 1.0)
        # Nonlinear label: inside circle vs outside (xor-ish twist)
        r2 = x1*x1 + x2*x2
        y = 0 if r2 < 0.5 and x1*x2 < 0 else 1
        # pad to 4 features
        feat = [x1, x2, 0.0, 0.0]
        tgt = [1.0, 0.0] if y == 0 else [0.0, 1.0]
        X.append([feat])   # each sample is a 1x4 "row"
        Y.append([tgt])    # each target is a 1x2 "row"
    return X, Y

X_train, Y_train = make_dataset(512)
X_test,  Y_test  = make_dataset(256)

len(X_train), len(X_test)


(512, 256)


## Train

`paragon_py.train(handle, inputs, targets, epochs, lr, shuffle=False, ...)`

We'll run a few short epochs—this is just a smoke test, not a benchmark.


In [4]:

# A tiny training loop (Paragon trains the whole set internally)
p.train(h, X_train, Y_train, epochs=25, lr=0.05, shuffle=True)
p.train(h, X_train, Y_train, epochs=25, lr=0.02, shuffle=True)
print("Training done.")


Training done.



## Evaluate

We’ll forward each sample, read logits from `extract_output`, convert to softmax
probabilities, and compute accuracy + confidence buckets.


In [5]:

def softmax(logits):
    m = max(logits)
    ex = [math.exp(z - m) for z in logits]
    s = sum(ex)
    return [e/s for e in ex]

def predict_proba(batch_1x4):
    p.forward(h, batch_1x4)
    logits = p.extract_output(h)
    # ExtractOutput may flatten—ensure 2 elements for the head
    if len(logits) >= 2:
        logits = logits[:2]
    return softmax(logits)

def eval_dataset(X, Y):
    correct = 0
    probs = []
    for xi, yi in zip(X, Y):
        pr = predict_proba(xi)  # xi is [[f1,f2,f3,f4]]
        probs.append(pr[1])     # prob of class 1
        pred = 0 if pr[0] >= pr[1] else 1
        true = 0 if yi[0][0] > yi[0][1] else 1
        if pred == true:
            correct += 1
    acc = correct / len(X)
    return acc, probs

acc_train, train_probs = eval_dataset(X_train, Y_train)
acc_test,  test_probs  = eval_dataset(X_test,  Y_test)

print(f"Train accuracy: {acc_train:.3f}")
print(f"Test  accuracy: {acc_test:.3f}")


Train accuracy: 0.834
Test  accuracy: 0.762



## Confidence buckets

Bucket predictions by confidence (`max class probability`) into ranges to see how
calibrated the model is.


In [6]:

def bucket_counts(probs, bins=(0.5,0.7,0.9,0.97,0.99,1.01)):
    # bins define right edges. We'll count in: [0,0.5), [0.5,0.7), ..., [0.99,1.0]
    counts = [0]*(len(bins))
    for p1 in probs:
        m = max(p1, 1.0-p1)
        placed = False
        low = 0.0
        for i, hi in enumerate(bins):
            if m < hi:
                counts[i] += 1
                placed = True
                break
            low = hi
        if not placed:
            counts[-1] += 1
    labels = ["[0,0.5)","[0.5,0.7)","[0.7,0.9)","[0.9,0.97)","[0.97,0.99)","[0.99,1.0]"]
    return list(zip(labels, counts))

print("Train confidence buckets:")
for lab,c in bucket_counts(train_probs):
    print(f"{lab:>10}: {c}")
print("\nTest confidence buckets:")
for lab,c in bucket_counts(test_probs):
    print(f"{lab:>10}: {c}")


Train confidence buckets:
   [0,0.5): 0
 [0.5,0.7): 164
 [0.7,0.9): 106
[0.9,0.97): 146
[0.97,0.99): 73
[0.99,1.0]: 23

Test confidence buckets:
   [0,0.5): 0
 [0.5,0.7): 73
 [0.7,0.9): 53
[0.9,0.97): 84
[0.97,0.99): 33
[0.99,1.0]: 13



## Inference on a few samples


In [7]:

samples = [
    [[-0.9, -0.9, 0.0, 0.0]],  # likely class 1 or 0 depending on learned boundary
    [[ 0.1,  0.1, 0.0, 0.0]],
    [[ 0.8, -0.2, 0.0, 0.0]],
]

for i, s in enumerate(samples):
    pr = predict_proba(s)
    print(f"sample {i}: proba(class0)={pr[0]:.3f}  proba(class1)={pr[1]:.3f}")


sample 0: proba(class0)=0.018  proba(class1)=0.982
sample 1: proba(class0)=0.373  proba(class1)=0.627
sample 2: proba(class0)=0.463  proba(class1)=0.537


In [8]:

# Cleanup GPU resources (safe if CPU too)
p.cleanup_gpu(h)


In [None]:
# Without gpu

In [28]:
import random, paragon_py as p

# 3-layer dense net, CPU only
h = p.new_network(
    shapes=[(4,1),(8,1),(2,1)],             # [4] -> [8] -> [2]
    activations=["linear","relu","linear"], # allow gradients at layer 1
    trainable=[True, True, True],
    use_gpu=False
)

# Tiny separable toy dataset: class 1 if a+b > c+d else class 0
def make_data(n=256):
    X, Y = [], []
    for _ in range(n):
        a,b,c,d = [random.random() for _ in range(4)]
        cls = 1 if a+b > c+d else 0
        X.append([[a,b,c,d]])                 # shape [1][4]
        one = [0.0,0.0]; one[cls] = 1.0
        Y.append([one])                       # shape [1][2]
    return X, Y

X, Y = make_data(512)

# Give the network a small "kick" so weights aren't all zeros
p.train(h, X, Y, epochs=10, lr=0.05, shuffle=True)

# Inference
p.forward(h, [[0.2,0.4,0.6,0.8]])
print("CPU output:", p.extract_output(h)) 

CPU output: [1.3603724241256714, 0.847637951374054]
⚠️ Negative loss (-0.1205) detected at sample 23, epoch 0. Stopping training early.


In [None]:
# With gpu

In [27]:
import paragon_py as paragon

# Create a small 3-layer network: input → hidden → output
# Each layer uses ReLU activation and is trainable.
h = paragon.new_network(
    shapes=[(4, 8), (8, 8), (8, 2)],     # width x height per layer
    activations=["relu", "relu", "relu"],
    trainable=[True, True, True],
    use_gpu=True
)

# Initialize GPU backend (optional but faster)
paragon.initialize_gpu(h)

# Dummy forward pass
sample_input = [[0.1, 0.5, 0.3, 0.7]]
paragon.forward(h, sample_input)

# Extract and print the output
out = paragon.extract_output(h)
print("Network output:", out)

# Cleanup GPU resources
paragon.cleanup_gpu(h)

Network output: [0.0, 0.8963102698326111, 0.0, 2.7732486724853516, 0.0, 0.0, 0.0, 0.0]
