In [1]:
!python3 -m pip install torch torchvision uuid pinecone dotenv

Collecting torch
  Downloading torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting uuid
  Downloading uuid-1.30.tar.gz (5.8 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting pinecone
  Downloading pinecone-6.0.2-py3-none-any.whl.metadata (9.0 kB)
Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting filelock (from torch)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-non

In [3]:
import os
import uuid
from dotenv import load_dotenv
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
import pinecone
from pinecone import Pinecone, ServerlessSpec
from PIL import Image

# Vectorization function import
from img2vec import img2vec

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [6]:
img_shape = 1024

In [7]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.225, 0.225, 0.225])
])

In [8]:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

100.0%


In [9]:
vec_dimensions = img2vec(train_dataset[0][0]).shape[0]

In [10]:
vec_dimensions

2048

In [17]:
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = f"cifar10-index-{vec_dimensions}"

# Index has already been created!
# pc.create_index(
#   name=index_name,
#   dimension=vec_dimensions,
#   metric="cosine",
#   spec=ServerlessSpec(
#     cloud="aws",
#     region="us-east-1"
#   )
# )

cifar_index = pc.Index(index_name)

In [18]:
cifar_index.describe_index_stats()

{'dimension': 2048,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

In [20]:
#cifar_index.delete(delete_all=True)

In [19]:
# Training process: Query train dataset vectors on vector database and if
# response is empty or the ANNs are less than 80%, upsert query vector to
# vector database.

In [21]:
classes = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

vectors_by_classes = {}

for i in range(len(classes)):
  num_vecs = 0
  vectors_by_classes[i] = []

  for j in range(len(train_dataset)):
    if num_vecs == 450:
      break

    if train_dataset[j][1] == i:
      num_vecs += 1
      vectors_by_classes[i].append(img2vec(train_dataset[j][0]))
      print(f"Added vector {len(vectors_by_classes[i])} in class {i}")

Added vector 1 in class 0
Added vector 2 in class 0
Added vector 3 in class 0
Added vector 4 in class 0
Added vector 5 in class 0
Added vector 6 in class 0
Added vector 7 in class 0
Added vector 8 in class 0
Added vector 9 in class 0
Added vector 10 in class 0
Added vector 11 in class 0
Added vector 12 in class 0
Added vector 13 in class 0
Added vector 14 in class 0
Added vector 15 in class 0
Added vector 16 in class 0
Added vector 17 in class 0
Added vector 18 in class 0
Added vector 19 in class 0
Added vector 20 in class 0
Added vector 21 in class 0
Added vector 22 in class 0
Added vector 23 in class 0
Added vector 24 in class 0
Added vector 25 in class 0
Added vector 26 in class 0
Added vector 27 in class 0
Added vector 28 in class 0
Added vector 29 in class 0
Added vector 30 in class 0
Added vector 31 in class 0
Added vector 32 in class 0
Added vector 33 in class 0
Added vector 34 in class 0
Added vector 35 in class 0
Added vector 36 in class 0
Added vector 37 in class 0
Added vect

In [22]:
# Upserting hypervectors

for c in vectors_by_classes:
    for i in range(len(vectors_by_classes[c])):
        cifar_index.upsert(vectors=[(str(uuid.uuid4()), vectors_by_classes[c][i], {"class": c})])
        print(f"Upserted vector {i+1}/450 in class {c}")

Upserted vector 1/450 in class 0
Upserted vector 2/450 in class 0
Upserted vector 3/450 in class 0
Upserted vector 4/450 in class 0
Upserted vector 5/450 in class 0
Upserted vector 6/450 in class 0
Upserted vector 7/450 in class 0
Upserted vector 8/450 in class 0
Upserted vector 9/450 in class 0
Upserted vector 10/450 in class 0
Upserted vector 11/450 in class 0
Upserted vector 12/450 in class 0
Upserted vector 13/450 in class 0
Upserted vector 14/450 in class 0
Upserted vector 15/450 in class 0
Upserted vector 16/450 in class 0
Upserted vector 17/450 in class 0
Upserted vector 18/450 in class 0
Upserted vector 19/450 in class 0
Upserted vector 20/450 in class 0
Upserted vector 21/450 in class 0
Upserted vector 22/450 in class 0
Upserted vector 23/450 in class 0
Upserted vector 24/450 in class 0
Upserted vector 25/450 in class 0
Upserted vector 26/450 in class 0
Upserted vector 27/450 in class 0
Upserted vector 28/450 in class 0
Upserted vector 29/450 in class 0
Upserted vector 30/450 

In [42]:
idx = 500

test_query_vector = img2vec(test_dataset[idx][0])
test_label = test_dataset[idx][1]

response = cifar_index.query(
    namespace="",
    vector=test_query_vector.tolist(),
    top_k=5,
    include_values=False,
    include_metadata=True,
)

counts = {}

for vec in response["matches"]:
    counts[int(vec["metadata"]["class"])] = counts.get(vec["metadata"]["class"], 0) + 1 

max(counts, key=counts.get)

4

In [33]:
test_label

4

In [44]:
accuracy = 0
test_dataset_size = len(test_dataset)

for i in range(len(test_dataset)):
    test_query_vector = img2vec(test_dataset[i][0])
    label = test_dataset[i][1]

    response = cifar_index.query(
        namespace="",
        vector=test_query_vector.tolist(),
        top_k=5,
        include_values=False,
        include_metadata=True,
    )

    counts = {}
    
    for vec in response["matches"]:
        counts[int(vec["metadata"]["class"])] = counts.get(vec["metadata"]["class"], 0) + 1 
    
    pred = max(counts, key=counts.get)

    accuracy += 1 if (pred == label) else 0

    if i % 10 == 0:
        print(f"Iteration {i} accuracy: {accuracy / (i+1)}")

print()
print()
print()
print()
print(f"Accuracy: {accuracy / test_dataset_size:.4f}")

Iteration 0 accuracy: 1.0
Iteration 10 accuracy: 0.9090909090909091
Iteration 20 accuracy: 0.9523809523809523
Iteration 30 accuracy: 0.8709677419354839
Iteration 40 accuracy: 0.8292682926829268
Iteration 50 accuracy: 0.8431372549019608
Iteration 60 accuracy: 0.819672131147541
Iteration 70 accuracy: 0.8309859154929577
Iteration 80 accuracy: 0.8395061728395061
Iteration 90 accuracy: 0.8461538461538461
Iteration 100 accuracy: 0.8514851485148515
Iteration 110 accuracy: 0.8468468468468469
Iteration 120 accuracy: 0.859504132231405
Iteration 130 accuracy: 0.8549618320610687
Iteration 140 accuracy: 0.851063829787234
Iteration 150 accuracy: 0.8410596026490066
Iteration 160 accuracy: 0.8322981366459627
Iteration 170 accuracy: 0.8304093567251462
Iteration 180 accuracy: 0.8232044198895028
Iteration 190 accuracy: 0.8219895287958116
Iteration 200 accuracy: 0.8208955223880597
Iteration 210 accuracy: 0.8151658767772512
Iteration 220 accuracy: 0.8190045248868778
Iteration 230 accuracy: 0.81385281385281

In [46]:
print(f"Accuracy: {(accuracy / test_dataset_size)*100:.2f}%")

Accuracy: 82.63%
