In [None]:
import json
import requests
import os

def download_images_from_json(json_file_path, images_dir='images', timeout=30):
    """Download the large image of every card listed in a card-set JSON file.

    The JSON file must contain a list of card objects, each with an ``id`` and
    an ``images`` mapping holding a ``large`` URL. Each image is written to
    ``<images_dir>/<card id>.png``. A failed download is reported on stdout
    and skipped, so one bad URL does not abort the rest of the set.

    Args:
        json_file_path: Path to the JSON file holding the list of cards.
        images_dir: Directory the images are saved into; created if missing.
        timeout: Seconds to wait on the server before giving up on one image.

    Raises:
        KeyError: If a card lacks ``id`` or ``images['large']``.
    """
    os.makedirs(images_dir, exist_ok=True)

    def download_image(url, save_path):
        """Fetch ``url``, write the body to ``save_path`` and report the outcome."""
        try:
            # Without a timeout a stalled connection would hang the cell forever.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to download {url}: {exc}")
            return

        if response.status_code == 200:
            with open(save_path, 'wb') as img_file:
                img_file.write(response.content)
            print(f"Downloaded {save_path}")
        else:
            print(f"Failed to download {url} (HTTP {response.status_code})")

    # JSON is UTF-8 by specification; do not rely on the platform's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        cards = json.load(json_file)

    for card in cards:
        save_path = os.path.join(images_dir, f"{card['id']}.png")
        download_image(card['images']['large'], save_path)

# Example usage: download the large image of every card listed in sm115.json.
# Running this cell performs one HTTP request per card.
download_images_from_json('pokemon-tcg-data-master/cards/en/sm115.json')

In [None]:
def flatten_metadata(metadata):
    """Collapse nested dicts/lists into a single-level dict.

    Keys of nested values are joined with ``_``; list elements contribute
    their index. Anything that is neither a dict nor a list is treated as a
    leaf value. Empty dicts and empty lists contribute no entries, and a
    non-container ``metadata`` is returned under the key ``""``.
    """

    def _walk(node, prefix):
        # Yield (flat_key, leaf_value) pairs in depth-first order.
        if isinstance(node, dict):
            for name, child in node.items():
                yield from _walk(child, f"{prefix}_{name}" if prefix else name)
        elif isinstance(node, list):
            for position, child in enumerate(node):
                yield from _walk(
                    child, f"{prefix}_{position}" if prefix else str(position)
                )
        else:
            yield prefix, node

    # dict() keeps first-seen key order and lets a later duplicate key win.
    return dict(_walk(metadata, ""))

In [None]:
import json
import os
import chromadb


def vectorize_cards(image_dir, json_file_path):
    """Add every ``.png`` in ``image_dir`` to the ``pokemon_cards`` collection.

    The file name without its extension is used as the record id. When that id
    matches a card in ``json_file_path``, the card's fields (minus ``images``)
    are flattened with ``flatten_metadata`` and stored as the record metadata.

    Args:
        image_dir: Directory containing the ``<card id>.png`` files.
        json_file_path: Path to the JSON file holding the list of cards.
    """
    # get_collection (not get_or_create_collection) is used, so the collection
    # is expected to exist already in the on-disk "chromadb" store.
    chroma_client = chromadb.PersistentClient(path="chromadb")
    collection = chroma_client.get_collection("pokemon_cards")

    # Kept from the original cell: the directory is created, but nothing in
    # this function writes to it.
    output_dir = "vectorized_images"
    os.makedirs(output_dir, exist_ok=True)

    # JSON is UTF-8 by specification; do not rely on the platform's default encoding.
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        cards = json.load(json_file)

    # Map card id -> flattened metadata (image URLs are left out).
    card_metadata = {
        card["id"]: flatten_metadata({k: v for k, v in card.items() if k != "images"})
        for card in cards
    }

    ids = []
    metadatas = []
    images = []
    for image_file in os.listdir(image_dir):
        if not image_file.endswith(".png"):
            continue
        card_id = os.path.splitext(image_file)[0]
        ids.append(card_id)
        images.append(os.path.join(image_dir, image_file))
        # Use None rather than {} for cards without metadata: ChromaDB's
        # metadata validation rejects empty dicts.
        metadatas.append(card_metadata.get(card_id) or None)

    if not ids:
        # collection.add refuses an empty id list, so stop early with a clear message.
        print(f"No .png files found in {image_dir}; nothing to add.")
        return

    # NOTE(review): `images` receives file paths here; confirm the collection's
    # embedding function / data loader accepts paths rather than pixel arrays.
    collection.add(ids=ids, metadatas=metadatas, images=images)

    print("Vectorization completed.")


# Example usage: add every PNG in ./images to the collection, with metadata
# taken from sm115.json.
vectorize_cards("images", "pokemon-tcg-data-master/cards/en/sm115.json")

In [None]:
# Re-open the persisted collection and ask for the top three matches for one
# cropped frame.
QUERY_IMAGE = "/Users/pabloelgueta/Documents/trading_card_detection/cropped_images/frame_225_crop_4.jpg"

chroma_client = chromadb.PersistentClient(path="chromadb")
collection = chroma_client.get_collection("pokemon_cards")
results = collection.query(query_images=[QUERY_IMAGE], n_results=3)

print("Query Results:", results)


In [None]:
def vectorize_image(url: str):
    """Return the embedding vector the Azure vision service computes for an image.

    Calls the ``retrieval:vectorizeImage`` REST operation with the image URL in
    the request body and returns the ``vector`` field of the JSON response.

    Configuration is read from the environment so that no credential lives in
    the notebook:

    * ``VISION_KEY`` (required): subscription key for the resource.
    * ``VISION_ENDPOINT`` (optional): base URL of the resource; defaults to
      the endpoint this notebook was written against.

    Args:
        url: URL of the image; it is sent to the service as ``{"url": url}``.

    Returns:
        The value of the response's ``vector`` field.

    Raises:
        RuntimeError: If ``VISION_KEY`` is not set.
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    import os

    # SECURITY: the key used to be hardcoded in this cell. It has been exposed
    # in the notebook and should be rotated.
    subscription_key = os.environ.get("VISION_KEY")
    if not subscription_key:
        raise RuntimeError(
            "VISION_KEY is not set. Export the Azure vision subscription key "
            "as an environment variable instead of hardcoding it."
        )

    # Imported after the configuration check so a missing key fails fast.
    import requests

    base_url = os.environ.get(
        "VISION_ENDPOINT", "https://pokemon-cards.cognitiveservices.azure.com"
    ).rstrip("/")

    response = requests.post(
        f"{base_url}/computervision/retrieval:vectorizeImage",
        params={"api-version": "2024-02-01", "model-version": "2023-04-15"},
        headers={
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Key": subscription_key,
        },
        json={"url": url},
        timeout=30,
    )
    # Surface HTTP errors here instead of as a KeyError on the missing "vector" field.
    response.raise_for_status()
    return response.json()["vector"]

# Example usage: vector = vectorize_image("https://example.com/card.png")


In [None]:
from chromadb import Documents, EmbeddingFunction, Embeddings


class MyEmbeddingFunction(EmbeddingFunction):
    """Embedding function that embeds each input item with ``vectorize_image``."""

    def __call__(self, input) -> Embeddings:
        # Each item is passed to vectorize_image as its `url` argument, one
        # call per item, and the results are returned in input order.
        vectors = []
        for image_url in input:
            vectors.append(vectorize_image(image_url))
        return vectors


In [None]:
import os
# Full path of every PNG (extension matched case-insensitively) in the images folder.
images_root = "/Users/pabloelgueta/Documents/trading_card_detection/images"
files = [
    os.path.join(images_root, entry)
    for entry in os.listdir(images_root)
    if entry.lower().endswith(".png")
]

In [None]:
import faiss
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
import cv2
import json
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
import supervision as sv

In [None]:

# Load the `dinov2_vits14` model from the facebookresearch/dinov2 torch.hub repo
# (downloads the code and weights on first use).
dinov2_vits14 = torch.hub.load("facebookresearch/dinov2", "dinov2_vits14")

# Select the device
device = torch.device("cpu")

# Move the model to the selected device and switch it to inference mode.
# nn.Module instances start in training mode; the model is only used to embed
# images here, so train-time behaviour must be off.
dinov2_vits14.to(device)
dinov2_vits14.eval()

print(f"Model loaded on {device}")

In [None]:
# Preprocessing applied to every image before it is fed to the model.
transform_image = T.Compose(
    [
        T.ToTensor(),               # PIL image -> float tensor
        T.Resize(244),              # NOTE(review): 244, not 224/256 - confirm this is intended
        T.CenterCrop(224),          # final 224x224 crop
        T.Normalize([0.5], [0.5]),  # single mean/std applied to every channel
    ]
)


def load_image(img: str) -> torch.Tensor:
    """
    Load an image and return a tensor that can be used as an input to DINOv2.

    The image is converted to RGB, passed through ``transform_image`` and given
    a leading batch dimension of size 1.

    Args:
        img: Path of the image file to open.

    Returns:
        The transformed image with one extra leading dimension.
    """
    # The context manager closes the file handle, which matters when indexing
    # many files in a loop.
    with Image.open(img) as pil_image:
        # Force exactly three channels. The original `[:3]` slice only dropped
        # an alpha channel and left grayscale/palette images with one channel.
        rgb_image = pil_image.convert("RGB")

    return transform_image(rgb_image).unsqueeze(0)

In [None]:
def create_index(files: list) -> "tuple[faiss.IndexFlatL2, dict]":
    """
    Create an index that contains all of the images in the specified list of files.

    Every file is embedded with ``dinov2_vits14`` and added to a flat L2 FAISS
    index in the order given. As side effects, the embeddings are written to
    ``all_embeddings.json`` and the index to ``data.bin`` in the working
    directory.

    Args:
        files: Paths of the image files to index.

    Returns:
        A ``(index, all_embeddings)`` tuple, where ``all_embeddings`` maps each
        file path to its embedding as a nested list of shape ``(1, dim)``.
    """
    # 384 must match the length of the embedding returned by dinov2_vits14.
    index = faiss.IndexFlatL2(384)

    all_embeddings = {}

    with torch.no_grad():
        for i, file in enumerate(files):
            print(f"Processing file {i+1}/{len(files)}: {file}")
            embeddings = dinov2_vits14(load_image(file).to(device))

            # One image per batch: take row 0 and shape it (1, dim), the 2-D
            # layout index.add expects.
            embedding = embeddings[0].cpu().numpy().reshape(1, -1)

            all_embeddings[file] = embedding.tolist()
            index.add(embedding)

    with open("all_embeddings.json", "w") as f:
        json.dump(all_embeddings, f)

    faiss.write_index(index, "data.bin")

    return index, all_embeddings

In [None]:
# Build the FAISS index over `files`; this also writes all_embeddings.json and
# data.bin to the working directory.
data_index, all_embeddings = create_index(files)

In [None]:
import uuid
from langgraph_main import _print_event, part_1_graph
from dotenv import load_dotenv
import os
load_dotenv()

# Turn on LangChain tracing only when an API key is available. Re-assigning
# os.environ["LANGCHAIN_API_KEY"] from os.getenv() does nothing when the key is
# set and raises TypeError (None is not a str) when it is not.
if os.getenv("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_PROJECT"] = "Pokemon Trainer"
else:
    print("LANGCHAIN_API_KEY is not set; running without LangChain tracing.")

# A fresh thread id for every run of this cell.
thread_id = str(uuid.uuid4())
config = {"configurable": {"thread_id": thread_id}}

# Shared with _print_event across all events of this run.
_printed = set()

events = part_1_graph.stream(
    {"messages": ("user", "Just use the card identify tool")},
    config,
    stream_mode="values",
)
for event in events:
    _print_event(event, _printed)

In [None]:
from identify_cards import trigger_crop
# Call trigger_crop from identify_cards.
# NOTE(review): its inputs and outputs are defined in that module and are not visible here.
trigger_crop()

In [1]:
from transformers import pipeline
import torch

# Prefer the Metal (MPS) backend when it is available and fall back to CPU, so
# the cell also runs on machines without it.
# Note: this rebinds the notebook-level `device` name used by the DINOv2 cells.
device = "mps" if torch.backends.mps.is_available() else "cpu"

classifier = pipeline(
    "audio-classification", model="MIT/ast-finetuned-speech-commands-v2", device=device
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers.pipelines.audio_utils import ffmpeg_microphone_live


def launch_fn(
    wake_word="marvin",
    prob_threshold=0.5,
    chunk_length_s=2.0,
    stream_chunk_s=0.25,
    debug=True,
):
    """Listen on the microphone until the wake word is recognised.

    Streams audio through the module-level ``classifier`` and inspects the
    first prediction of every result.

    Args:
        wake_word: Label to wait for; must be one of the classifier's labels.
        prob_threshold: Score the wake-word prediction has to exceed.
        chunk_length_s: Passed to ``ffmpeg_microphone_live``.
        stream_chunk_s: Passed to ``ffmpeg_microphone_live``.
        debug: When true, print every inspected prediction.

    Returns:
        ``True`` as soon as the wake word is predicted with a score above
        ``prob_threshold``; ``None`` if the stream ends first.

    Raises:
        ValueError: If ``wake_word`` is not a label known to the classifier.
    """
    valid_labels = classifier.model.config.label2id.keys()
    if wake_word not in valid_labels:
        raise ValueError(
            f"Wake word {wake_word} not in set of valid class labels, pick a wake word in the set {valid_labels}."
        )

    # Record at the rate the classifier's feature extractor expects.
    mic_stream = ffmpeg_microphone_live(
        sampling_rate=classifier.feature_extractor.sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    print("Listening for wake word...")
    for outputs in classifier(mic_stream):
        top_prediction = outputs[0]
        if debug:
            print(top_prediction)
        if top_prediction["label"] == wake_word and top_prediction["score"] > prob_threshold:
            return True

In [5]:
# Blocks until the default wake word ("marvin") is predicted with a score above
# 0.5; interrupt the kernel to stop listening.
launch_fn()

Listening for wake word...




{'score': 0.05254264548420906, 'label': 'two'}
{'score': 0.05628431588411331, 'label': 'two'}
{'score': 0.07304739207029343, 'label': 'off'}
{'score': 0.08420605957508087, 'label': 'follow'}
{'score': 0.09892973303794861, 'label': 'follow'}
{'score': 0.1055353656411171, 'label': 'follow'}
{'score': 0.1055353656411171, 'label': 'follow'}
{'score': 0.1055353656411171, 'label': 'follow'}
{'score': 0.09557732939720154, 'label': 'up'}
{'score': 0.08638238161802292, 'label': 'off'}
{'score': 0.08864366263151169, 'label': 'six'}
{'score': 0.08864366263151169, 'label': 'six'}
{'score': 0.08864366263151169, 'label': 'six'}
{'score': 0.08864366263151169, 'label': 'six'}
{'score': 0.08984789997339249, 'label': 'off'}
{'score': 0.08319336920976639, 'label': 'follow'}
{'score': 0.08319336920976639, 'label': 'follow'}
{'score': 0.08319336920976639, 'label': 'follow'}
{'score': 0.08319336920976639, 'label': 'follow'}
{'score': 0.10203444957733154, 'label': 'off'}
{'score': 0.11466445028781891, 'label

KeyboardInterrupt: 

In [6]:
# Show the classifier's id -> label mapping.
classifier.model.config.id2label

{0: 'backward',
 1: 'follow',
 2: 'five',
 3: 'bed',
 4: 'zero',
 5: 'on',
 6: 'learn',
 7: 'two',
 8: 'house',
 9: 'tree',
 10: 'dog',
 11: 'stop',
 12: 'seven',
 13: 'eight',
 14: 'down',
 15: 'six',
 16: 'forward',
 17: 'cat',
 18: 'right',
 19: 'visual',
 20: 'four',
 21: 'wow',
 22: 'no',
 23: 'nine',
 24: 'off',
 25: 'three',
 26: 'left',
 27: 'marvin',
 28: 'yes',
 29: 'up',
 30: 'sheila',
 31: 'happy',
 32: 'bird',
 33: 'go',
 34: 'one'}

In [1]:
from IPython.display import Audio

from audio_commands import synthesise

# Synthesise a sample sentence and render an audio player for it at 16000 Hz.
sample_text = "Hugging Face is a company that provides natural language processing and machine learning tools for developers."
audio = synthesise(sample_text)

Audio(audio, rate=16000)

  from .autonotebook import tqdm as notebook_tqdm
