In [1]:
import json
import os
import chromadb
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer
import numpy as np

file_path = "listings/metadata/listings_0.json"

with open(file_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f]  # Read each line as a separate JSON object

n_data_samples = len(data)
print(n_data_samples)  # Number of JSON objects in the file

  from .autonotebook import tqdm as notebook_tqdm


9232


In [2]:
all_keys = set()
for idx, item in enumerate(data):
    for key in item.keys():
        if key not in all_keys and idx != 0:
            print(f"idx {idx}, new key is {key}")
    all_keys.update(item.keys())

idx 1, new key is item_dimensions
idx 1, new key is item_weight
idx 2, new key is material
idx 4, new key is fabric_type
idx 4, new key is color_code
idx 9, new key is product_description
idx 38, new key is spin_id
idx 38, new key is 3dmodel_id
idx 54, new key is pattern
idx 103, new key is finish_type
idx 103, new key is item_shape


In [3]:
import os
import json

# Directory containing JSON/JSONL files
json_folder = "listings/metadata"

def format_for_rag(data):
    """General function to format JSON into a structured string for RAG, handling different keys dynamically."""
    parts = []

    def extract_values(value):
        """Extract values recursively from dicts and lists."""
        if isinstance(value, dict):
            return ", ".join(f"{k}: {extract_values(v)}" if k != "value" else f"{extract_values(v)}" for k, v in value.items())
        elif isinstance(value, list):
            return ", ".join(str(extract_values(v)) for v in value)
        return str(value)

    for key, value in data.items():
        formatted_value = extract_values(value)
        parts.append(f"{key.capitalize()}: {formatted_value}")

    return "\n".join(parts)


In [4]:
formatted_data_sample = format_for_rag(data[1342]) 
# Some strings contain too many characters.
# There may be smarter ways to generate the formatted string...
# Also, so descriptions are not in English, one idea my be to translate them to English.

print(formatted_data_sample)

Brand: language_tag: en_IN, Amazon Brand - Solimo
Bullet_point: language_tag: en_IN, Snug fit for Mobile, with perfect cut-outs for volume buttons, audio and charging ports, language_tag: en_IN, Compatible with Realme C3, language_tag: en_IN, Easy to put & take off with perfect cutouts for volume buttons, audio & charging ports, language_tag: en_IN, Stylish design and appearance, express your unique personality, language_tag: en_IN, Extreme precision design allows easy access to all buttons and ports while featuring raised bezel to life screen and camera off flat surface, language_tag: en_IN, No warranty
Color: language_tag: en_IN, standardized_values: multi-colored, multi-colored
Item_id: B085636YJS
Item_name: language_tag: en_IN, Amazon Brand - Solimo Designer Multicolor Love U Dad Printed Soft Back Case Mobile Cover for Realme C3
Item_weight: normalized_value: unit: pounds, 0.110231131, unit: grams, 50
Model_name: language_tag: en_IN, Realme C3
Model_number: SOLRMC3TPU0278
Product_t

In [5]:
# ChromaDB with chuncking https://chatgpt.com/share/67c66fd2-3b00-800f-9b27-ad2a00ec8ae6

In [6]:
formatted_data = np.array([format_for_rag(data[i]) for i in range(n_data_samples)])

In [7]:
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="products")

# Load embedding model (Sentence Transformers or OpenAI)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2") 
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
# By default, input text longer than 256 word pieces is truncated.

In [8]:
# 128 tokens because of the way the model was trained.
def chunk_text(text, max_tokens=128):
    """Split text into chunks of max_tokens using a tokenizer."""
    tokens = tokenizer.encode(text, add_special_tokens=False)
    chunks = [tokens[i:i+max_tokens] for i in range(0, len(tokens), max_tokens)]
    return [tokenizer.decode(chunk) for chunk in chunks]

In [9]:
# Process each text entry
n_samples = 100

for i, text in enumerate(formatted_data[:n_samples]):
    text_chunks = chunk_text(text)  # Split text into chunks

    # Generate embeddings for each chunk
    embeddings = embedding_model.encode(text_chunks)

    # Store in ChromaDB
    for j, (chunk, embedding) in enumerate(zip(text_chunks, embeddings)):
        collection.add(
            ids=[f"item_{i}_chunk_{j}"],  # Unique ID per chunk
            embeddings=[embedding.tolist()],  # Convert NumPy array to list
            metadatas=[{"original_id": i, "chunk_index": j, "text": chunk}]
        )

    print(f"✅ Processed {len(text_chunks)} chunks for text entry {i}")

print("🚀 All data stored in ChromaDB successfully!")

Insert of existing embedding ID: item_0_chunk_0
Add of existing embedding ID: item_0_chunk_0
Insert of existing embedding ID: item_0_chunk_1
Add of existing embedding ID: item_0_chunk_1
Insert of existing embedding ID: item_0_chunk_2
Add of existing embedding ID: item_0_chunk_2
Insert of existing embedding ID: item_0_chunk_3
Add of existing embedding ID: item_0_chunk_3
Token indices sequence length is longer than the specified maximum sequence length for this model (868 > 512). Running this sequence through the model will result in indexing errors
Insert of existing embedding ID: item_1_chunk_0
Add of existing embedding ID: item_1_chunk_0
Insert of existing embedding ID: item_1_chunk_1
Add of existing embedding ID: item_1_chunk_1
Insert of existing embedding ID: item_1_chunk_2
Add of existing embedding ID: item_1_chunk_2
Insert of existing embedding ID: item_1_chunk_3
Add of existing embedding ID: item_1_chunk_3
Insert of existing embedding ID: item_1_chunk_4
Add of existing embedding 

✅ Processed 4 chunks for text entry 0
✅ Processed 7 chunks for text entry 1
✅ Processed 6 chunks for text entry 2


Insert of existing embedding ID: item_3_chunk_0
Add of existing embedding ID: item_3_chunk_0
Insert of existing embedding ID: item_3_chunk_1
Add of existing embedding ID: item_3_chunk_1
Insert of existing embedding ID: item_3_chunk_2
Add of existing embedding ID: item_3_chunk_2
Insert of existing embedding ID: item_3_chunk_3
Add of existing embedding ID: item_3_chunk_3
Insert of existing embedding ID: item_3_chunk_4
Add of existing embedding ID: item_3_chunk_4
Insert of existing embedding ID: item_3_chunk_5
Add of existing embedding ID: item_3_chunk_5
Insert of existing embedding ID: item_3_chunk_6
Add of existing embedding ID: item_3_chunk_6
Insert of existing embedding ID: item_3_chunk_7
Add of existing embedding ID: item_3_chunk_7
Insert of existing embedding ID: item_3_chunk_8
Add of existing embedding ID: item_3_chunk_8
Insert of existing embedding ID: item_4_chunk_0
Add of existing embedding ID: item_4_chunk_0
Insert of existing embedding ID: item_4_chunk_1
Add of existing embedd

✅ Processed 9 chunks for text entry 3
✅ Processed 3 chunks for text entry 4


Insert of existing embedding ID: item_5_chunk_0
Add of existing embedding ID: item_5_chunk_0
Insert of existing embedding ID: item_5_chunk_1
Add of existing embedding ID: item_5_chunk_1
Insert of existing embedding ID: item_5_chunk_2
Add of existing embedding ID: item_5_chunk_2
Insert of existing embedding ID: item_5_chunk_3
Add of existing embedding ID: item_5_chunk_3
Insert of existing embedding ID: item_5_chunk_4
Add of existing embedding ID: item_5_chunk_4
Insert of existing embedding ID: item_5_chunk_5
Add of existing embedding ID: item_5_chunk_5
Insert of existing embedding ID: item_5_chunk_6
Add of existing embedding ID: item_5_chunk_6
Insert of existing embedding ID: item_5_chunk_7
Add of existing embedding ID: item_5_chunk_7
Insert of existing embedding ID: item_5_chunk_8
Add of existing embedding ID: item_5_chunk_8
Insert of existing embedding ID: item_5_chunk_9
Add of existing embedding ID: item_5_chunk_9
Insert of existing embedding ID: item_5_chunk_10
Add of existing embed

✅ Processed 18 chunks for text entry 5
✅ Processed 4 chunks for text entry 6
✅ Processed 5 chunks for text entry 7


Insert of existing embedding ID: item_8_chunk_0
Add of existing embedding ID: item_8_chunk_0
Insert of existing embedding ID: item_8_chunk_1
Add of existing embedding ID: item_8_chunk_1
Insert of existing embedding ID: item_8_chunk_2
Add of existing embedding ID: item_8_chunk_2
Insert of existing embedding ID: item_8_chunk_3
Add of existing embedding ID: item_8_chunk_3
Insert of existing embedding ID: item_8_chunk_4
Add of existing embedding ID: item_8_chunk_4
Insert of existing embedding ID: item_8_chunk_5
Add of existing embedding ID: item_8_chunk_5
Insert of existing embedding ID: item_8_chunk_6
Add of existing embedding ID: item_8_chunk_6
Insert of existing embedding ID: item_8_chunk_7
Add of existing embedding ID: item_8_chunk_7
Insert of existing embedding ID: item_8_chunk_8
Add of existing embedding ID: item_8_chunk_8
Insert of existing embedding ID: item_8_chunk_9
Add of existing embedding ID: item_8_chunk_9
Insert of existing embedding ID: item_8_chunk_10
Add of existing embed

✅ Processed 18 chunks for text entry 8
✅ Processed 6 chunks for text entry 9
✅ Processed 2 chunks for text entry 10


Insert of existing embedding ID: item_11_chunk_4
Add of existing embedding ID: item_11_chunk_4
Insert of existing embedding ID: item_12_chunk_0
Add of existing embedding ID: item_12_chunk_0
Insert of existing embedding ID: item_12_chunk_1
Add of existing embedding ID: item_12_chunk_1
Insert of existing embedding ID: item_12_chunk_2
Add of existing embedding ID: item_12_chunk_2
Insert of existing embedding ID: item_12_chunk_3
Add of existing embedding ID: item_12_chunk_3
Insert of existing embedding ID: item_12_chunk_4
Add of existing embedding ID: item_12_chunk_4
Insert of existing embedding ID: item_13_chunk_0
Add of existing embedding ID: item_13_chunk_0
Insert of existing embedding ID: item_13_chunk_1
Add of existing embedding ID: item_13_chunk_1
Insert of existing embedding ID: item_13_chunk_2
Add of existing embedding ID: item_13_chunk_2
Insert of existing embedding ID: item_13_chunk_3
Add of existing embedding ID: item_13_chunk_3
Insert of existing embedding ID: item_13_chunk_4
A

✅ Processed 5 chunks for text entry 11
✅ Processed 5 chunks for text entry 12
✅ Processed 16 chunks for text entry 13
✅ Processed 4 chunks for text entry 14


Insert of existing embedding ID: item_15_chunk_0
Add of existing embedding ID: item_15_chunk_0
Insert of existing embedding ID: item_15_chunk_1
Add of existing embedding ID: item_15_chunk_1
Insert of existing embedding ID: item_15_chunk_2
Add of existing embedding ID: item_15_chunk_2
Insert of existing embedding ID: item_15_chunk_3
Add of existing embedding ID: item_15_chunk_3
Insert of existing embedding ID: item_15_chunk_4
Add of existing embedding ID: item_15_chunk_4
Insert of existing embedding ID: item_15_chunk_5
Add of existing embedding ID: item_15_chunk_5
Insert of existing embedding ID: item_15_chunk_6
Add of existing embedding ID: item_15_chunk_6
Insert of existing embedding ID: item_15_chunk_7
Add of existing embedding ID: item_15_chunk_7
Insert of existing embedding ID: item_15_chunk_8
Add of existing embedding ID: item_15_chunk_8
Insert of existing embedding ID: item_15_chunk_9
Add of existing embedding ID: item_15_chunk_9
Insert of existing embedding ID: item_15_chunk_10


✅ Processed 18 chunks for text entry 15
✅ Processed 18 chunks for text entry 16
✅ Processed 5 chunks for text entry 17
✅ Processed 5 chunks for text entry 18


Insert of existing embedding ID: item_19_chunk_0
Add of existing embedding ID: item_19_chunk_0
Insert of existing embedding ID: item_19_chunk_1
Add of existing embedding ID: item_19_chunk_1
Insert of existing embedding ID: item_19_chunk_2
Add of existing embedding ID: item_19_chunk_2
Insert of existing embedding ID: item_19_chunk_3
Add of existing embedding ID: item_19_chunk_3
Insert of existing embedding ID: item_20_chunk_0
Add of existing embedding ID: item_20_chunk_0
Insert of existing embedding ID: item_20_chunk_1
Add of existing embedding ID: item_20_chunk_1
Insert of existing embedding ID: item_20_chunk_2
Add of existing embedding ID: item_20_chunk_2
Insert of existing embedding ID: item_20_chunk_3
Add of existing embedding ID: item_20_chunk_3
Insert of existing embedding ID: item_20_chunk_4
Add of existing embedding ID: item_20_chunk_4
Insert of existing embedding ID: item_20_chunk_5
Add of existing embedding ID: item_20_chunk_5
Insert of existing embedding ID: item_20_chunk_6
A

✅ Processed 4 chunks for text entry 19
✅ Processed 15 chunks for text entry 20


Insert of existing embedding ID: item_21_chunk_0
Add of existing embedding ID: item_21_chunk_0
Insert of existing embedding ID: item_21_chunk_1
Add of existing embedding ID: item_21_chunk_1
Insert of existing embedding ID: item_21_chunk_2
Add of existing embedding ID: item_21_chunk_2
Insert of existing embedding ID: item_22_chunk_0
Add of existing embedding ID: item_22_chunk_0
Insert of existing embedding ID: item_22_chunk_1
Add of existing embedding ID: item_22_chunk_1
Insert of existing embedding ID: item_22_chunk_2
Add of existing embedding ID: item_22_chunk_2
Insert of existing embedding ID: item_22_chunk_3
Add of existing embedding ID: item_22_chunk_3
Insert of existing embedding ID: item_22_chunk_4
Add of existing embedding ID: item_22_chunk_4
Insert of existing embedding ID: item_22_chunk_5
Add of existing embedding ID: item_22_chunk_5
Insert of existing embedding ID: item_22_chunk_6
Add of existing embedding ID: item_22_chunk_6
Insert of existing embedding ID: item_23_chunk_0
A

✅ Processed 3 chunks for text entry 21
✅ Processed 7 chunks for text entry 22
✅ Processed 15 chunks for text entry 23


Insert of existing embedding ID: item_24_chunk_0
Add of existing embedding ID: item_24_chunk_0
Insert of existing embedding ID: item_24_chunk_1
Add of existing embedding ID: item_24_chunk_1
Insert of existing embedding ID: item_24_chunk_2
Add of existing embedding ID: item_24_chunk_2
Insert of existing embedding ID: item_24_chunk_3
Add of existing embedding ID: item_24_chunk_3
Insert of existing embedding ID: item_24_chunk_4
Add of existing embedding ID: item_24_chunk_4
Insert of existing embedding ID: item_24_chunk_5
Add of existing embedding ID: item_24_chunk_5
Insert of existing embedding ID: item_24_chunk_6
Add of existing embedding ID: item_24_chunk_6
Insert of existing embedding ID: item_24_chunk_7
Add of existing embedding ID: item_24_chunk_7
Insert of existing embedding ID: item_24_chunk_8
Add of existing embedding ID: item_24_chunk_8
Insert of existing embedding ID: item_24_chunk_9
Add of existing embedding ID: item_24_chunk_9
Insert of existing embedding ID: item_24_chunk_10


✅ Processed 19 chunks for text entry 24
✅ Processed 13 chunks for text entry 25


Insert of existing embedding ID: item_26_chunk_0
Add of existing embedding ID: item_26_chunk_0
Insert of existing embedding ID: item_26_chunk_1
Add of existing embedding ID: item_26_chunk_1
Insert of existing embedding ID: item_26_chunk_2
Add of existing embedding ID: item_26_chunk_2
Insert of existing embedding ID: item_26_chunk_3
Add of existing embedding ID: item_26_chunk_3
Insert of existing embedding ID: item_26_chunk_4
Add of existing embedding ID: item_26_chunk_4
Insert of existing embedding ID: item_26_chunk_5
Add of existing embedding ID: item_26_chunk_5
Insert of existing embedding ID: item_26_chunk_6
Add of existing embedding ID: item_26_chunk_6
Insert of existing embedding ID: item_26_chunk_7
Add of existing embedding ID: item_26_chunk_7
Insert of existing embedding ID: item_26_chunk_8
Add of existing embedding ID: item_26_chunk_8
Insert of existing embedding ID: item_26_chunk_9
Add of existing embedding ID: item_26_chunk_9
Insert of existing embedding ID: item_26_chunk_10


✅ Processed 15 chunks for text entry 26
✅ Processed 16 chunks for text entry 27


Add of existing embedding ID: item_28_chunk_8
Insert of existing embedding ID: item_28_chunk_9
Add of existing embedding ID: item_28_chunk_9
Insert of existing embedding ID: item_28_chunk_10
Add of existing embedding ID: item_28_chunk_10
Insert of existing embedding ID: item_28_chunk_11
Add of existing embedding ID: item_28_chunk_11
Insert of existing embedding ID: item_28_chunk_12
Add of existing embedding ID: item_28_chunk_12
Insert of existing embedding ID: item_28_chunk_13
Add of existing embedding ID: item_28_chunk_13
Insert of existing embedding ID: item_28_chunk_14
Add of existing embedding ID: item_28_chunk_14
Insert of existing embedding ID: item_28_chunk_15
Add of existing embedding ID: item_28_chunk_15
Insert of existing embedding ID: item_29_chunk_0
Add of existing embedding ID: item_29_chunk_0
Insert of existing embedding ID: item_29_chunk_1
Add of existing embedding ID: item_29_chunk_1
Insert of existing embedding ID: item_29_chunk_2
Add of existing embedding ID: item_29_

✅ Processed 16 chunks for text entry 28
✅ Processed 16 chunks for text entry 29
✅ Processed 2 chunks for text entry 30
✅ Processed 4 chunks for text entry 31


Insert of existing embedding ID: item_32_chunk_0
Add of existing embedding ID: item_32_chunk_0
Insert of existing embedding ID: item_32_chunk_1
Add of existing embedding ID: item_32_chunk_1
Insert of existing embedding ID: item_32_chunk_2
Add of existing embedding ID: item_32_chunk_2
Insert of existing embedding ID: item_32_chunk_3
Add of existing embedding ID: item_32_chunk_3
Insert of existing embedding ID: item_32_chunk_4
Add of existing embedding ID: item_32_chunk_4
Insert of existing embedding ID: item_32_chunk_5
Add of existing embedding ID: item_32_chunk_5
Insert of existing embedding ID: item_33_chunk_0
Add of existing embedding ID: item_33_chunk_0
Insert of existing embedding ID: item_33_chunk_1
Add of existing embedding ID: item_33_chunk_1
Insert of existing embedding ID: item_33_chunk_2
Add of existing embedding ID: item_33_chunk_2
Insert of existing embedding ID: item_33_chunk_3
Add of existing embedding ID: item_33_chunk_3
Insert of existing embedding ID: item_33_chunk_4
A

✅ Processed 6 chunks for text entry 32
✅ Processed 5 chunks for text entry 33
✅ Processed 4 chunks for text entry 34
✅ Processed 5 chunks for text entry 35


Insert of existing embedding ID: item_36_chunk_0
Add of existing embedding ID: item_36_chunk_0
Insert of existing embedding ID: item_36_chunk_1
Add of existing embedding ID: item_36_chunk_1
Insert of existing embedding ID: item_36_chunk_2
Add of existing embedding ID: item_36_chunk_2
Insert of existing embedding ID: item_36_chunk_3
Add of existing embedding ID: item_36_chunk_3
Insert of existing embedding ID: item_36_chunk_4
Add of existing embedding ID: item_36_chunk_4
Insert of existing embedding ID: item_37_chunk_0
Add of existing embedding ID: item_37_chunk_0
Insert of existing embedding ID: item_37_chunk_1
Add of existing embedding ID: item_37_chunk_1
Insert of existing embedding ID: item_37_chunk_2
Add of existing embedding ID: item_37_chunk_2
Insert of existing embedding ID: item_37_chunk_3
Add of existing embedding ID: item_37_chunk_3
Insert of existing embedding ID: item_37_chunk_4
Add of existing embedding ID: item_37_chunk_4
Insert of existing embedding ID: item_37_chunk_5
A

✅ Processed 5 chunks for text entry 36
✅ Processed 6 chunks for text entry 37


Insert of existing embedding ID: item_38_chunk_8
Add of existing embedding ID: item_38_chunk_8
Insert of existing embedding ID: item_38_chunk_9
Add of existing embedding ID: item_38_chunk_9
Insert of existing embedding ID: item_38_chunk_10
Add of existing embedding ID: item_38_chunk_10
Insert of existing embedding ID: item_38_chunk_11
Add of existing embedding ID: item_38_chunk_11
Insert of existing embedding ID: item_38_chunk_12
Add of existing embedding ID: item_38_chunk_12
Insert of existing embedding ID: item_38_chunk_13
Add of existing embedding ID: item_38_chunk_13
Insert of existing embedding ID: item_38_chunk_14
Add of existing embedding ID: item_38_chunk_14
Insert of existing embedding ID: item_38_chunk_15
Add of existing embedding ID: item_38_chunk_15
Insert of existing embedding ID: item_38_chunk_16
Add of existing embedding ID: item_38_chunk_16
Insert of existing embedding ID: item_38_chunk_17
Add of existing embedding ID: item_38_chunk_17
Insert of existing embedding ID: i

✅ Processed 27 chunks for text entry 38
✅ Processed 6 chunks for text entry 39
✅ Processed 5 chunks for text entry 40
✅ Processed 4 chunks for text entry 41


Insert of existing embedding ID: item_42_chunk_0
Add of existing embedding ID: item_42_chunk_0
Insert of existing embedding ID: item_42_chunk_1
Add of existing embedding ID: item_42_chunk_1
Insert of existing embedding ID: item_42_chunk_2
Add of existing embedding ID: item_42_chunk_2
Insert of existing embedding ID: item_42_chunk_3
Add of existing embedding ID: item_42_chunk_3
Insert of existing embedding ID: item_42_chunk_4
Add of existing embedding ID: item_42_chunk_4
Insert of existing embedding ID: item_42_chunk_5
Add of existing embedding ID: item_42_chunk_5
Insert of existing embedding ID: item_42_chunk_6
Add of existing embedding ID: item_42_chunk_6
Insert of existing embedding ID: item_42_chunk_7
Add of existing embedding ID: item_42_chunk_7
Insert of existing embedding ID: item_42_chunk_8
Add of existing embedding ID: item_42_chunk_8
Insert of existing embedding ID: item_42_chunk_9
Add of existing embedding ID: item_42_chunk_9
Insert of existing embedding ID: item_42_chunk_10


✅ Processed 15 chunks for text entry 42
✅ Processed 5 chunks for text entry 43


Insert of existing embedding ID: item_44_chunk_15
Add of existing embedding ID: item_44_chunk_15
Insert of existing embedding ID: item_44_chunk_16
Add of existing embedding ID: item_44_chunk_16
Insert of existing embedding ID: item_44_chunk_17
Add of existing embedding ID: item_44_chunk_17
Insert of existing embedding ID: item_44_chunk_18
Add of existing embedding ID: item_44_chunk_18
Insert of existing embedding ID: item_44_chunk_19
Add of existing embedding ID: item_44_chunk_19
Insert of existing embedding ID: item_44_chunk_20
Add of existing embedding ID: item_44_chunk_20
Insert of existing embedding ID: item_45_chunk_0
Add of existing embedding ID: item_45_chunk_0
Insert of existing embedding ID: item_45_chunk_1
Add of existing embedding ID: item_45_chunk_1
Insert of existing embedding ID: item_45_chunk_2
Add of existing embedding ID: item_45_chunk_2
Insert of existing embedding ID: item_45_chunk_3
Add of existing embedding ID: item_45_chunk_3
Insert of existing embedding ID: item_

✅ Processed 21 chunks for text entry 44
✅ Processed 20 chunks for text entry 45
✅ Processed 3 chunks for text entry 46


Insert of existing embedding ID: item_47_chunk_0
Add of existing embedding ID: item_47_chunk_0
Insert of existing embedding ID: item_47_chunk_1
Add of existing embedding ID: item_47_chunk_1
Insert of existing embedding ID: item_47_chunk_2
Add of existing embedding ID: item_47_chunk_2
Insert of existing embedding ID: item_47_chunk_3
Add of existing embedding ID: item_47_chunk_3
Insert of existing embedding ID: item_47_chunk_4
Add of existing embedding ID: item_47_chunk_4
Insert of existing embedding ID: item_47_chunk_5
Add of existing embedding ID: item_47_chunk_5
Insert of existing embedding ID: item_47_chunk_6
Add of existing embedding ID: item_47_chunk_6
Insert of existing embedding ID: item_47_chunk_7
Add of existing embedding ID: item_47_chunk_7
Insert of existing embedding ID: item_47_chunk_8
Add of existing embedding ID: item_47_chunk_8
Insert of existing embedding ID: item_47_chunk_9
Add of existing embedding ID: item_47_chunk_9
Insert of existing embedding ID: item_47_chunk_10


✅ Processed 19 chunks for text entry 47
✅ Processed 16 chunks for text entry 48
✅ Processed 2 chunks for text entry 49
✅ Processed 3 chunks for text entry 50


Insert of existing embedding ID: item_51_chunk_0
Add of existing embedding ID: item_51_chunk_0
Insert of existing embedding ID: item_51_chunk_1
Add of existing embedding ID: item_51_chunk_1
Insert of existing embedding ID: item_51_chunk_2
Add of existing embedding ID: item_51_chunk_2
Insert of existing embedding ID: item_51_chunk_3
Add of existing embedding ID: item_51_chunk_3
Insert of existing embedding ID: item_51_chunk_4
Add of existing embedding ID: item_51_chunk_4
Insert of existing embedding ID: item_51_chunk_5
Add of existing embedding ID: item_51_chunk_5
Insert of existing embedding ID: item_51_chunk_6
Add of existing embedding ID: item_51_chunk_6
Insert of existing embedding ID: item_51_chunk_7
Add of existing embedding ID: item_51_chunk_7
Insert of existing embedding ID: item_51_chunk_8
Add of existing embedding ID: item_51_chunk_8
Insert of existing embedding ID: item_51_chunk_9
Add of existing embedding ID: item_51_chunk_9
Insert of existing embedding ID: item_51_chunk_10


✅ Processed 13 chunks for text entry 51
✅ Processed 16 chunks for text entry 52


Insert of existing embedding ID: item_53_chunk_9
Add of existing embedding ID: item_53_chunk_9
Insert of existing embedding ID: item_53_chunk_10
Add of existing embedding ID: item_53_chunk_10
Insert of existing embedding ID: item_53_chunk_11
Add of existing embedding ID: item_53_chunk_11
Insert of existing embedding ID: item_53_chunk_12
Add of existing embedding ID: item_53_chunk_12
Insert of existing embedding ID: item_53_chunk_13
Add of existing embedding ID: item_53_chunk_13
Insert of existing embedding ID: item_53_chunk_14
Add of existing embedding ID: item_53_chunk_14
Insert of existing embedding ID: item_53_chunk_15
Add of existing embedding ID: item_53_chunk_15
Insert of existing embedding ID: item_53_chunk_16
Add of existing embedding ID: item_53_chunk_16
Insert of existing embedding ID: item_53_chunk_17
Add of existing embedding ID: item_53_chunk_17
Insert of existing embedding ID: item_53_chunk_18
Add of existing embedding ID: item_53_chunk_18
Insert of existing embedding ID:

✅ Processed 19 chunks for text entry 53
✅ Processed 18 chunks for text entry 54
✅ Processed 3 chunks for text entry 55


Insert of existing embedding ID: item_56_chunk_0
Add of existing embedding ID: item_56_chunk_0
Insert of existing embedding ID: item_56_chunk_1
Add of existing embedding ID: item_56_chunk_1
Insert of existing embedding ID: item_56_chunk_2
Add of existing embedding ID: item_56_chunk_2
Insert of existing embedding ID: item_56_chunk_3
Add of existing embedding ID: item_56_chunk_3
Insert of existing embedding ID: item_56_chunk_4
Add of existing embedding ID: item_56_chunk_4
Insert of existing embedding ID: item_56_chunk_5
Add of existing embedding ID: item_56_chunk_5
Insert of existing embedding ID: item_56_chunk_6
Add of existing embedding ID: item_56_chunk_6
Insert of existing embedding ID: item_56_chunk_7
Add of existing embedding ID: item_56_chunk_7
Insert of existing embedding ID: item_56_chunk_8
Add of existing embedding ID: item_56_chunk_8
Insert of existing embedding ID: item_56_chunk_9
Add of existing embedding ID: item_56_chunk_9
Insert of existing embedding ID: item_56_chunk_10


✅ Processed 19 chunks for text entry 56
✅ Processed 16 chunks for text entry 57
✅ Processed 6 chunks for text entry 58


Insert of existing embedding ID: item_59_chunk_16
Add of existing embedding ID: item_59_chunk_16
Insert of existing embedding ID: item_59_chunk_17
Add of existing embedding ID: item_59_chunk_17
Insert of existing embedding ID: item_60_chunk_0
Add of existing embedding ID: item_60_chunk_0
Insert of existing embedding ID: item_60_chunk_1
Add of existing embedding ID: item_60_chunk_1
Insert of existing embedding ID: item_61_chunk_0
Add of existing embedding ID: item_61_chunk_0
Insert of existing embedding ID: item_61_chunk_1
Add of existing embedding ID: item_61_chunk_1
Insert of existing embedding ID: item_61_chunk_2
Add of existing embedding ID: item_61_chunk_2
Insert of existing embedding ID: item_61_chunk_3
Add of existing embedding ID: item_61_chunk_3
Insert of existing embedding ID: item_61_chunk_4
Add of existing embedding ID: item_61_chunk_4
Insert of existing embedding ID: item_61_chunk_5
Add of existing embedding ID: item_61_chunk_5
Insert of existing embedding ID: item_61_chunk

✅ Processed 18 chunks for text entry 59
✅ Processed 2 chunks for text entry 60
✅ Processed 24 chunks for text entry 61


Insert of existing embedding ID: item_62_chunk_0
Add of existing embedding ID: item_62_chunk_0
Insert of existing embedding ID: item_62_chunk_1
Add of existing embedding ID: item_62_chunk_1
Insert of existing embedding ID: item_62_chunk_2
Add of existing embedding ID: item_62_chunk_2
Insert of existing embedding ID: item_62_chunk_3
Add of existing embedding ID: item_62_chunk_3
Insert of existing embedding ID: item_62_chunk_4
Add of existing embedding ID: item_62_chunk_4
Insert of existing embedding ID: item_62_chunk_5
Add of existing embedding ID: item_62_chunk_5
Insert of existing embedding ID: item_62_chunk_6
Add of existing embedding ID: item_62_chunk_6
Insert of existing embedding ID: item_62_chunk_7
Add of existing embedding ID: item_62_chunk_7
Insert of existing embedding ID: item_62_chunk_8
Add of existing embedding ID: item_62_chunk_8
Insert of existing embedding ID: item_62_chunk_9
Add of existing embedding ID: item_62_chunk_9
Insert of existing embedding ID: item_62_chunk_10


✅ Processed 19 chunks for text entry 62
✅ Processed 2 chunks for text entry 63
✅ Processed 2 chunks for text entry 64
✅ Processed 5 chunks for text entry 65


Insert of existing embedding ID: item_66_chunk_12
Add of existing embedding ID: item_66_chunk_12
Insert of existing embedding ID: item_66_chunk_13
Add of existing embedding ID: item_66_chunk_13
Insert of existing embedding ID: item_66_chunk_14
Add of existing embedding ID: item_66_chunk_14
Insert of existing embedding ID: item_66_chunk_15
Add of existing embedding ID: item_66_chunk_15
Insert of existing embedding ID: item_66_chunk_16
Add of existing embedding ID: item_66_chunk_16
Insert of existing embedding ID: item_66_chunk_17
Add of existing embedding ID: item_66_chunk_17
Insert of existing embedding ID: item_66_chunk_18
Add of existing embedding ID: item_66_chunk_18
Insert of existing embedding ID: item_67_chunk_0
Add of existing embedding ID: item_67_chunk_0
Insert of existing embedding ID: item_67_chunk_1
Add of existing embedding ID: item_67_chunk_1
Insert of existing embedding ID: item_67_chunk_2
Add of existing embedding ID: item_67_chunk_2
Insert of existing embedding ID: ite

✅ Processed 19 chunks for text entry 66
✅ Processed 11 chunks for text entry 67


Insert of existing embedding ID: item_68_chunk_0
Add of existing embedding ID: item_68_chunk_0
Insert of existing embedding ID: item_68_chunk_1
Add of existing embedding ID: item_68_chunk_1
Insert of existing embedding ID: item_68_chunk_2
Add of existing embedding ID: item_68_chunk_2
Insert of existing embedding ID: item_68_chunk_3
Add of existing embedding ID: item_68_chunk_3
Insert of existing embedding ID: item_68_chunk_4
Add of existing embedding ID: item_68_chunk_4
Insert of existing embedding ID: item_68_chunk_5
Add of existing embedding ID: item_68_chunk_5
Insert of existing embedding ID: item_68_chunk_6
Add of existing embedding ID: item_68_chunk_6
Insert of existing embedding ID: item_68_chunk_7
Add of existing embedding ID: item_68_chunk_7
Insert of existing embedding ID: item_68_chunk_8
Add of existing embedding ID: item_68_chunk_8
Insert of existing embedding ID: item_68_chunk_9
Add of existing embedding ID: item_68_chunk_9
Insert of existing embedding ID: item_68_chunk_10


✅ Processed 14 chunks for text entry 68
✅ Processed 3 chunks for text entry 69


Insert of existing embedding ID: item_70_chunk_14
Add of existing embedding ID: item_70_chunk_14
Insert of existing embedding ID: item_70_chunk_15
Add of existing embedding ID: item_70_chunk_15
Insert of existing embedding ID: item_70_chunk_16
Add of existing embedding ID: item_70_chunk_16
Insert of existing embedding ID: item_70_chunk_17
Add of existing embedding ID: item_70_chunk_17
Insert of existing embedding ID: item_71_chunk_0
Add of existing embedding ID: item_71_chunk_0
Insert of existing embedding ID: item_71_chunk_1
Add of existing embedding ID: item_71_chunk_1
Insert of existing embedding ID: item_71_chunk_2
Add of existing embedding ID: item_71_chunk_2
Insert of existing embedding ID: item_71_chunk_3
Add of existing embedding ID: item_71_chunk_3
Insert of existing embedding ID: item_71_chunk_4
Add of existing embedding ID: item_71_chunk_4
Insert of existing embedding ID: item_71_chunk_5
Add of existing embedding ID: item_71_chunk_5
Insert of existing embedding ID: item_71_c

✅ Processed 18 chunks for text entry 70
✅ Processed 15 chunks for text entry 71
✅ Processed 3 chunks for text entry 72


Insert of existing embedding ID: item_73_chunk_1
Add of existing embedding ID: item_73_chunk_1
Insert of existing embedding ID: item_73_chunk_2
Add of existing embedding ID: item_73_chunk_2
Insert of existing embedding ID: item_73_chunk_3
Add of existing embedding ID: item_73_chunk_3
Insert of existing embedding ID: item_73_chunk_4
Add of existing embedding ID: item_73_chunk_4
Insert of existing embedding ID: item_73_chunk_5
Add of existing embedding ID: item_73_chunk_5
Insert of existing embedding ID: item_73_chunk_6
Add of existing embedding ID: item_73_chunk_6
Insert of existing embedding ID: item_73_chunk_7
Add of existing embedding ID: item_73_chunk_7
Insert of existing embedding ID: item_73_chunk_8
Add of existing embedding ID: item_73_chunk_8
Insert of existing embedding ID: item_73_chunk_9
Add of existing embedding ID: item_73_chunk_9
Insert of existing embedding ID: item_73_chunk_10
Add of existing embedding ID: item_73_chunk_10
Insert of existing embedding ID: item_73_chunk_1

✅ Processed 16 chunks for text entry 73
✅ Processed 5 chunks for text entry 74


Add of existing embedding ID: item_75_chunk_6
Insert of existing embedding ID: item_75_chunk_7
Add of existing embedding ID: item_75_chunk_7
Insert of existing embedding ID: item_75_chunk_8
Add of existing embedding ID: item_75_chunk_8
Insert of existing embedding ID: item_76_chunk_0
Add of existing embedding ID: item_76_chunk_0
Insert of existing embedding ID: item_76_chunk_1
Add of existing embedding ID: item_76_chunk_1
Insert of existing embedding ID: item_76_chunk_2
Add of existing embedding ID: item_76_chunk_2
Insert of existing embedding ID: item_76_chunk_3
Add of existing embedding ID: item_76_chunk_3
Insert of existing embedding ID: item_77_chunk_0
Add of existing embedding ID: item_77_chunk_0
Insert of existing embedding ID: item_77_chunk_1
Add of existing embedding ID: item_77_chunk_1
Insert of existing embedding ID: item_77_chunk_2
Add of existing embedding ID: item_77_chunk_2
Insert of existing embedding ID: item_77_chunk_3
Add of existing embedding ID: item_77_chunk_3
Inse

✅ Processed 9 chunks for text entry 75
✅ Processed 4 chunks for text entry 76
✅ Processed 15 chunks for text entry 77


Insert of existing embedding ID: item_78_chunk_0
Add of existing embedding ID: item_78_chunk_0
Insert of existing embedding ID: item_78_chunk_1
Add of existing embedding ID: item_78_chunk_1
Insert of existing embedding ID: item_78_chunk_2
Add of existing embedding ID: item_78_chunk_2
Insert of existing embedding ID: item_78_chunk_3
Add of existing embedding ID: item_78_chunk_3
Insert of existing embedding ID: item_78_chunk_4
Add of existing embedding ID: item_78_chunk_4
Insert of existing embedding ID: item_78_chunk_5
Add of existing embedding ID: item_78_chunk_5
Insert of existing embedding ID: item_78_chunk_6
Add of existing embedding ID: item_78_chunk_6
Insert of existing embedding ID: item_78_chunk_7
Add of existing embedding ID: item_78_chunk_7
Insert of existing embedding ID: item_79_chunk_0
Add of existing embedding ID: item_79_chunk_0
Insert of existing embedding ID: item_79_chunk_1
Add of existing embedding ID: item_79_chunk_1
Insert of existing embedding ID: item_79_chunk_2
A

✅ Processed 8 chunks for text entry 78
✅ Processed 5 chunks for text entry 79
✅ Processed 19 chunks for text entry 80


Insert of existing embedding ID: item_81_chunk_9
Add of existing embedding ID: item_81_chunk_9
Insert of existing embedding ID: item_81_chunk_10
Add of existing embedding ID: item_81_chunk_10
Insert of existing embedding ID: item_81_chunk_11
Add of existing embedding ID: item_81_chunk_11
Insert of existing embedding ID: item_81_chunk_12
Add of existing embedding ID: item_81_chunk_12
Insert of existing embedding ID: item_81_chunk_13
Add of existing embedding ID: item_81_chunk_13
Insert of existing embedding ID: item_81_chunk_14
Add of existing embedding ID: item_81_chunk_14
Insert of existing embedding ID: item_81_chunk_15
Add of existing embedding ID: item_81_chunk_15
Insert of existing embedding ID: item_82_chunk_0
Add of existing embedding ID: item_82_chunk_0
Insert of existing embedding ID: item_82_chunk_1
Add of existing embedding ID: item_82_chunk_1
Insert of existing embedding ID: item_82_chunk_2
Add of existing embedding ID: item_82_chunk_2
Insert of existing embedding ID: item_

✅ Processed 16 chunks for text entry 81
✅ Processed 16 chunks for text entry 82
✅ Processed 4 chunks for text entry 83


Insert of existing embedding ID: item_84_chunk_10
Add of existing embedding ID: item_84_chunk_10
Insert of existing embedding ID: item_84_chunk_11
Add of existing embedding ID: item_84_chunk_11
Insert of existing embedding ID: item_84_chunk_12
Add of existing embedding ID: item_84_chunk_12
Insert of existing embedding ID: item_84_chunk_13
Add of existing embedding ID: item_84_chunk_13
Insert of existing embedding ID: item_84_chunk_14
Add of existing embedding ID: item_84_chunk_14
Insert of existing embedding ID: item_84_chunk_15
Add of existing embedding ID: item_84_chunk_15
Insert of existing embedding ID: item_84_chunk_16
Add of existing embedding ID: item_84_chunk_16
Insert of existing embedding ID: item_84_chunk_17
Add of existing embedding ID: item_84_chunk_17
Insert of existing embedding ID: item_85_chunk_0
Add of existing embedding ID: item_85_chunk_0
Insert of existing embedding ID: item_85_chunk_1
Add of existing embedding ID: item_85_chunk_1
Insert of existing embedding ID: i

✅ Processed 18 chunks for text entry 84
✅ Processed 5 chunks for text entry 85
✅ Processed 5 chunks for text entry 86
✅ Processed 15 chunks for text entry 87


Insert of existing embedding ID: item_89_chunk_0
Add of existing embedding ID: item_89_chunk_0
Insert of existing embedding ID: item_89_chunk_1
Add of existing embedding ID: item_89_chunk_1
Insert of existing embedding ID: item_89_chunk_2
Add of existing embedding ID: item_89_chunk_2
Insert of existing embedding ID: item_89_chunk_3
Add of existing embedding ID: item_89_chunk_3
Insert of existing embedding ID: item_89_chunk_4
Add of existing embedding ID: item_89_chunk_4
Insert of existing embedding ID: item_89_chunk_5
Add of existing embedding ID: item_89_chunk_5
Insert of existing embedding ID: item_89_chunk_6
Add of existing embedding ID: item_89_chunk_6
Insert of existing embedding ID: item_89_chunk_7
Add of existing embedding ID: item_89_chunk_7
Insert of existing embedding ID: item_89_chunk_8
Add of existing embedding ID: item_89_chunk_8
Insert of existing embedding ID: item_89_chunk_9
Add of existing embedding ID: item_89_chunk_9
Insert of existing embedding ID: item_90_chunk_0
A

✅ Processed 18 chunks for text entry 88
✅ Processed 10 chunks for text entry 89
✅ Processed 4 chunks for text entry 90
✅ Processed 2 chunks for text entry 91


Insert of existing embedding ID: item_92_chunk_0
Add of existing embedding ID: item_92_chunk_0
Insert of existing embedding ID: item_92_chunk_1
Add of existing embedding ID: item_92_chunk_1
Insert of existing embedding ID: item_92_chunk_2
Add of existing embedding ID: item_92_chunk_2
Insert of existing embedding ID: item_92_chunk_3
Add of existing embedding ID: item_92_chunk_3
Insert of existing embedding ID: item_93_chunk_0
Add of existing embedding ID: item_93_chunk_0
Insert of existing embedding ID: item_93_chunk_1
Add of existing embedding ID: item_93_chunk_1
Insert of existing embedding ID: item_93_chunk_2
Add of existing embedding ID: item_93_chunk_2
Insert of existing embedding ID: item_93_chunk_3
Add of existing embedding ID: item_93_chunk_3
Insert of existing embedding ID: item_93_chunk_4
Add of existing embedding ID: item_93_chunk_4
Insert of existing embedding ID: item_94_chunk_0
Add of existing embedding ID: item_94_chunk_0
Insert of existing embedding ID: item_94_chunk_1
A

✅ Processed 4 chunks for text entry 92
✅ Processed 5 chunks for text entry 93
✅ Processed 16 chunks for text entry 94


Insert of existing embedding ID: item_95_chunk_1
Add of existing embedding ID: item_95_chunk_1
Insert of existing embedding ID: item_95_chunk_2
Add of existing embedding ID: item_95_chunk_2
Insert of existing embedding ID: item_95_chunk_3
Add of existing embedding ID: item_95_chunk_3
Insert of existing embedding ID: item_95_chunk_4
Add of existing embedding ID: item_95_chunk_4
Insert of existing embedding ID: item_96_chunk_0
Add of existing embedding ID: item_96_chunk_0
Insert of existing embedding ID: item_96_chunk_1
Add of existing embedding ID: item_96_chunk_1
Insert of existing embedding ID: item_97_chunk_0
Add of existing embedding ID: item_97_chunk_0
Insert of existing embedding ID: item_97_chunk_1
Add of existing embedding ID: item_97_chunk_1
Insert of existing embedding ID: item_97_chunk_2
Add of existing embedding ID: item_97_chunk_2
Insert of existing embedding ID: item_97_chunk_3
Add of existing embedding ID: item_97_chunk_3
Insert of existing embedding ID: item_98_chunk_0
A

✅ Processed 5 chunks for text entry 95
✅ Processed 2 chunks for text entry 96
✅ Processed 4 chunks for text entry 97
✅ Processed 18 chunks for text entry 98


Insert of existing embedding ID: item_99_chunk_0
Add of existing embedding ID: item_99_chunk_0
Insert of existing embedding ID: item_99_chunk_1
Add of existing embedding ID: item_99_chunk_1
Insert of existing embedding ID: item_99_chunk_2
Add of existing embedding ID: item_99_chunk_2
Insert of existing embedding ID: item_99_chunk_3
Add of existing embedding ID: item_99_chunk_3
Insert of existing embedding ID: item_99_chunk_4
Add of existing embedding ID: item_99_chunk_4
Insert of existing embedding ID: item_99_chunk_5
Add of existing embedding ID: item_99_chunk_5
Insert of existing embedding ID: item_99_chunk_6
Add of existing embedding ID: item_99_chunk_6
Insert of existing embedding ID: item_99_chunk_7
Add of existing embedding ID: item_99_chunk_7
Insert of existing embedding ID: item_99_chunk_8
Add of existing embedding ID: item_99_chunk_8
Insert of existing embedding ID: item_99_chunk_9
Add of existing embedding ID: item_99_chunk_9
Insert of existing embedding ID: item_99_chunk_10


✅ Processed 23 chunks for text entry 99
🚀 All data stored in ChromaDB successfully!


In [10]:
def query_chroma(query_text, collection, top_k=3):
    """Retrieve the most relevant text chunks from ChromaDB based on query."""
    
    # Generate embedding for the query
    query_embedding = embedding_model.encode([query_text])

    # Perform similarity search in ChromaDB
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=top_k  # Retrieve top K most relevant results
    )

    # Display retrieved results
    print(f"\n🔍 Query: {query_text}\n")
    for i, match in enumerate(results["metadatas"][0]):  # First query result batch
        print(f"✨ Match {i+1}:")
        print(f"🔹 Original ID: {match['original_id']}")
        print(f"🔹 Chunk Index: {match['chunk_index']}")
        print(f"🔹 Text: {match['text']}\n")
        print("=" * 80)

In [27]:
# Example Query
query_text = "I love cats and I want a mobile phone cover with multiple colors for samsung"
query_chroma(query_text, collection, top_k=5)


🔍 Query: I love cats and I want a mobile phone cover with multiple colors for samsung

✨ Match 1:
🔹 Original ID: 147
🔹 Chunk Index: 1
🔹 Text: _ tag : en _ in, slim hard back cover, language _ tag : en _ in, no warranty color : language _ tag : en _ in, standardized _ values : multi - colored, others item _ id : b07syj1n41 item _ name : language _ tag : en _ in, amazon brand - solimo designer cute pink cat 3d printed hard back case mobile cover for samsung galaxy core 2 g355h item _ weight : normalized _ value : unit : pounds, 0. 110231131, unit : grams, 50 model _ name : language _ tag : en _ in,

✨ Match 2:
🔹 Original ID: 176
🔹 Chunk Index: 1
🔹 Text: en _ in, slim hard back cover, language _ tag : en _ in, no warranty color : language _ tag : en _ in, standardized _ values : multi - colored, others item _ id : b07t9tn9z9 item _ name : language _ tag : en _ in, amazon brand - solimo designer pink flower patterns 3d printed hard back case mobile cover for samsung galaxy j6 plus item _ 

In [26]:
print(formatted_data[147])

Brand: language_tag: en_IN, Amazon Brand - Solimo
Bullet_point: language_tag: en_IN, 3D Printed Hard Back Case Mobile Cover for Samsung Galaxy Core 2 G355H, language_tag: en_IN, Easy to put & take off with perfect cutouts for volume buttons, audio & charging ports., language_tag: en_IN, Stylish design and appearance, express your unique personality., language_tag: en_IN, Extreme precision design allows easy access to all buttons and ports while featuring raised bezel to life screen and camera off flat surface., language_tag: en_IN, Slim Hard Back Cover, language_tag: en_IN, No Warranty
Color: language_tag: en_IN, standardized_values: multi-colored, Others
Item_id: B07SYJ1N41
Item_name: language_tag: en_IN, Amazon Brand - Solimo Designer Cute Pink Cat 3D Printed Hard Back Case Mobile Cover for Samsung Galaxy Core 2 G355H
Item_weight: normalized_value: unit: pounds, 0.110231131, unit: grams, 50
Model_name: language_tag: en_IN, Samsung Galaxy Core 2 G355H
Model_number: gz8015-SL40450
Prod