### Demonstrates ingesting a CSV file into a Chroma vector database with Semantic Kernel

In [1]:
import os
import warnings

from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load its entries into the process environment;
# the result is bound to `_` so the cell does not display it.
_ = load_dotenv(find_dotenv())

# Suppress all warnings for the remainder of the session.
warnings.filterwarnings('ignore')

In [2]:
import os

import semantic_kernel as sk
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

# Kernel instance that the following cells attach services and memory to.
kernel = sk.Kernel()

# OpenAI credential taken from the environment; a missing variable raises KeyError here.
api_key = os.environ['OPENAI_API_KEY']
   

In [3]:
# Load the product catalog CSV into a pandas DataFrame
import pandas as pd

catalog_csv = 'OutdoorClothingCatalog_1000.csv'
df = pd.read_csv(catalog_csv)

In [4]:
# Choose the embedding model: OpenAI's Ada model ("text-embedding-ada-002"),
# registered with the kernel under the name "ada".
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding

ada_embedding = OpenAITextEmbedding("text-embedding-ada-002", api_key)
kernel.add_text_embedding_generation_service("ada", ada_embedding)

<semantic_kernel.kernel.Kernel at 0x7f65e84f7c70>

In [5]:
# Attach a memory store to the kernel. Chroma is used because it is easy to run locally.
# `persist_directory` sets where the Chroma DB files live: here, the "catalog"
# directory under the current working directory.
from semantic_kernel.connectors.memory.chroma import ChromaMemoryStore

chroma_store = ChromaMemoryStore(persist_directory='catalog')
kernel.register_memory_store(memory_store=chroma_store)

In [6]:
# Walk the pandas DataFrame and save each row into the kernel's memory (Chroma)
async def populate_db(kernel: "sk.Kernel", df: "pd.DataFrame", collection: str = "outdoordb") -> None:
    """Save one memory record per DataFrame row.

    Each record's text is ``"<name> :  <description>"`` and its id is the
    row's index label converted to ``str``.

    Args:
        kernel: Object whose ``memory.save_information_async`` coroutine is
            awaited once per row.
        df: Frame providing ``name`` and ``description`` columns.
        collection: Memory collection to write to; defaults to ``"outdoordb"``.

    Raises:
        KeyError: If ``df`` has rows but lacks a ``name`` or ``description`` column.
    """
    # Annotations are quoted so defining the function does not evaluate them.
    if len(df.index) == 0:
        return

    # Missing cells (NaN/None) become "" and remaining values are stringified,
    # so the concatenation below cannot raise TypeError on an incomplete row.
    fields = df[["name", "description"]].fillna("").astype(str)

    for index, row in fields.iterrows():
        text = row["name"] + " :  " + row["description"]
        await kernel.memory.save_information_async(
            collection, id=str(index), text=text
        )

In [7]:
# This may take some time as we call OpenAI embedding API for each row
await populate_db(kernel,df)

In [12]:
# Natural-language question to match against the stored catalog entries
query = "Please suggest a shirt with sunblocking"

In [13]:
# Now query the memory for most relevant match using search_async specifying relevance score and "limit" of number of closest documents
query_result1 = await kernel.memory.search_async(collection="outdoordb", limit=1, min_relevance_score=0.3, query=query)

In [14]:
# Number of matches returned by the search
len(query_result1)

1

In [15]:
# Show the text of the best match. The search applies a minimum relevance score,
# so the result list can be empty; fall back to a message instead of raising IndexError.
query_result1[0].text if query_result1 else "No match met the relevance threshold."

'Sun Shield Shirt by  :  "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.'