<a href="https://colab.research.google.com/github/scorzo/inmem-vector-store/blob/main/inmem_vector_store.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages this notebook uses: pandas (CSV loading),
# langchain (DataFrame loader, text splitter, embedding and vector-store
# wrappers), sentence-transformers and chromadb (presumably the backends for
# the SentenceTransformerEmbeddings and Chroma wrappers imported below).
!pip install pandas
!pip install langchain
!pip install sentence-transformers
!pip install chromadb

In [None]:
import pandas as pd
from langchain.document_loaders import DataFrameLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Load the support tickets from the sample CSV.
support_tickets_df = pd.read_csv("sample_data/support_tickets.csv")

# The loader takes each document's content from one column. Expose the issue
# description under the name "text" for that purpose; the original column is
# left in place because the result printout below reads it from the metadata.
support_tickets_df["text"] = support_tickets_df["Support Issue Description"]

# Turn every row into a document whose content is the "text" column.
loader = DataFrameLoader(support_tickets_df, page_content_column="text")
documents = loader.load()

# Break the documents into chunks of up to 1000 characters with no overlap.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)

# Open-source sentence-transformers model used to embed the chunks.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

# Embed the chunks and index them in a Chroma vector store
# (no persist directory is passed here).
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
)

# Example query: look up tickets similar to a free-text problem statement.
# NOTE: `docs` is rebound here from the chunk list to the search results.
query = "Computer issues with video conferencing"
docs = db.similarity_search(query)

# Report each match by ticket ID and issue description.
for hit in docs:
    ticket_id = hit.metadata["Ticket ID"]
    issue = hit.metadata["Support Issue Description"]
    print(f"Ticket ID: {ticket_id}, Issue: {issue}")