In [1]:
import os
import openai
import langchain
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS
from langchain.document_loaders import CSVLoader
from langchain.embeddings.base import Embeddings
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator
import pandas as pd
import tiktoken
import json
import warnings
warnings.filterwarnings("ignore")

In [2]:
index_name = "CPT_index"
file_name = "./data/CPT.csv"

In [3]:
openai_api_key = os.environ['OPENAI_API_KEY']
client = openai.OpenAI(
    api_key=openai_api_key,
    base_url="https://cmu.litellm.ai",
)

In [6]:
class CustomOpenAIEmbeddings(Embeddings):
    def __init__(self, client):
        self.client = client

    def embed_documents(self, texts):
        embeddings = []
        counter = 0
        for text in texts:
            if(counter%100==0):
                print(counter)
            counter +=1
            response = self.client.embeddings.create(input=text, model="text-embedding-3-small")
            embedding = response.data[0].embedding
            embeddings.append(embedding)
        return embeddings

    def embed_query(self, text):
        response = self.client.embeddings.create(input=text, model="text-embedding-3-small")
        return response.data[0].embedding

embedding_model = CustomOpenAIEmbeddings(client)

In [5]:
loader = CSVLoader(file_path=file_name)
documents = loader.load()
print(len(documents))

1214


In [7]:
if not os.path.exists(index_name):
    document_texts = [doc.page_content for doc in documents]
    document_embeddings = embedding_model.embed_documents(document_texts)

    text_embedding_pairs = zip(document_texts, document_embeddings)
    vector_store = FAISS.from_embeddings(text_embedding_pairs, embedding_model.embed_query)

    vector_store.save_local(index_name)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200


`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.
