In [10]:
#load documents with langchain and index them in a FAISS vectorstore for retrieval by the chat model (gpt-4 in get_basic_qa_chain)
from langchain.text_splitter import CharacterTextSplitter as CTS
from langchain.document_loaders import DirectoryLoader as DL
from langchain.document_loaders import CSVLoader as CSVL
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import config
import os

# Copy the key from the local `config` module into the process environment.
# NOTE(review): presumably the langchain OpenAI clients used later read
# OPENAI_API_KEY from the environment - confirm against the installed version.
os.environ["OPENAI_API_KEY"] = config.gptkey

def data_train(data_dir="data/", index_dir="vectorstore", chunk_size=600, chunk_overlap=100):
    """Build a FAISS index from the documents under ``data_dir`` and save it.

    Loads the directory with ``DirectoryLoader``, splits the documents into
    overlapping chunks on blank lines, embeds the chunks with
    ``OpenAIEmbeddings`` and writes the resulting FAISS index to ``index_dir``
    with ``save_local``.

    Args:
        data_dir: Directory the source documents are loaded from.
        index_dir: Folder the FAISS index is saved to; ``load_retriever``
            reads the default location ``"vectorstore"``.
        chunk_size: Target chunk length, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        None. Prints ``done!`` once the index has been saved.

    Raises:
        ValueError: If loading and splitting produced no chunks to embed.
    """
    raw_docs = DL(data_dir).load()

    # The splitter only cuts on the "\n\n" separator, so a single paragraph
    # longer than chunk_size is kept whole. That is what the
    # "Created a chunk of size N, which is longer than the specified 600"
    # messages emitted by this function refer to.
    text_splitter = CTS(
        separator="\n\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    docs = text_splitter.split_documents(raw_docs)

    # Fail early with a readable message instead of handing an empty list to
    # the embedding / index-building step.
    if not docs:
        raise ValueError(f"no documents found to index in {data_dir!r}")

    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(docs, embeddings)

    # save_local is used as a workaround to pickling the vectorstore object.
    vectorstore.save_local(index_dir)

    print("done!")

# Run the indexing step: builds the FAISS index and saves it to "vectorstore".
data_train()

Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 716, which is longer than the specified 600
Created a chunk of size 677, which is longer than the specified 600
Created a chunk of size 637, which is longer than the specified 600
Created a chunk of size 634, which is longer than the specified 600
Created a chunk of size 612, which is longer than the specified 600
Created a chunk of size 627, which is longer than the specified 600
Created a chunk of size 720, which is longer than the specified 600
Created a chunk of size 843, which is longer than the specified 600
Created a chunk of size 999, which is longer than the specified 600
Created a chunk of size 803, which is longer than the specified 600
Created a chunk of size 679, which is longer than the specified 600
Created a chunk of size 816, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 685, which is longer tha

done!


In [11]:
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores.base import VectorStoreRetriever
from langchain.chat_models import ChatOpenAI

from langchain.memory import ConversationBufferMemory

# Base prompt. The template has two placeholders, {chat_history} and {question},
# and ends with "Standalone question:".
# NOTE(review): base_prompt is not passed to any chain in the visible cells -
# confirm whether it was meant to be supplied to the conversational chain.
template = """You are an expert at Collectible Card Games, like Magic: The Gathering. You have studied the rules and cards for another
collectible card game called Marvel SNAP, which is entirely digital and is based on popular characters and settings made by Marvel Comics.
Your role is to coach Marvel SNAP players on how to improve their gameplay and recommend possible 12-card deck combinations.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

base_prompt = PromptTemplate.from_template(template)

#building loaders/chatbot
def load_retriever():
    """Load the FAISS index saved under "vectorstore" and wrap it in a retriever.

    Returns:
        A ``VectorStoreRetriever`` backed by the loaded FAISS vectorstore.
    """
    embedding_model = OpenAIEmbeddings()
    faiss_index = FAISS.load_local("vectorstore", embedding_model)
    return VectorStoreRetriever(vectorstore=faiss_index)

def get_basic_qa_chain(model_name="gpt-4", temperature=0, condense_question_prompt=None):
    """Assemble a conversational retrieval chain over the saved vectorstore.

    Creates a ``ChatOpenAI`` model, the retriever from ``load_retriever`` and a
    ``ConversationBufferMemory`` stored under the ``chat_history`` key, then
    combines them with ``ConversationalRetrievalChain.from_llm``.

    Args:
        model_name: Chat model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        condense_question_prompt: Optional prompt forwarded to ``from_llm``
            under the same keyword (for example the notebook's
            ``base_prompt``). When ``None`` the argument is omitted and the
            library default is used, which matches the previous behaviour.

    Returns:
        The chain object built by ``ConversationalRetrievalChain.from_llm``.
    """
    llm = ChatOpenAI(model_name=model_name, temperature=temperature)
    retriever = load_retriever()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True)

    # Only forward the prompt when the caller supplied one; passing None would
    # replace the library's default prompt with nothing.
    extra_kwargs = {}
    if condense_question_prompt is not None:
        extra_kwargs["condense_question_prompt"] = condense_question_prompt

    model = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        **extra_kwargs)
    return model