# Creating the Vector store

### Creating the Vector store and load in the documents

In [2]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
import os
from langchain.document_loaders import PyPDFLoader

In [3]:
#import PDF documents
directory = 'documents'
raw_docs = []
 
# iterate over files directory
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        loader = PyPDFLoader(f)
        raw_docs.extend(loader.load())

In [4]:
#previewing the documents in the list
raw_docs[0]

Document(page_content=' \n \n  UCWDC® Rules,  \nContest Procedures  \nand Scoring Format  \n2023 - 2025 \nCOUPLES  \n \nUNITED COUNTRY WESTERN DANCE COUNCIL\uf0d2  \nCOPYRIGHT 1987,  REVISED 2023 ', metadata={'source': 'documents\\UCWDC Rules_Couples_VERIFIED_FINAL V2.pdf', 'page': 0})

In [5]:
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
#raw_documents = TextLoader(r"documents\UCWDC Rules_Line Dance.pdf").load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
documents = text_splitter.split_documents(raw_docs)
db = FAISS.from_documents(documents, OpenAIEmbeddings())

In [6]:
db.save_local("FAISS_Country")

### Testing it with some code

In [7]:
from langchain.memory import ConversationSummaryMemory

In [10]:
llm = ChatOpenAI()

In [11]:
memory = ConversationSummaryMemory(
    llm=llm, memory_key="chat_history", return_messages=True
)

In [12]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()
retriever = db.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [14]:
qa("what is waltz")

{'question': 'what is waltz',
 'chat_history': [SystemMessage(content='', additional_kwargs={})],
 'answer': 'The waltz is a smooth, progressive dance that uses patterns that move diagonally and are performed with rise and fall, accentuated with swing and sway, emphasizing hover. The basic timing for the waltz is 1 2 3 4 5 6.'}

In [15]:
def handle_userinput1(user_question):
    response = qa({"question": user_question})
    chat_history = response['answer']
    return chat_history

In [16]:
qa("How many line dance categories are there")

{'question': 'How many line dance categories are there',
 'chat_history': [SystemMessage(content='The human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good because it will help humans reach their full potential.\n\nThe human asks the AI about the waltz. The AI explains that the waltz is a smooth, progressive dance that uses specific patterns and timing.', additional_kwargs={})],
 'answer': 'There are six categories for line dances: Rise & Fall, Pulse, Smooth, Cuban, Funky/Street, and Novelty/Stage.'}

In [17]:
handle_userinput1("are arials allowed?")

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID req_060da41ed4718893ccd34469f7ecaaf5 in your message.) {
  "error": {
    "message": "The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID req_060da41ed4718893ccd34469f7ecaaf5 in your message.)",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID req_

'No, aerials are not allowed in line dances according to the UCWDC Dance Rules.'

In [80]:
query = "Are arials allowed"
docs = db.similarity_search(query)
print(docs[1].page_content)

- 
UCWDC Dance Rules, Contest Procedures and Scoring Format – LINE DANCE              Page 6 of 16 c. Movement allowances and limitations for Line Dance Showcase and Solo Medleys within SuperStars, 
RisingStars , and Crown : 
i. A competitor  may begin the dance facing any direction of his or her choosing and may begin 
the dance at any time . 
ii. In Line Dance Showcase, a competitor  must dance repetitive patter ns according to the 
phrasing of the music.  
iii. Aerials are not allowed in Crown  Solo Medley  
iv. There are no movement limitations in SuperStars and RisingStars  Solo Medley . 
 
H. Floor Craft  
1. For all dances, competitor s shall  always be courteous on the floor , as well as vigilant so as not to interfere 
with other competitor s. 
2. For Line Dance categories, once the competitor s evenly space or stagger themselves on the floor relative to 
each other, everyone’s “dance space” is revealed. Competitor s should perform to  reasonable maintain their 
relative posit