In [1]:
#Smoke test: confirms the kernel is running and can execute a cell
print("hello world")

hello world


In [2]:
import os
from pathlib import Path

#Move from the notebook's folder up to the project root (the working folder).
#The target is computed only once per kernel session, so re-running this cell
#does not climb one directory higher each time, which a bare os.chdir("../") would do.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = Path.cwd().parent.resolve()
os.chdir(PROJECT_ROOT)

In [3]:

#Import the PDF loaders and the text splitter, since the source data is PDF files
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
#Extract Data from PDF File
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan. Only entries matching the
            ``*.pdf`` glob are loaded, each one through ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory
        (the loaded documents).
    """
    pdf_loader = DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf")
    return pdf_loader.load()

In [5]:
#Load the PDFs under the Data/ folder and keep the result in extracted_data
extracted_data = load_pdf_file('Data/')

In [6]:
#Perform the chunking operation, which divides the large set of extracted documents
#into smaller, more manageable chunks

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split, e.g. the result of
            ``load_pdf_file``.
        chunk_size: Maximum size of a chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            this notebook was built with.
        chunk_overlap: Amount of content shared between neighbouring chunks,
            forwarded to ``RecursiveCharacterTextSplitter``. Defaults to 20.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)



In [7]:
#Chunk the extracted documents and report how many chunks were produced
text_chunks = text_split(extracted_data=extracted_data)
print("length of text chunks", len(text_chunks))

length of text chunks 265


In [8]:
from langchain.embeddings import HuggingFaceEmbeddings

In [14]:
#Set up the embedding model
#Download the embedding model from Hugging Face
def download_hugging_face_embeddings(model_name='sentence-transformers/msmarco-MiniLM-L-6-v3'):
    """Create the Hugging Face embedding model used to vectorize text.

    Args:
        model_name: Hugging Face model identifier passed to
            ``HuggingFaceEmbeddings``. Defaults to the model this notebook
            was built with. The Pinecone index in this notebook is created
            with ``dimension=384``, so a replacement model must produce
            vectors of that size.

    Returns:
        The configured ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [15]:
#Build the embedding model once; it is reused for indexing and for querying
embeddings = download_hugging_face_embeddings()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [None]:
#We use the embedding model above to turn each text chunk into a vector embedding,
#a numerical representation of the text that can be used for ML processing.
#Pinecone is a vector database that stores these embeddings and lets us search them quickly.


In [17]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os
from getpass import getpass

#SECURITY: never hardcode the API key in the notebook. The key that used to be
#written here was saved in plain text, so it should be treated as leaked and
#revoked/rotated in the Pinecone console.
#The key is read from the environment; a hidden prompt is used only when it is missing.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Pinecone API key: ")

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

index_name = "footballchat"

#Only create the index when it does not exist yet, so this cell can be re-run
#(create_index fails for a name that is already taken).
#dimension must match the size of the vectors produced by the embedding model.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )


In [24]:
import os
from getpass import getpass

#Make sure the OPENAI_API_KEY and PINECONE_API_KEY environment variables are populated.
#Keys that are already set are left untouched; missing ones are requested with a
#hidden prompt instead of being written into the notebook. Assigning "" here would
#wipe a valid key and leave the clients created later unauthenticated.
for _key_name in ('OPENAI_API_KEY', 'PINECONE_API_KEY'):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")


In [19]:
#Embed every text chunk and upsert the resulting vectors into the Pinecone index,
#which stores them for later similarity search
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    text_chunks,
    embedding=embeddings,
    index_name=index_name,
)

In [20]:
#Connect to the existing index; from here on, searches go through the Pinecone db
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)


In [21]:
#Similarity-search retriever configured with k=3 results per query
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [22]:
#Try the retriever on its own, before any LLM is involved
retrieved_docs = retriever.invoke("What is a VEER?")

In [23]:
#Preview the retrieved documents; the following cells use an LLM to turn them into an answer
retrieved_docs

[Document(id='8f2f49cf-e265-42bf-aa64-1835c11c5661', metadata={'page': 53.0, 'source': 'Data\\footballxos-spread-offense-playbook.pdf'}, page_content='VEER\nVEER RIGHT vs 4-3\nVEER RIGHT vs 4-3 Under \nMinnesota Vikings – Hug Self as if Cold \nVEER RIGHT vs 4-4 Stack \nVEER RIGHT vs 4-4\nVEER RIGHT vs 5-2\nVEER RIGHT vs 3-4\nVEER RIGHT vs 3-5\nVEER RIGHT vs Bear'),
 Document(id='545d81f5-d2f9-4848-9891-da7a51e91e66', metadata={'page': 52.0, 'source': 'Data\\footballxos-spread-offense-playbook.pdf'}, page_content='VEER\nVEER RIGHT vs 4-3\nVEER RIGHT vs 4-3 Under \nMinnesota Vikings – Hug Self as if Cold \nVEER RIGHT vs 4-4 Stack \nVEER RIGHT vs 4-4\nVEER RIGHT vs 5-2\nVEER RIGHT vs 3-4\nVEER RIGHT vs 3-5\nVEER RIGHT vs Bear'),
 Document(id='29ce8fce-0b2a-485a-8477-cd03b463f486', metadata={'page': 51.0, 'source': 'Data\\footballxos-spread-offense-playbook.pdf'}, page_content='VEER\nQB \nSecure snap moving towards the LOS, place ball in the gut of the RB with your eyes directly on the han

In [25]:
from langchain_openai import OpenAI

#LLM that writes the final answer, configured with temperature=0.4 and max_tokens=500
llm = OpenAI(
    max_tokens=500,
    temperature=0.4,
)

In [26]:
#Prepare the prompt the LLM uses to answer from the retrieved context
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

#Adjacent string literals are joined exactly as written, so each fragment carries
#its own trailing space; without it words run together ("tasks.Use", "answerthe question").
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use Three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

#The messages must be an ordered sequence. A set literal ({...}) has no guaranteed
#iteration order, so the system and human messages could come out in either order.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


In [28]:
#RAG (Retrieval Augmented Generation) optimizes a model's answers by having it
#first refer to a designated knowledge base.
#So we're telling the LLM to base its answer on the knowledge base we built above.

question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)


In [30]:
#Send a question through the RAG chain and print only the answer text
response = rag_chain.invoke(
    {"input": "What is VEER?"},
)
print(response["answer"])


Must get a pre-snap read to see aiming point (outside leg of first down lineman inside of the handoff key), on 
snap, shuffle towards QB and give a loose pocket for the ball, attack aiming point

VEER is a type of offensive play in American football where the quarterback secures the snap and hands the ball off to the running back while reading the block of the slot receiver. It is commonly used against defensive formations such as 4-3, 4-4, 5-2, 3-4, 3-5, and Bear. The QB and X positions must get a pre-snap read to determine the aiming point and then execute the play accordingly.


In [33]:
#Send a second question through the RAG chain and print only the answer text
response = rag_chain.invoke(
    {"input": "Give me an example of a play to run against two high safeties"},
)
print(response["answer"])

 

One possible play to run against two high safeties is the "Smash-In" play. In this play, the QB's first read is to the receiver running a dig route, who pushes vertical to 10 yards before breaking straight across the field. The second read is to the receiver running a corner route, who nods and breaks at 10 yards towards the front pylon. The third read is to the receiver running a go route, aiming for 2 yards outside the hash at 22 yards. The QB should expect the ball to be thrown immediately on the Smash-In route. The offensive line should also be prepared to block for a potential run play, with the BST pulling through the hole created by the PSG. This play is particularly effective against a three-man front, but can also be run against a four-man front.
