In [None]:
# pip install langchain qdrant_client openai tiktoken streamlit

In [None]:
### DO NOT RUN ALL ###

In [None]:
### These cells are meant to be run individually, as needed, to modify the Qdrant collections! The .py files are what run the actual script. ###

In [22]:
from dotenv import load_dotenv
import streamlit as st

In [23]:
# Imports
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from qdrant_client import QdrantClient, models
import os


In [24]:
# create qdrant client

# Load variables from a local .env file into the process environment first.
# load_dotenv is imported at the top of the notebook but was never called, so
# the os.getenv() lookups below could not see values defined only in .env.
# With its default settings, variables already set in the environment are kept.
load_dotenv()

# QDRANT_HOST is the cluster location; QDRANT_API_KEY authenticates the client.
client = QdrantClient(
    os.getenv("QDRANT_HOST"),
    api_key=os.getenv("QDRANT_API_KEY")
)

In [25]:
# (re)create the collection

# vector settings: OpenAI embedding size, compared by cosine distance
vectors_config = models.VectorParams(distance=models.Distance.COSINE, size=1536)

# build the collection named by QDRANT_COLLECTION_NAME with those settings
client.recreate_collection(
    vectors_config=vectors_config,
    collection_name=os.getenv("QDRANT_COLLECTION_NAME"),
)

True

In [26]:
# wrap the Qdrant collection in a LangChain vector store

# embedding model handed to the store
embeddings = OpenAIEmbeddings()

# bind the store to the collection named by QDRANT_COLLECTION_NAME
vector_store = Qdrant(
    embeddings=embeddings,
    collection_name=os.getenv("QDRANT_COLLECTION_NAME"),
    client=client,
)

In [None]:
# # add 'extracted code' to vector store

# from langchain_text_splitters import Language, RecursiveCharacterTextSplitter

# def get_python_chunks(text):
    
#     python_splitter = RecursiveCharacterTextSplitter.from_language(
#     language=Language.PYTHON, chunk_size=50, chunk_overlap=0
# )
    
#     chunks = python_splitter.split_text(text)
#     return chunks

# with open('extracted_code.txt') as f:
#     raw_text = f.read()
    
# texts = get_python_chunks(raw_text)

# vector_store.add_texts(texts)

In [None]:
# add 'extracted text' to vector store

from langchain_text_splitters import RecursiveCharacterTextSplitter

def get_text_chunks(text):
    """Split ``text`` into overlapping chunks.

    The splitter is configured with ``chunk_size=1000`` and
    ``chunk_overlap=200``, measures length with ``len`` and treats its
    separators as literal strings (``is_separator_regex=False``).

    Args:
        text: The raw string to split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the chunks, in order).
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
        "is_separator_regex": False,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

# read the whole combined markdown/code dump into memory
with open('combined_markdown_and_code.txt') as source_file:
    raw_text = source_file.read()

# chunk it, then hand the chunks to the vector store
texts = get_text_chunks(raw_text)
vector_store.add_texts(texts)

In [21]:
from langchain_community.document_loaders import PyPDFLoader
# add 'extracted pdf' to vector store

from langchain_text_splitters import RecursiveCharacterTextSplitter

def get_pdf_chunks(pdf_filepath):
    """Load a PDF and split its extracted text into overlapping chunks.

    Args:
        pdf_filepath: Path of the PDF file to read.

    Returns:
        list[str]: Text chunks produced by a splitter configured with
        ``chunk_size=1000`` and ``chunk_overlap=200``, in document order.
    """
    loader = PyPDFLoader(pdf_filepath)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )

    # Split the text extracted from the PDF, not the path string itself
    # (the original passed ``pdf_filepath`` straight to ``split_text``).
    chunks = []
    for page in loader.load():
        chunks.extend(text_splitter.split_text(page.page_content))
    return chunks


slides_filepath = 'Slides'
for slide in os.listdir(slides_filepath):
    # Only PDFs can be parsed by PyPDFLoader; skip anything else in the folder.
    if not slide.lower().endswith('.pdf'):
        continue
    # os.listdir() yields bare file names, so rebuild the full path before loading.
    texts = get_pdf_chunks(os.path.join(slides_filepath, slide))
    # Nothing to upload when no text could be extracted from the file.
    if texts:
        vector_store.add_texts(texts)

In [28]:
# add transcripts to vector store

from langchain_text_splitters import RecursiveCharacterTextSplitter

def get_text_chunks(text_filepath):
    """Split the given string into overlapping chunks.

    Note: despite the parameter name, the argument is passed straight to the
    splitter and split as text; this function does not open or read a file.

    The splitter is configured with ``chunk_size=1000`` and
    ``chunk_overlap=200``, measures length with ``len`` and treats its
    separators as literal strings (``is_separator_regex=False``).

    Args:
        text_filepath: The string to split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for the
        given string (the chunks, in order).
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
        "is_separator_regex": False,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text_filepath)


transcripts_filepath = 'Transcripts'
for transcript in os.listdir(transcripts_filepath):
    # os.listdir() yields bare file names; rebuild the path relative to the folder.
    transcript_path = os.path.join(transcripts_filepath, transcript)
    # Skip sub-directories and anything else that cannot be read as a file.
    if not os.path.isfile(transcript_path):
        continue
    # Chunk the transcript's contents. The original passed the file name itself
    # to the splitter, so only file names were embedded.
    with open(transcript_path) as f:
        texts = get_text_chunks(f.read())
    # Nothing to upload for an empty transcript.
    if texts:
        vector_store.add_texts(texts)

In [None]:
# delete the collection named by QDRANT_COLLECTION_NAME

client.delete_collection(
    collection_name=os.getenv("QDRANT_COLLECTION_NAME"),
)

In [None]:
from langchain.schema import retriever

from langchain_community.llms import OpenAI
from langchain.chains import RetrievalQA

# build a retrieval QA chain: an OpenAI LLM answering over the vector store

retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=vector_store.as_retriever())

In [None]:
# # conversational retrieval qa
# from langchain.chains import ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI

# llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=1)
# crc = ConversationalRetrievalChain.from_llm(llm,retriever)

In [None]:
# ask the retrieval chain a smoke-test question and show its answer
query = "What topic are you knowledgable about?"
response = retrievalQA.run(query)
print(response)

In [None]:
# playing with setting up prompt template
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, FewShotPromptTemplate

# define model
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=1)

# Define a prefix that explains the prompt.
prefix = """
Here are examples between a human and AI. The human provides a word, and
the AI provides a single sentence with easy to read words that mostly rhyme
with the word the human provided. The sentence does not have to include the 
original word. For example:
"""

# Create examples.
examples = [
    {
        "query": "rat",
        "answer": "The cat sat next to the bat."
    }, {
        "query": "frog",
        "answer": "A dog hops a log in the bog."
    }, {
        "query": "ten",
        "answer": "Ben sent ten hens to the glen."
    }
]

# Define a format for the examples.
example_format = """
Human: {query}
AI: {answer}
"""

# Create a prompt template for the examples.
example_template = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_format
)

# Provide a suffix that includes the user's input.
# The default "stuff" combine-documents chain fills the prompt with the
# retrieved documents as {context} and the user's question as {question},
# so the final prompt has to expose exactly those variables (not {query}).
suffix = """
Context: {context}
Human: {question}
AI: 
"""

# Construct the few shot prompt template.
prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_template,
    input_variables=["context", "question"],
    prefix=prefix,
    suffix=suffix,
    example_separator="\n\n"
)

# Build the conversational chain.
# - The retriever must be a retriever object built from the vector store; the
#   bare name `retriever` is the module imported from langchain.schema.
# - from_llm() has no `prompt` parameter; the answer prompt is forwarded to the
#   combine-documents chain through combine_docs_chain_kwargs.
crc = ConversationalRetrievalChain.from_llm(
    llm,
    vector_store.as_retriever(),
    combine_docs_chain_kwargs={"prompt": prompt_template},
)