In [1]:
from dotenv import load_dotenv
# Load settings from a .env file via python-dotenv before anything reads the environment.
load_dotenv()

True

In [11]:
from google.oauth2 import service_account
from langchain_google_community.gcs_file import GCSFileLoader
from langchain.document_loaders import TextLoader
import os

In [7]:

# The same service-account key file is used twice: once to build an explicit
# Credentials object (its project_id is read later) and once via the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
credentials_file = "credentials.json"
credentials = service_account.Credentials.from_service_account_file(credentials_file)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file

In [12]:
def load_text(file_path):
    """Return a ``TextLoader`` for ``file_path``.

    Supplied to ``GCSFileLoader`` as its ``loader_func`` callback.
    """
    text_loader = TextLoader(file_path)
    return text_loader

In [20]:
# Loader for the single debate transcript blob in the "harmon-kennedy" bucket;
# the project id comes from the service-account credentials and `load_text`
# is supplied as the loader_func.
loader = GCSFileLoader(
    project_name=credentials.project_id,
    bucket="harmon-kennedy",
    blob="1st-nixon-kennedy-debate-19600926.txt",
    loader_func=load_text,
)

In [33]:
# Run the loader; the result is indexed as a list of documents below.
documents = loader.load()

In [34]:
# Sanity check: print the text of the first loaded document.
print(documents[0].page_content)


[Text, format, and style are as published in Freedom of Communications: Final Report of the Committee on Commerce, United States Senate..., Part III: The Joint Appearances of Senator John F. Kennedy and Vice President Richard M. Nixon and Other 1960 Campaign Presentations. 87th Congress, 1st Session, Senate Report No. 994, Part 3. Washington: U.S. Government Printing Office, 1961.]
Monday, September 26, 1960
Originating CBS, Chicago, Ill., All Networks carried.
Moderator, Howard K. Smith.
MR. SMITH: Good evening.
The television and radio stations of the United States and their affiliated stations are proud to provide facilities for a discussion of issues in the current political campaign by the two major candidates for the presidency.
The candidates need no introduction. The Republican candidate, Vice President Richard M. Nixon, and the Democratic candidate, Senator John F. Kennedy.
According to rules set by the candidates themselves, each man shall make an opening statement of approx

In [31]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [35]:
# Break the transcript into chunks (chunk_size=1000, chunk_overlap=50) so that
# each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

In [36]:
from langchain_ollama import OllamaEmbeddings

# Embedding function backed by the local Ollama "llama3:latest" model.
# NOTE(review): llama3 is a chat model; a dedicated embedding model may retrieve
# more relevant chunks -- confirm against retrieval quality. Changing the model
# requires rebuilding any collection that was persisted with the old embeddings.
embedding = OllamaEmbeddings(model="llama3:latest")

In [37]:
from langchain_chroma.vectorstores import Chroma

In [38]:
# Chroma store for the "first-debate" collection, using the Ollama embedding
# function; persist_directory is where the data is saved locally (drop it if
# on-disk persistence is not needed).
vector_store = Chroma(collection_name="first-debate",
                      embedding_function=embedding,
                      persist_directory="./chroma_langchain_db")

In [51]:
# Build the persisted "first-debate" collection (in ./data) from the split chunks.
#
# `from_documents` is a classmethod that creates its own store, so it is called
# on the `Chroma` class rather than through an existing instance.
#
# Deterministic ids make this cell safe to re-run: without explicit ids every
# run gets fresh random ids and appends another full copy of the chunks to the
# persisted collection, so searches start returning duplicates. With stable ids
# a re-run overwrites the same entries instead.
# NOTE: if "data" was already populated by earlier runs without ids, delete
# that directory once to clear the previously duplicated entries.
chunk_ids = [f"first-debate-{i}" for i in range(len(texts))]
vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 ids=chunk_ids,
                                 persist_directory="data",
                                 collection_name="first-debate")

In [65]:
# Embed the question explicitly, then look up the 5 closest chunks by vector.
query_vector = embedding.embed_query("What did Kennedy think about Lincoln?")
results = vectordb.similarity_search_by_vector(embedding=query_vector, k=5)

In [66]:
# Display the retrieved documents to inspect what the search returned.
results

[Document(metadata={'source': 'gs://harmon-kennedy/1st-nixon-kennedy-debate-19600926.txt'}, page_content="And if we meet our responsibilities, I think freedom will conquer. If we fail--if we fail to move ahead, if we fail to develop sufficient military and economic and social strength here in this country, then I think that the tide could begin to run against us, and I don't want historians 10 years from now, to say, these were the years when the tide ran out for the United States. I want them to say, these were the years when the tide came in, these were the years when the United States started to move again. That's the question before the American people, and only you can decide what you want, what you want this country to be, what you want to do with the future.\nI think we're ready to move. And it is to that great task, if we are successful, that we will address ourselves.\nMR. SMITH: Thank you very much, gentlemen.\nThis hour has gone by all too quickly. Thank you very much for pe

In [67]:
from langchain_ollama import ChatOllama

from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [68]:
# Fetch the "langchain-ai/retrieval-qa-chat" prompt from the LangChain hub.
template = hub.pull("langchain-ai/retrieval-qa-chat")



In [79]:
# Chat model used to generate answers; temperature=0 is set explicitly.
llm = ChatOllama(model="llama3", temperature=0)

In [80]:
# Combine the chat model with the hub prompt into a document-answering chain.
stuff_documents_chain = create_stuff_documents_chain(llm, template)

In [81]:
# Wire the vector store (as a retriever with default settings) to the
# document-answering chain to form the full retrieval chain.
retriever = vectordb.as_retriever()
rag_chain = create_retrieval_chain(retriever, stuff_documents_chain)

In [82]:
# Ask the question through the retrieval chain; the question goes under the "input" key.
response = rag_chain.invoke({"input": "How Did Kennedy feel about Lincoln?"})

In [84]:
# Show only the generated answer from the chain's response.
response["answer"]

"The context does not mention John F. Kennedy's feelings about Abraham Lincoln. It appears to be a transcript of a speech or debate between two politicians, likely Richard Nixon and John F. Kennedy, during the 1960 presidential election. There is no mention of Lincoln in this specific passage."