In [2]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (NOTE(review): presumably this supplies the OpenAI credentials used by the
# embedding and LLM clients below — confirm).
load_dotenv()

from langchain.document_loaders import Docx2txtLoader

# Read the meeting transcript (.docx) from disk into LangChain documents.
loader = Docx2txtLoader(
    "data/Team () - EPAM AI Hackathon Group Connect_2023-12-13.docx"
)
doc = loader.load()

# Sanity check: report how many characters the first loaded document holds.
transcript_text = doc[0].page_content
print(len(transcript_text))

37338


In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the transcript into overlapping chunks that are small enough to embed
# individually and to fit into a single LLM prompt.
text_splitter = RecursiveCharacterTextSplitter(
    # Candidate split points: line breaks first, then spaces, then the
    # remaining whitespace characters.
    separators=["\n", " ", "\t", "\r", "\f", "\v"],
    chunk_size=1000,  # target upper bound on characters per chunk
    chunk_overlap=100,  # characters shared between neighbouring chunks
)

docs = text_splitter.create_documents([doc[0].page_content])

# Number of chunks produced. Only the last expression of a cell is displayed,
# so inspect `len(docs[0].page_content)` in its own cell if the size of the
# first chunk is needed.
len(docs)

42

In [4]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding client used to vectorise the chunks, constructed with the library
# defaults (NOTE(review): presumably picks up the OpenAI key from the
# environment — confirm).
embeddings = OpenAIEmbeddings()

In [6]:
from langchain.vectorstores import Chroma

# Embed each chunk in `docs` with `embeddings` and index the vectors in a
# Chroma store so they can be searched by similarity in the cells below.
db = Chroma.from_documents(docs, embeddings)

In [7]:
# Wrap the vector store in a retriever (default settings) for looking up
# documents by query text.
retriever = db.as_retriever()

In [8]:
# Smoke-test the retriever with a question about one of the speakers.
# NOTE: `results` is not inspected in this cell and is reassigned by the
# question-answering cells further down.
results = retriever.get_relevant_documents("Who is Laura?")

In [9]:
from langchain.llms import OpenAI

In [10]:
# Completion model shared by the summarisation and question-answering chains.
llm = OpenAI(
    temperature=0.5,  # some sampling randomness, so reruns may word answers differently
    max_tokens=800,  # cap on the length of each completion
    top_p=1.0,  # nucleus sampling left at its widest setting
)

In [11]:
from langchain.chains.summarize import load_summarize_chain

In [12]:
# Build a map-reduce summarisation chain on top of `llm`; verbose=True makes
# the chain print the formatted prompts while it runs.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="map_reduce",
    verbose=True,
)

In [13]:
# Run the summarisation chain over every chunk; the resulting summary text is
# kept in `output` for display in the next cell.
output = summary_chain.run(docs)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"0:0:0.0 --> 0:0:4.470
Laura Hunter
Even a quick connect with everyone else and freeing up some time and then consolidate and notes.

0:0:4.480 --> 0:0:5.970
Laura Hunter
So we're kind of all on the same page.

0:0:5.980 --> 0:0:8.380
Laura Hunter
It'll be really tough to get everyone together, especially this time of year.

0:0:8.870 --> 0:0:11.160
Laura Hunter
Umm, but just to get things started.

0:0:11.170 --> 0:0:11.540
Laura Hunter
My name.

0:0:11.550 --> 0:0:15.500
Laura Hunter
Laura and I'm a senior recruiter in our Newcastle office.

0:0:15.550 --> 0:0:18.540
Laura Hunter
It's coming up to three years in January.

0:0:18.550 --> 0:0:19.680
Laura Hunter
I'll have been at EPAM.

0:0:20.530 --> 0:0:34.590
Laura Hunter
Predominantly I hire for our deep practice, but I've covered lots from product rol

In [14]:
# Echo the summary produced by the summarisation chain.
output

' Laura Hunter and Judit Timar, senior recruiters in the Newcastle office, are participating in their first ever hackathon. They discuss the purpose of the board and the importance of respecting different thoughts and personalities. They also discuss ideas for a message, and Monica Caruana offers some of her own. Pavel Kiadrynski shares his expertise in developing complex systems and machine learning models, and Dennis introduces himself. Monica Caruana shares documents, challenges, and important dates related to the hackathon, and they discuss ideas for a project and how to make the website more accessible for all users. They agree to have a meeting the next day to discuss tips and techniques to navigate a start time period.'

In [15]:
from langchain.chains.question_answering import load_qa_chain

In [16]:
# Question-answering chain over the same LLM, using the "stuff" chain type
# (presumably all supplied documents are placed into a single prompt —
# confirm against the LangChain docs).
chain = load_qa_chain(llm, chain_type="stuff")

In [17]:
# Ask who took part in the call: fetch the chunks most similar to the
# question, then let the QA chain answer from those chunks only.
query = "Who are participants?"
results = db.similarity_search(query)
chain.run(question=query, input_documents=results)

' The participants are Pavel Kiadrynski, Laura Hunter, Megan McMillan, Judit Timar, and Monica Caruana.'

In [19]:
# Profile question about Pavel Kiadrynski: retrieve the most similar chunks
# and answer from them with the QA chain.
query = "Who is Pavel Kiadrynski? What skills and experience does he have?"
results = db.similarity_search(query)
chain.run(question=query, input_documents=results)

' Pavel Kiadrynski is a Python literature engineer with over 14 years of experience. He has a deep understanding of programming and enjoys challenges that come with developing complex systems. He has worked on a wide range of projects, from mobile web applications to machine learning models. He is a lifelong learner who is always looking for ways to improve his craft.'

In [41]:
# Profile question about Judit Timar: retrieve the most similar chunks and
# answer from them with the QA chain. The pronoun in the question matches the
# person being asked about.
query = "Who is Judit Timar? What skills and experience does she have?"
results = db.similarity_search(query)
chain.run(input_documents=results, question=query)

' Judit Timar is a project manager and business analyst from Hungary who relocated to Germany five years ago. She has experience with project management and business analysis, and her passion is to build a diverse team for a project, taking into account not only gender, nationality, religion, but also personality, whether someone is an extrovert or introvert.'

In [42]:
# Profile question about Megan McMillan: retrieve the most similar chunks and
# answer from them with the QA chain. The pronoun in the question matches the
# person being asked about.
query = "Who is Megan McMillan? What skills and experience does she have?"
results = db.similarity_search(query)
chain.run(input_documents=results, question=query)

' Megan McMillan is the office manager in Newcastle. She does not have any tech programming experience but is interested in contributing to the hackathon in any way she can.'

In [44]:
# Ask which project ideas came up: retrieve the most similar chunks and
# answer from them with the QA chain.
query = "What ideas do they have for hackathon?"
results = db.similarity_search(query)
chain.run(question=query, input_documents=results)

' They have discussed potential ideas such as improving pre-boarding processes and driving inclusivity and equity within technology.'