#### Install Libraries

In [2]:
!pip install python-dotenv pypdf langchain langchain-openai langchain-pinecone -q

You should consider upgrading via the '/Users/rafraf/Documents/code/aaltoai-hackathon-24/aaltoai_venv/bin/python3 -m pip install --upgrade pip' command.[0m


#### Import libraries

In [4]:
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

from dotenv import load_dotenv, find_dotenv
import os

PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

load_dotenv(find_dotenv())

True

#### Load Data

In [6]:
loader = PyPDFLoader("../data/stihl.pdf")
data = loader.load()

In [16]:
print (f'You have {len(data)} document(s) in your data')
print (f'There are {len(data[0].page_content)} characters in your sample document')
print (f'Here is a sample: {data[120].page_content}')

You have 247 document(s) in your data
There are 23 characters in your sample document
Here is a sample: Aerators in detail
ƒ
Technology
01STIHL LITHIUM-ION BATTERY TECHNOLOGY
Operation with STIHL AK batteries enables comfortable and low-noise work.
Convenience
02HEIGHT-ADJUSTABLE HANDLEBAR
In the RLA 240 and RLE 240, the handlebar can be easily adjusted to four diﬀerent heights. In addition, it is foldable in three places, enabling easy transport and space-saving storage.
03GRASS CATCHER BOX
The 50-litre grass catcher box is made from a tearproof woven textile. The steel frame with carry handle ensures stability and makes it comfortable to carry.
Please note that not all products are equipped with the features presented here. 
Only the most important features are described on this page. For further explanations and information, see from page 482 and online at stihl.com .
01Low-noise 
and easy to 
manoeuvre
02Ergonomic work
03Easy handling
Aerators and 
accessoriesƒ
● For private use
● 

#### Chunk Data

In [25]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
texts = text_splitter.split_documents(data)

In [26]:
print (f'Now you have {len(texts)} documents')

Now you have 1016 documents


In [57]:
texts[0]

Document(page_content='For comfortable operation where \nspace is limited. Single-handed multi-function handle, loop handle with barrier bar, STIHL 2-MIX engine, harness.\nOrder number 4144 200 0096\nFS 56\n27 .2 cm3 • 0.8 kW / 1.1 hp  • 5.1 kg①\nFor working on areas of tough \ngrass. Single-handed multi-function handle, ergonomic bike handle, STIHL 2-MIX engine, optimised trimmer angle, straight shaft, harness.\nOrder number 4144 200 0036\nAdditional versions: \nFS 56 C-E 4144 200 0095\nFS 70 C-E\n27 .2 cm3 • 0.9 kW / 1.2 hp  • 5.4 kg①\nFor mowing large areas of tough \ngrass. STIHL ErgoStart, single-handed multi-function handle, ergonomic bike handle, STIHL 2-MIX engine, harness.\nOrder number 4144 200 0076\nAdditional versions: \nFS 70 RC-E 4144 200 0105FS 89\n28.4 cm3 • 0.95 kW /1 .3  h p  • 5.8 kg①\nFor working on areas of tough', metadata={'source': './stihl.pdf', 'page': 123})

#### Pinecone: Storing Embeddings

In [65]:
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
index_name = "stihl"
docsearch = PineconeVectorStore.from_documents(texts, embeddings, index_name=index_name)

In [66]:
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name)

#### Define Chain

In [68]:
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

  warn_deprecated(


In [83]:
query = "Give me a machine that features a single-handed operation, a loop handle with a safety barrier bar and around 25 cm³ with a high power output"
docs = docsearch.similarity_search(query)

#### Run Chain

In [84]:
chain.run(input_documents=docs, question=query)

'Based on the information provided, the STIHL FS 55 R could be a suitable option for your requirements. It features single-handed operation, a loop handle with a barrier bar, and has a 27.2 cm³ engine with a power output of 0.75 kW / 1.0 hp. While it slightly exceeds the 25 cm³ requirement, it aligns with the other specifications you mentioned.'