## Install requirements

This first section installs all the dependencies we need and creates the folders used by the later cells.

In [29]:
# Install required dependencies
# Every package is pinned to an exact version so a fresh environment gets the
# same libraries this notebook was written against.
# Restart the kernel after the first install so the new packages are picked up.
# tiktoken is not imported directly in this notebook; presumably it is needed
# by the langchain/OpenAI integration - TODO confirm.
%pip install tiktoken==0.3.3
%pip install openai==0.27.2
# langchain provides the text splitter, embeddings wrapper, vector store and QA chain used below.
%pip install langchain==0.0.134
# python-dotenv is used to read the API key from the `.keys` file.
%pip install python-dotenv==1.0.0
# faiss-cpu: presumably the backend of the FAISS vector store imported from langchain - TODO confirm.
%pip install faiss-cpu==1.7.3

# Make sure required folders exist
import os

# 'resources' is read from and 'output' is written to by the later cells.
folder_paths = ['resources', 'output']

for path in folder_paths:
    # exist_ok=True keeps the cell safe to re-run and avoids the
    # check-then-create race of os.path.exists() followed by os.mkdir().
    # Unlike the previous check, this raises if the path exists but is not
    # a directory, instead of silently continuing.
    os.makedirs(path, exist_ok=True)

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


## Import our OpenAI API Key

You will need an OpenAI API key for these examples to work.
Currently, only paid plans are available.

The key is read from a `.keys` file located in the project folder and then exposed as the `OPENAI_API_KEY` environment variable.
The content of the `.keys` file should be:
```
OPENAI_API_KEY=<secret_key>
```

In [18]:
import os
from dotenv import dotenv_values

# Parse the `.keys` file into a dict-like mapping of NAME -> value.
api_keys = dotenv_values('.keys')

# Validate before exporting: a missing file or entry would otherwise surface as
# an unhelpful KeyError/TypeError, and an empty value would only fail later,
# deep inside the first OpenAI call.
openai_api_key = api_keys.get('OPENAI_API_KEY')
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY was not found in the '.keys' file. Create a '.keys' file "
        "in the project folder containing a line 'OPENAI_API_KEY=<secret_key>'."
    )

# Expose the key as an environment variable for the cells below.
os.environ['OPENAI_API_KEY'] = openai_api_key

## Load and clean Gradle single page documentation

In [19]:
# Load the Gradle single page HTML documentation and clean it up.
# TODO: reproduce in code the cleanup that is currently done by hand
# (removing all header content, CSS and the TOC from the HTML file).
import html
import re

# Load a local html file.
# The encoding is explicit so the result does not depend on the platform
# default; assumes the saved page is UTF-8 - TODO confirm.
with open('resources/gradle.html', 'r', encoding='utf-8') as f:
    text = f.read()

# Remove all HTML tags
cleaned_gradle = re.sub(r'<[^>]*>', '', text)

# Decode HTML entities (&lt;, &amp;, &nbsp;, ...) so the text reads naturally.
# This runs after tag stripping so that escaped markup shown in the
# documentation (e.g. "&lt;version&gt;") is not mistaken for a real tag.
cleaned_gradle = html.unescape(cleaned_gradle)

# Remove blank lines (lines that are empty or contain only whitespace)
cleaned_gradle = re.sub(r'^\s*$', '', cleaned_gradle, flags=re.MULTILINE)

# Remove the \n\n that are left over
cleaned_gradle = cleaned_gradle.replace('\n\n', '\n')

# Save the text, again with an explicit encoding
with open('output/gradle_cleaned.txt', 'w', encoding='utf-8') as f:
    f.write(cleaned_gradle)

## Split our content in chunks

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters. Lengths are measured with len(), i.e. in characters.
CHUNK_SIZE = 2000
CHUNK_OVERLAP = 20

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)

# Split the cleaned documentation into document chunks.
texts = text_splitter.create_documents([cleaned_gradle])

# Show how many chunks were produced.
len(texts)

954

## Embed and store the texts
This uses an in-memory vector database: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/faiss.html

In [21]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Build the embedding client with its defaults (presumably it picks up the
# OPENAI_API_KEY environment variable set earlier - confirm), then embed every
# chunk and index the results in a FAISS vector store.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(texts, embeddings)

## Now wire these with OpenAI
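Lots of mystery in there for now ... In short: the question is used to look up the most similar chunks in the vector store, and those chunks are handed to an OpenAI LLM together with the question through a LangChain question-answering chain.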

In [27]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# LLM client with the library's default settings.
llm = OpenAI()

query = "How do I declare that a dependency should use versions between 1 and 2 excluded, with 1.4.3 blacklisted? And can you add an example?"

# Look up the stored chunks for this question (library-default number of results).
search_results = vectorstore.similarity_search(query)

# Question-answering chain; with chain_type="stuff" the retrieved documents are
# presumably all placed into a single prompt - see the langchain docs to confirm.
chain = load_qa_chain(llm, chain_type="stuff")
result = chain.run(input_documents=search_results, question=query)

# Append the exchange to a running log. The encoding is explicit so that
# non-ASCII characters in the model's answer cannot raise UnicodeEncodeError
# on platforms whose default encoding is not UTF-8.
with open('output/prompts.txt', 'a', encoding='utf-8') as f:
    f.write(f'Q: {query}\n')
    f.write(f'A: {result}\n')

result


' You can use a combination of the require and reject terms. For example:\nrequire("[1.0,2.0)")\nreject("1.4.3")'