In [1]:
import os
import requests
import sys

from bs4 import BeautifulSoup
from collections import deque
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from urllib.parse import urljoin, urlparse

In [2]:
load_dotenv()
assert os.environ["OPENAI_API_KEY"] != ""

In [3]:
dependency = "fastapi"

In [4]:
version_old = "0.79.0"

In [5]:
!{sys.executable} -m pip install {dependency}=={version_old}

Collecting fastapi==0.79.0
  Using cached fastapi-0.79.0-py3-none-any.whl (54 kB)
Collecting starlette==0.19.1
  Using cached starlette-0.19.1-py3-none-any.whl (63 kB)
Installing collected packages: starlette, fastapi
  Attempting uninstall: starlette
    Found existing installation: starlette 0.27.0
    Uninstalling starlette-0.27.0:
      Successfully uninstalled starlette-0.27.0
  Attempting uninstall: fastapi
    Found existing installation: fastapi 0.100.0
    Uninstalling fastapi-0.100.0:
      Successfully uninstalled fastapi-0.100.0
Successfully installed fastapi-0.79.0 starlette-0.19.1
You should consider upgrading via the '/Users/justin/.pyenv/versions/3.8.16/bin/python3.8 -m pip install --upgrade pip' command.[0m[33m
[0m

In [6]:
src = f'/Users/justin/.pyenv/versions/3.8.16/lib/python3.8/site-packages/{dependency}'
loader = DirectoryLoader(src, glob="**/*.py", show_progress=True)
docs = loader.load()

100%|███████████████████████████████████████████████| 42/42 [00:02<00:00, 19.46it/s]


In [7]:
for doc in docs:
    doc.metadata['version'] = version_old
print(f'We have {len(docs)} document(s)')

We have 42 document(s)


In [63]:
docs[0]

Document(page_content='from enum import Enum from typing import Any, Callable, Dict, Optional, Sequence\n\nfrom pydantic.fields import FieldInfo, Undefined\n\nclass ParamTypes(Enum):\n\nquery = "query"\n\nheader = "header"\n\npath = "path"\n\ncookie = "cookie"\n\nclass Param(FieldInfo):\n\nin_: ParamTypes\n\ndef __init__( self, default: Any = Undefined, *, alias: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, gt: Optional[float] = None, ge: Optional[float] = None, lt: Optional[float] = None, le: Optional[float] = None, min_length: Optional[int] = None, max_length: Optional[int] = None, regex: Optional[str] = None, example: Any = Undefined, examples: Optional[Dict[str, Any]] = None, deprecated: Optional[bool] = None, include_in_schema: bool = True, **extra: Any, ): self.deprecated = deprecated self.example = example self.examples = examples self.include_in_schema = include_in_schema super().__init__( default=default, alias=alias, title=title, descr

In [8]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs)

In [9]:
print(f'We have {len(docs)} document(s)')

We have 244 document(s)


In [10]:
version_new = "0.100.0"

In [11]:
!{sys.executable} -m pip install {dependency}=={version_new}

Collecting fastapi==0.100.0
  Using cached fastapi-0.100.0-py3-none-any.whl (65 kB)
Collecting starlette<0.28.0,>=0.27.0
  Using cached starlette-0.27.0-py3-none-any.whl (66 kB)
Installing collected packages: starlette, fastapi
  Attempting uninstall: starlette
    Found existing installation: starlette 0.19.1
    Uninstalling starlette-0.19.1:
      Successfully uninstalled starlette-0.19.1
  Attempting uninstall: fastapi
    Found existing installation: fastapi 0.79.0
    Uninstalling fastapi-0.79.0:
      Successfully uninstalled fastapi-0.79.0
Successfully installed fastapi-0.100.0 starlette-0.27.0
You should consider upgrading via the '/Users/justin/.pyenv/versions/3.8.16/bin/python3.8 -m pip install --upgrade pip' command.[0m[33m
[0m

In [12]:
src = f'/Users/justin/.pyenv/versions/3.8.16/lib/python3.8/site-packages/{dependency}'
loader = DirectoryLoader(src, glob="**/*.py", show_progress=True)
docs2 = loader.load()

100%|███████████████████████████████████████████████| 43/43 [00:01<00:00, 38.98it/s]


In [13]:
for doc in docs2:
    doc.metadata['version'] = version_new
print(f'We have {len(docs2)} document(s)')
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs2 = text_splitter.split_documents(docs2)
print(f'We have now have {len(docs2)} document(s)')

We have 43 document(s)
We have now have 332 document(s)


In [14]:
docs2[0]



In [15]:
docs.extend(docs2)

In [16]:
print(f'We have {len(docs)} combined document(s)')

We have 576 combined document(s)


In [17]:
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(docs, embeddings)

In [27]:
model_id = "text-similarity-davinci-001"
llm = OpenAI(temperature=0, openai_api_key=os.environ["OPENAI_API_KEY"], model=model_id)
chain = load_qa_chain(llm, chain_type="stuff")

In [28]:
query = "List all the python functions in version 0.79.0"
d = db.similarity_search(query, include_metadata=True)
chain.run(input_documents=d, question=query)

" Here's a list of functions in version 0 FastAPI 0.79.0: from .applications import FastAPI as FastAPI from .background import BackgroundTasks as BackgroundTasks from .datastructures import UploadFile as UploadFile from .exceptions import HTTPException as HTTPException from .exceptions import WebSocketException as WebSocketException from .param_functions import Body as Body from .param_functions import Cookie as Cookie from .param_functions import Depends as Depends from .param_functions import File as File from .param_functions import Form as Form from .param_functions import Header as Header from .param_functions import Path as Path from .param_functions import Query as Query from .param_functions import Security as Security from .requests import Request as Request from .responses import Response as Response from .routing import APIRouter as APIRouter from .websockets import WebSocket as WebSocket from .websockets import WebSocketDisconnect as WebSocketDisconnect Question: List all t

In [29]:
query = "List all the python functions in version 0.100.0"
d = db.similarity_search(query, include_metadata=True)
chain.run(input_documents=d, question=query)

' import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlete import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starlette import starle