In [1]:
!pip install --upgrade -q langchain openai tiktoken python-dotenv GitPython anthropic pinecone-client

In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
import pinecone
import os

  from tqdm.autonotebook import tqdm


In [68]:
# Load settings via python-dotenv before any client is created; a later cell
# reads PINECONE_API_KEY from the environment.
load_dotenv()

True

In [69]:
# Language model handed to the RetrievalQA chain further down; temperature is set to 0.
llm = OpenAI(temperature=0)

In [70]:
from langchain.document_loaders import GitLoader

In [71]:
# Configure a GitLoader for the tinygrad fork: local path "repo", the remote
# clone URL, and the branch to read.
loader = GitLoader(
    "repo",
    clone_url="https://github.com/retroam/tinygrad",
    branch="master",
)

In [72]:
# Read the repository contents into `documents`.
# Failures are surfaced instead of being silently ignored: swallowing them would
# leave `documents` undefined (or stale from an earlier run) for the cells below.
try:
    documents = loader.load()
except Exception as exc:
    raise RuntimeError(
        "GitLoader could not load the repository; check the clone URL, the "
        "branch name and the state of the local 'repo' directory."
    ) from exc

In [73]:
from langchain.document_loaders import TextLoader

In [74]:
# Break the loaded documents into chunks (target size 500, no overlap).
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
texts = text_splitter.split_documents(documents)

# Embedding client used when the chunks are indexed.
embeddings = OpenAIEmbeddings(
    disallowed_special=(),
)

Created a chunk of size 843, which is longer than the specified 500
Created a chunk of size 544, which is longer than the specified 500
Created a chunk of size 566, which is longer than the specified 500
Created a chunk of size 645, which is longer than the specified 500
Created a chunk of size 557, which is longer than the specified 500
Created a chunk of size 1933, which is longer than the specified 500
Created a chunk of size 3449, which is longer than the specified 500
Created a chunk of size 842, which is longer than the specified 500
Created a chunk of size 617, which is longer than the specified 500
Created a chunk of size 611, which is longer than the specified 500
Created a chunk of size 584, which is longer than the specified 500
Created a chunk of size 771, which is longer than the specified 500
Created a chunk of size 540, which is longer than the specified 500
Created a chunk of size 603, which is longer than the specified 500
Created a chunk of size 1674, which is longer 

In [75]:
# Initialise the Pinecone client with the key taken from the environment,
# then obtain a handle on the "langchain-demo" index.
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),
    environment="us-west4-gcp-free",
)
index = pinecone.Index("langchain-demo")

In [76]:
# Build the vector store from the chunked documents and the embedding client,
# targeting the 'langchain-demo' index.
vectordb = Pinecone.from_documents(
    texts,
    embeddings,
    index_name='langchain-demo',
)

In [77]:
# Retriever view of the vector store; it is passed to the RetrievalQA chain below.
retriever = vectordb.as_retriever()

In [78]:
# Sanity check: run a similarity search directly against the vector store.
query = "What is tinygrad about"
docs = vectordb.similarity_search(query=query)

In [79]:
# Display the documents returned by the similarity search.
docs

[Document(page_content='<div align="center">\n\n[![logo](https://raw.githubusercontent.com/geohot/tinygrad/master/docs/logo.png)](https://tinygrad.org)\n\ntinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) and [karpathy/micrograd](https://github.com/karpathy/micrograd). Maintained by [tiny corp](https://tinygrad.org).\n\n<h3>\n\n[Homepage](https://github.com/geohot/tinygrad) | [Documentation](/docs) | [Examples](/examples) | [Showcase](/docs/showcase.md) | [Discord](https://discord.gg/ZjZadyC7PK)\n\n</h3>\n\n[![GitHub Repo stars](https://img.shields.io/github/stars/geohot/tinygrad)](https://github.com/geohot/tinygrad/stargazers)\n[![Unit Tests](https://github.com/geohot/tinygrad/actions/workflows/test.yml/badge.svg)](https://github.com/geohot/tinygrad/actions/workflows/test.yml)\n[![Discord](https://img.shields.io/discord/1068976834382925865)](https://discord.gg/ZjZadyC7PK)\n[![Lines of code](https://img.shields.io/tokei/lines/github/geohot/tinygrad)](https:/

In [80]:
# Question-answering chain: the "stuff" chain type combined with the retriever
# and the language model defined earlier.
bot = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
)

In [81]:
# Import things that are needed generically
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper

In [82]:
# RetrievalQA obtains its context through the retriever it was built with and
# takes `query` as its only input, so no documents are passed in here.
bot.run(query="What is tinygrad about?")

' Tinygrad is a library for something between PyTorch and karpathy/micrograd, maintained by tiny corp.'

In [83]:
!pip freeze

aiohttp==3.8.4
aiosignal==1.3.1
anthropic==0.2.10
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1685486400275/work/dist
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1640817743617/work
argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1666850768662/work
asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work
async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1676985229141/work
async-timeout==4.0.2
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work
Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1677767029043/work
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifact