In [21]:
# Enable IPython's autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

In [32]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, DirectoryLoader, UnstructuredMarkdownLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores.chroma import Chroma
from langchain.vectorstores import FAISS
from langchain.callbacks import StdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [23]:

# Chat model reused by later cells; a stdout callback handler is attached at construction.
chat_llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.9,
    callbacks=[StdOutCallbackHandler()],
)

In [24]:
# Inspect the callbacks attached to the chat model.
chat_llm.callbacks

[<langchain.callbacks.stdout.StdOutCallbackHandler at 0x1381d0b10>]

In [25]:
# Run a request by explicitly constructing the pieces:
# a completion model, a prompt template, and an LLMChain that ties them together.
handler = StdOutCallbackHandler()
llm = OpenAI(temperature=0.9, model="text-davinci-003", verbose=True)
prompt = PromptTemplate.from_template(
    "What would be a good company name for a company that makes colorful {item}?"
)
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    callbacks=[handler],
)
chain.run(item="shoes")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWhat would be a good company name for a company that makes colorful shoes?[0m

[1m> Finished chain.[0m


'\n\nRainbow Footwear.'

In [26]:
# Ask the chat model directly (no chain): wrap the question in a single human message.
text = "What would be a good company name for a company that makes colorful socks?"
messages = [HumanMessage(content=text)]
print(chat_llm(messages))

content='VibrantSox \n\nColorPopSocks \n\nRainbowThread \n\nSockSpectrum \n\nChromaSocks \n\nColorBlendCo \n\nHueHosiery \n\nChromaticSocks \n\nColorCraze \n\nTintedToes \n\nChromaToeCo \n\nColorSplashSocks \n\nVividVibeSocks \n\nRainbowStitch \n\nSockSwatch \n\nSpectrumThreads \n\nChromaWeave \n\nColorFeverSockCo \n\nHueHabit \n\nVibrantVoyageSocks' additional_kwargs={} example=False


In [27]:
# Load one note on its own to see what the markdown loader produces for it.
UnstructuredMarkdownLoader(
    "example_notes/A Gentle Introduction to Graph Neural Networks.md"
).load()

[Document(page_content='tags: #graph #deeplearning\n\nfrom [[distil - pub]].\n\nAn excellent resource for a technically skilled, but not necessarily knowledgeable, practitioner. This assumes some understanding of deep learning more broadly, and a cursory understanding of graphs, but quickly ramps up to an overview suitable for an ML practitioner.\n\nand relates to [[Graph neural network (GNN) Introduction]] which tends to focus on the GCN layers specifically and sampling from a graph, which is also discussed here.\n\nThey emphasise how a [[message passing algorithm]] is used to pass embeddings via the connectivity of the graph to give nodes and edges near it, k-hops away, where the number of layers is the number of hops. these messages are then aggregated together, and then passed through a learned update function to update the node or edge.\n\nmessage passing can occur from edges to edges via node connections and nodes to nodes via edge connections.\n\nThere can also be a learned line

In [28]:

# Load the files under example_notes/ with the markdown loader, showing a progress bar.
loader = DirectoryLoader(
    'example_notes/',
    loader_cls=UnstructuredMarkdownLoader,
    show_progress=True,
)
docs = loader.load()

100%|██████████| 23/23 [00:00<00:00, 139.63it/s]


In [29]:
# Pair each loaded note's position in `docs` with the file it came from.
[(position, note.metadata["source"]) for position, note in enumerate(docs)]

[(0, 'example_notes/Loaded Questions with details.md'),
 (1,
  'example_notes/give vs take conversation styles and conversational doorknobs.md'),
 (2, 'example_notes/improv theatre as a storytelling framework.md'),
 (3, 'example_notes/relational note taking.md'),
 (4,
  'example_notes/Sketch improv is working backward to build a larger worldview.md'),
 (5, 'example_notes/Obsidian.md'),
 (6, 'example_notes/meaningful spellcasting and the action economy.md'),
 (7,
  'example_notes/Story responsibility for players over just Character responsibility.md'),
 (8, 'example_notes/GCN graph convolution layers.md'),
 (9, 'example_notes/obsidian website with mkdocs.md'),
 (10, 'example_notes/What is zettelkasten.md'),
 (11, 'example_notes/Graph neural network (GNN) Introduction.md'),
 (12, 'example_notes/Gaussian belief propagation.md'),
 (13, 'example_notes/How the Nemesis system creates stories.md'),
 (14, 'example_notes/central party figure as a shared npc.md'),
 (15, 'example_notes/A Gentle In

In [30]:
# Select the GCN note by its source file rather than by position. The loaded
# order follows the directory listing (it is not alphabetical), so a hard-coded
# index such as docs[8] can point at a different note on another machine or
# after files are added.
gcn_note_name = "GCN graph convolution layers.md"
next(note for note in docs if note.metadata["source"].endswith(gcn_note_name))

Document(page_content='blog on GCN more specifically', metadata={'source': 'example_notes/GCN graph convolution layers.md'})

In [34]:




# Split the loaded notes into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(docs)

# Embed the chunks with OpenAI embeddings and index them in a FAISS vector store.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever()

# Question-answering chain that also reports which sources it drew on.
# RetrievalQAWithSourcesChain is imported in the imports cell at the top.
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=chat_llm,
    chain_type="stuff",
    retriever=retriever,
)

# NOTE(review): this question is unrelated to the example notes; the recorded
# answer reports no sources. Presumably a sanity check or a leftover from a
# documentation example -- confirm whether it should stay.
query = "What did the president say about Ketanji Brown Jackson"
qa(query)

{'question': 'What did the president say about Ketanji Brown Jackson',
 'answer': 'The president did not mention Ketanji Brown Jackson.\nSOURCES:',
 'sources': ''}

In [36]:
# Display the vector store object to see which implementation `db` currently holds.
db

<langchain.vectorstores.faiss.FAISS at 0x1784c1910>

In [45]:
# List (metadata, text) for every indexed chunk that came from the GNN introduction note.
# The chunks are read from `texts` -- the documents that were indexed into `db` --
# instead of via `db.get()`. `get()` belongs to the Chroma store API, while `db` is
# built with FAISS in this notebook, so the previous form only worked against
# leftover kernel state and fails on Restart & Run All.
gnn_source = 'example_notes/Graph neural network (GNN) Introduction.md'
[
    (chunk.metadata, chunk.page_content)
    for chunk in texts
    if chunk.metadata["source"] == gnn_source
]

[({'source': 'example_notes/Graph neural network (GNN) Introduction.md'},
  'tags: #apple\n\nsource: from twitter list primary blog \nsee [[GCN graph convolution layers]]\n\nOverview\n\nGraph neural networks essentially use weighted [[message passing algorithm]] by multiplying the node features X, the normalised adjacency matrix (or some Linear Diffusion operator) A, and a learned weight matrix W.\n\nSo each layer in the simple GCN would be ReLU(AXW).  However, for a deep network that requires the Adjacency matrix stored in memory, which is prohibitive for large networks. Multiple layers allow information to be passed through the network. Additionally initial work used sampling, SGD, and mini batches, but using the nodes in a graph as samples ignores the fact that they are not independent points from some distribution. More sophisticated deep networks sample subgraphs or L-hop (L = layer number) graphs to reduce the computational load, and address the dependance of samples.'),
 ({'sour

In [46]:
# Ask the QA chain which tags fit fruit-related notes.
query = (
    "what are the tags appropriate for notes on fruit?"
)
qa(query)

{'question': 'what are the tags appropriate for notes on fruit?',
 'answer': 'The appropriate tags for notes on fruit are #apple.\n',
 'sources': 'example_notes/test note.md'}

In [51]:
# Ask the QA chain to pick out tags on the GNN introduction note that do not fit its content.
query = (
    "what tags are in the note on Graph neural network (GNN) Introduction "
    "that are not appropriate to the content?"
)
qa(query)

{'question': 'what tags are in the note on Graph neural network (GNN) Introduction that are not appropriate to the content?',
 'answer': 'The inappropriate tags in the note on Graph neural network (GNN) Introduction are "#apple" and "#stub".\n',
 'sources': 'example_notes/Graph neural network (GNN) Introduction.md'}

## Thoughts so far

It seems this task calls for consuming more of the notes linked to the target note,
then doing some prompt engineering to give the model sufficient context to actually perform the tagging task. Agents/chains are worth looking into for this.

Additionally, it could be useful to look into guidance.