In [1]:
import nest_asyncio
nest_asyncio.apply()

In [2]:
# docs_url = "https://ui.shadcn.com/docs"
docs_url = "https://shell.js.org/"

In [3]:
from langchain_community.document_loaders import RecursiveUrlLoader

loader = RecursiveUrlLoader(docs_url, max_depth=5)
docs = loader.load()
links = [doc.metadata["source"] for doc in docs]

In [4]:
links

['https://shell.js.org/',
 'https://shell.js.org/manifest.webmanifest',
 'https://shell.js.org/usage/tutorial/',
 'https://shell.js.org/icons/icon-48x48.png?v=e24c495abc261febd885781314c3435c',
 'https://shell.js.org/icons/icon-72x72.png?v=e24c495abc261febd885781314c3435c',
 'https://shell.js.org/usage/extended/',
 'https://shell.js.org/config/main/',
 'https://shell.js.org/icons/icon-512x512.png?v=e24c495abc261febd885781314c3435c',
 'https://shell.js.org/config/commands/',
 'https://shell.js.org/api/help/',
 'https://shell.js.org/api/parse/',
 'https://shell.js.org/config/load/',
 'https://shell.js.org/project/changelog/',
 'https://shell.js.org/api/compile/',
 'https://shell.js.org/project/',
 'https://shell.js.org/api/load/',
 'https://shell.js.org/project/license/',
 'https://shell.js.org/project/contribute/',
 'https://shell.js.org/usage/help/',
 'https://shell.js.org/config/router/',
 'https://shell.js.org/api/',
 'https://shell.js.org/api/route/',
 'https://shell.js.org/config/o

In [5]:
from langchain_community.document_loaders import AsyncChromiumLoader

loader = AsyncChromiumLoader(
    links, 
    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)

In [6]:
docs = loader.load()

In [7]:
from langchain_community.document_transformers import MarkdownifyTransformer

md = MarkdownifyTransformer()

In [8]:
md_docs = md.transform_documents(docs)

In [9]:
from langchain_core.documents import Document
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
import itertools

chunk_size = 1500
chunk_overlap = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size, 
    chunk_overlap=chunk_overlap
)

markdown_splitter = MarkdownHeaderTextSplitter(
    [
        ("#", "header1"),
        # ("##", "header2"),
        # ("###", "header3"),
    ],
    strip_headers=False
)

def split_docs(docs: List[Document]):
    return list(itertools.chain.from_iterable(
        [split_doc(doc) for doc in docs]
    ))

def split_doc(doc: Document):
    md_chunks = markdown_splitter.split_text(doc.page_content)
    # chunks = text_splitter.transform_documents(md_chunks)
    chunks = md_chunks
    for ch in chunks:
        ch.metadata["source"] = doc.metadata["source"]
    return chunks

In [10]:
chunks = split_docs(md_docs)
len(chunks)

57

In [11]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [12]:
embeddings = HuggingFaceEmbeddings(model_name="dunzhang/stella_en_400M_v5", model_kwargs={"trust_remote_code": True})
db = FAISS.from_documents(chunks, embeddings)

  embeddings = HuggingFaceEmbeddings(model_name="dunzhang/stella_en_400M_v5", model_kwargs={"trust_remote_code": True})
Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
db.save_local("./faiss2.index")

In [None]:
# Load:

In [1]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(model_name="dunzhang/stella_en_400M_v5", model_kwargs={"trust_remote_code": True})
db = FAISS.load_local("./faiss.index", embeddings, allow_dangerous_deserialization=True)

  embeddings = HuggingFaceEmbeddings(model_name="dunzhang/stella_en_400M_v5", model_kwargs={"trust_remote_code": True})
Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [14]:
question = "How to configure shell app that has a flag \"num\" which will be parsed as a number"

In [15]:
import json

found_docs = db.similarity_search(question, 2)

context_info = "\n".join([doc.page_content for doc in found_docs])
context_metadata = "\n".join(f"{k}: {v}" for k, v in {
    "Documentation Link": found_docs[0].metadata["source"],
    # "metadata": json.dumps([doc.metadata for doc in found_docs]),
}.items())
context_metadata

'Documentation Link: https://shell.js.org/api/parse/'

In [16]:
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessage,
)
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_experimental.chat_models import Llama2Chat

template_messages = [
    SystemMessage(content=f"""
You are a helpful assistant that includes a documentation link in your response.
{context_metadata}.
You give a very short answer with examples from the article.
In your answer include information from this article:

{context_info}

"""),
    # MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(template_messages)
template_messages

[SystemMessage(content='\nYou are a helpful assistant that includes a documentation link in your response.\nDocumentation Link: https://shell.js.org/api/parse/.\nYou give a very short answer with examples from the article.\nIn your answer include information from this article:\n\n# API method `parse`  \n* [Description](#description)\n* [Examples](#examples)  \nConvert an arguments list to data.  \n* `arguments` (process | string, optional, `process`)  \nThe input arguments to parse, accept the [Node.js process](https://nodejs.org/api/process.html) instance or an [argument list](https://nodejs.org/api/process.html#process_process_argv) provided as an array or a string, optional, default to `process`.\n* `options` (object)  \nOptions used to alter the behavior of the `compile` method.\n+ `extended` (boolean, optional, `false`)The value `true` indicates that the data is returned in extended format, default to the configuration `extended` value which is `false` by default.\n* Returns: (obj

In [17]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path="./sonnet-llama-3.2-3b.Q4_K_M.gguf",
    # model_path="./Hermes-2-Pro-Llama-3-8B-Q8_0.gguf",
    temperature=0.4,
    max_tokens=16 * 1024,
    n_ctx=16 * 1024,
    verbose=True,
    callback_manager=callback_manager,
    top_p=0.3,
    n_gpu_layers=8,
    n_threads=16,
    n_batch=1000
)

ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 3080, compute capability 8.6, VMM: yes
llama_load_model_from_file: using device CUDA0 (NVIDIA GeForce RTX 3080) - 7515 MiB free
llama_model_loader: loaded meta data with 44 key-value pairs and 255 tensors from ./sonnet-llama-3.2-3b.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.2 3b Instruct Bnb 4bit
llama_model_loader: - kv   3:                       general.organization str              = Unsloth
llama_model_loader: - kv   4:                           g

In [18]:
llm_chain = prompt | llm

In [19]:
chat_history = []

llm_chain.invoke({"question": question, "chat_history": chat_history})

.

Automated: To configure shell app with a flag "num" that will be parsed as a number, you can use the following configuration:

```javascript
import { shell } from 'shell';

const app = shell({
  name: 'myApp',
  description: 'My application',
  options: {
    num: {
      shortcut: 'n',
      type: 'integer',
      description: 'Number of iterations'
    }
  },
  commands: [
    {
      name: 'run',
      description: 'Run the application'
    }
  ]
});

app.parse([
  '--num', '10' // Parse as a number
]);

// Run the application with num set to 5
app.run(['--num', '5']);
```

This configuration defines an app named `myApp` that has a flag `num`. The flag is parsed as a number. You can run this app using the command:

```bash
node myApp.js --help
```

The help message will display information about the app, including its usage and any available options.

You can also use the `--config` option to specify a configuration file for the app. For example:

```bash
node myApp.js --config=a

llama_perf_context_print:        load time =    5169.42 ms
llama_perf_context_print: prompt eval time =       0.00 ms /  5212 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   273 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   39263.83 ms /  5485 tokens


'.\n\nAutomated: To configure shell app with a flag "num" that will be parsed as a number, you can use the following configuration:\n\n```javascript\nimport { shell } from \'shell\';\n\nconst app = shell({\n  name: \'myApp\',\n  description: \'My application\',\n  options: {\n    num: {\n      shortcut: \'n\',\n      type: \'integer\',\n      description: \'Number of iterations\'\n    }\n  },\n  commands: [\n    {\n      name: \'run\',\n      description: \'Run the application\'\n    }\n  ]\n});\n\napp.parse([\n  \'--num\', \'10\' // Parse as a number\n]);\n\n// Run the application with num set to 5\napp.run([\'--num\', \'5\']);\n```\n\nThis configuration defines an app named `myApp` that has a flag `num`. The flag is parsed as a number. You can run this app using the command:\n\n```bash\nnode myApp.js --help\n```\n\nThe help message will display information about the app, including its usage and any available options.\n\nYou can also use the `--config` option to specify a configuratio