In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()


In [2]:

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

In [9]:
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma

In [10]:
import gpt4all

In [38]:
x = GPT4AllEmbeddings()


In [46]:
import pandas as pd

df = pd.read_csv('../dataset/auto_1_langchain.csv')
names = df['name'].to_list()
steps = df['steps'].to_list()
ingredients = df['ingredients'].to_list()

texts = []

for n, s, i in zip(names, steps, ingredients):
    texts.append(f'name: {n} ; ingredients: {i} ; preparation: {i}')

In [57]:
from sklearn.metrics.pairwise import cosine_distances, cosine_similarity
from tqdm import tqdm
import random

from nltk.translate.bleu_score import sentence_bleu

tmp = []

for i in tqdm(range(1000)):
    examples = random.choices(texts, k=2)
    tmp.append(sentence_bleu(examples[0], examples[1], weights=[0.5, 0.5]))

  0%|          | 0/1000 [00:00<?, ?it/s]

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 1000/1000 [00:13<00:00, 76.12it/s]


In [73]:
sentences = [
    "Preheat oven to 350 degrees F. Spray an 11x7x2-inch baking dish with cooking spray. In a large bowl, combine the chicken, celery and water chestnuts. Stir in the soup and mayonnaise. Spread half the mixture into the prepared pan. Unroll the dough and separate into 9 squares. Place 3 squares in the bottom of the pan. Spread with half the sour cream. Top with half the chicken mixture and half the Parmesan cheese. Repeat the layers. Bake for 40 minutes. Let stand for 10 minutes. Cut into 6 squares. Serve warm.", 
    "Preheat the oven to 375 degrees. Put the chicken, celery, water chestnuts, soup, mayonnaise and sour cream in a large bowl and stir until well combined. Put the chicken mixture into a 9x13 baking dish that has been coated with the cooking spray. Bake for 25-30 minutes, or until celery is crisp-tender. Remove the casserole from the oven. Unroll the croissant dough into flat pieces and cover the chicken mixture with the slices of dough. Drizzle with the melted butter. Bake for 10-12 minutes or until the croissant topping is turning golden brown. Remove the casserole from the oven and sprinkle with the parmesan cheese. Bake for 1-2 more minutes. Let stand 5 minutes before serving."
    ]


print(sentence_bleu([sentences[0]], sentences[1], weights=[0.5, 0.5])) 

0.7071067811865476


In [54]:
print(tmp)

[9.768142374145008e-232, 1.0365786590631523e-231, 9.935665127248702e-232, 1.0400016401250536e-231, 1.1660767189343928e-231, 1.0430324475818594e-231, 9.53667732973359e-232, 1.0126455930533508e-231, 9.888463483086503e-232, 1.0414710173847794e-231, 8.924178303539991e-232, 1.0179167992307073e-231, 9.94262531724427e-232, 1.0881585058887664e-231, 1.0049683504181483e-231, 9.844849180073599e-232, 1.0013597882056462e-231, 8.850193570579603e-232, 1.0572155961960705e-231, 9.115950231440499e-232, 9.817780325683613e-232, 9.718756419071616e-232, 1.0377133938315695e-231, 1.072756664658447e-231, 1.015383921264607e-231, 9.468085247078445e-232, 9.187929551544542e-232, 9.971469906067022e-232, 9.13647407667802e-232, 1.0505112911309325e-231, 9.96763840875745e-232, 1.0477021619035363e-231, 9.884192426889356e-232, 1.0649365332831978e-231, 1.137030182511939e-231, 9.500581443932479e-232, 9.379965251837572e-232, 1.1378719080845818e-231, 1.0296788793467617e-231, 9.514993728653354e-232, 1.0865159978022652e-231, 1

In [51]:
new_tmp = []
for i in tmp:
    new_tmp.append(i[0][0])

In [58]:
import statistics
print(statistics.mean(tmp))

4.543106092879663e-155


In [40]:
print(cosine_distances(X, Y))
print(cosine_similarity(X, Y))

[[0.69976513]]
[[0.30023487]]


In [4]:
vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [9]:
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question, 5)
len(docs)

5

In [6]:
from langchain_community.llms import LlamaCpp

In [7]:
n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/Users/rlm/Desktop/Code/llama.cpp/models/llama-2-13b-chat.ggufv3.q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    verbose=True,
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4060 Ti, compute capability 8.9, VMM: yes
llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from models/llama-2-7b.Q6_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32        

In [8]:
res = llm.invoke("Tell me a joke")
res




llama_print_timings:        load time =      67.02 ms
llama_print_timings:      sample time =      18.38 ms /   110 runs   (    0.17 ms per token,  5985.42 tokens per second)
llama_print_timings: prompt eval time =      66.99 ms /     6 tokens (   11.17 ms per token,    89.56 tokens per second)
llama_print_timings:        eval time =    2768.48 ms /   109 runs   (   25.40 ms per token,    39.37 tokens per second)
llama_print_timings:       total time =    2994.56 ms /   115 tokens


" about my birthday.\nMy mom is 26 years old and she had me when she was only 13!\nHahaha, what's the worst thing that could happen at a clown funeral?\nWhy did the pencil run away from the pen?\nBecause it got lead poisoning!\nDid you hear about the mathematician who had cancer?\nIt wasn’t a problem for him; he just used the Pythagorean theorem to figure out his treatment."

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Prompt
prompt = PromptTemplate.from_template(
    "Summarize the main themes in these retrieved docs: {docs}"
)


# Chain
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain = {"docs": format_docs} | prompt | llm | StrOutputParser()

# Run
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)
chain.invoke(docs)