<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/weaviate/HubermanWeaviate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# https://github.com/aigeek0x0/zephyr-7b-alpha-langchain-chatbot/tree/main

In [2]:
# Install the notebook's dependencies into the runtime (-q keeps pip's output quiet).
# NOTE(review): versions are unpinned, so a fresh run may pull newer releases
# than the ones this notebook was originally executed with.
!pip install -q transformers peft accelerate bitsandbytes safetensors sentencepiece streamlit weaviate-client langchain sentence-transformers tiktoken youtube-transcript-api


In [3]:
# Weaviate connection settings.
# The values are read from the environment so credentials do not have to be
# typed into (and saved with) the notebook. When a variable is unset the
# original empty-string placeholder is kept.
import os

WEAVIATE_URL = os.environ.get("WEAVIATE_URL", "")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "")

In [4]:
# fixing unicode error in google colab: force the preferred encoding to UTF-8.
# The replacement accepts the same optional `do_setlocale` argument as the real
# `locale.getpreferredencoding`, so callers that pass it (for example
# `locale.getpreferredencoding(False)`) keep working instead of raising TypeError.
import locale
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

# import dependencies
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from langchain.text_splitter import TokenTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Weaviate
import weaviate

In [5]:
# specify embedding model (using huggingface sentence transformer)
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
# Run the embedding model on the GPU when torch can see one; otherwise fall
# back to the CPU so this cell does not fail on a runtime without CUDA.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": embedding_device}
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)

In [6]:
import requests
import xml.etree.ElementTree as ET

# YouTube channel feed (videos.xml) that the video links are read from.
URL = "https://www.youtube.com/feeds/videos.xml?channel_id=UC2D2CMWXMOVWx7giW1n3LIg"

# Bound the request time and fail loudly on an HTTP error status instead of
# handing an error page to the XML parser.
response = requests.get(URL, timeout=30)
response.raise_for_status()
xml_data = response.content

# Parse the XML data
root = ET.fromstring(xml_data)

# Define the namespace
namespaces = {
    'atom': 'http://www.w3.org/2005/Atom',
    'media': 'http://search.yahoo.com/mrss/'
}

# Extract YouTube links.
# Only `alternate` links nested inside an <entry> element are collected. The
# previous version matched every `alternate` link in the document and dropped
# the first one by position (presumably the feed-level channel link), which
# would silently discard a video whenever that feed-level link is absent.
youtube_links = [
    link.get('href')
    for link in root.findall(".//atom:entry/atom:link[@rel='alternate']", namespaces)
]

In [8]:
from langchain.document_loaders import YoutubeLoader

# Load the documents for every link found in the feed and collect them in one list.
all_docs = []
for link in youtube_links:
  loader = YoutubeLoader.from_youtube_url(link)
  docs = loader.load()
  all_docs.extend(docs)

# Split into 128-token chunks with no overlap.
# The splitter must receive `all_docs`: `docs` only holds the documents of the
# last link processed by the loop (and is undefined when there are no links).
text_splitter = TokenTextSplitter(chunk_size=128, chunk_overlap=0)
split_docs = text_splitter.split_documents(all_docs)

# Connect to Weaviate with API-key authentication and index the chunks using
# the embeddings object created earlier in the notebook.
client = weaviate.Client(url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY))
vector_db = Weaviate.from_documents(split_docs, embeddings, client=client, by_text=False)

In [9]:
# Sanity check: ask the vector store for the single closest chunk (k=1) to an
# arbitrary query to confirm that the index answers searches.
vector_db.similarity_search("Wassup", k=1)

[Document(page_content=" how it stands to\npotentially transform every aspect of everyday life. Before we begin, I'd\nlike to emphasize that this podcast is separate\nfrom my teaching and research roles at Stanford. It is, however, part\nof my desire and effort to bring zero cost to\nconsumer information about science and\nscience-related tools to the general public. In keeping with\nthat theme, I'd like to thank the sponsors\nof today's podcast. Our first sponsor\nis Eight Sleep Eight Sleep makes smart mattress\ncovers with cooling, heating, and sleep tracking capacity. I've spoken many times before\non this podcast about", metadata={'source': '1Wo6SqLNmLk'})]

In [10]:
# Hugging Face model identifier, used to load both the tokenizer and the language model
model_name = "anakin87/zephyr-7b-alpha-sharded"

# function for loading 4-bit quantized model
def load_quantized_model(model_name: str):
    """
    Load a causal language model with 4-bit bitsandbytes quantization.

    The quantization settings (4-bit NF4 weights, double quantization,
    bfloat16 compute dtype) are carried entirely by the ``BitsAndBytesConfig``.
    ``load_in_4bit`` is deliberately not passed again as a keyword argument to
    ``from_pretrained``: it duplicates the config, and recent transformers
    releases raise a ValueError when both are supplied.

    :param model_name: Name or path of the model to be loaded.
    :return: Loaded quantized model.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config
    )
    return model

In [11]:
# function for initializing tokenizer
def initialize_tokenizer(model_name: str):
    """
    Build the tokenizer that belongs to ``model_name``.

    The tokenizer is created with ``return_token_type_ids=False`` and its
    beginning-of-sentence token id is then set to 1.

    :param model_name: Name or path of the model whose tokenizer is loaded.
    :return: The configured tokenizer.
    """
    tok = AutoTokenizer.from_pretrained(
        model_name,
        return_token_type_ids=False,
    )
    # Pin the beginning-of-sentence token id.
    tok.bos_token_id = 1
    return tok

In [12]:
# Build the tokenizer for `model_name` via the helper defined above
tokenizer = initialize_tokenizer(model_name)
# Load the 4-bit quantized model (the checkpoint shards are loaded here,
# which is what the progress bar in the cell output reports)
model = load_quantized_model(model_name)
# Stop token ids.
# NOTE(review): `stop_token_ids` is not referenced by any other cell visible
# here — confirm whether it is still needed.
stop_token_ids = [0]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [13]:
# build huggingface pipeline for using zephyr-7b-alpha
# The factory function is imported under an alias because this cell binds the
# name `pipeline` to the pipeline *object*. Calling `pipeline(...)` directly
# would shadow the imported factory and make the cell fail when run a second
# time in the same kernel.
from transformers import pipeline as build_pipeline

pipeline = build_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=2048,
    do_sample=True,  # sampling enabled, restricted to the 5 most likely tokens (top_k)
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,  # the EOS token id is reused as the padding id
)

# specify the llm: wrap the transformers pipeline for use with langchain
llm = HuggingFacePipeline(pipeline=pipeline)

# build conversational retrieval chain with memory (rag) using langchain
from typing import Optional


def create_conversation(query: str, chat_history: Optional[list] = None) -> tuple:
    """
    Answer ``query`` with the retrieval chain and record the exchange.

    A ``ConversationalRetrievalChain`` is built from the module-level ``llm``
    and ``vector_db`` and invoked with the question and the history so far.

    :param query: The user's question.
    :param chat_history: List of ``(question, answer)`` tuples from earlier
        turns. It is appended to in place. When omitted, a fresh list is
        created for this call; a shared mutable default would otherwise leak
        history between unrelated calls.
    :return: ``('', chat_history)`` where the last history entry is
        ``(query, answer)``, or ``(query, exception)`` if answering failed.
    """
    if chat_history is None:
        chat_history = []

    try:
        qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vector_db.as_retriever(),
            # The history is handed to the chain unchanged.
            get_chat_history=lambda h: h,
        )

        result = qa_chain({'question': query, 'chat_history': chat_history})
        chat_history.append((query, result['answer']))
    except Exception as e:
        # Best effort: keep the conversation going and surface the error as
        # the "answer" for this turn instead of raising.
        chat_history.append((query, e))

    return '', chat_history

In [15]:
# Ask a first question. No history is passed, so the function's default
# history is used; the result is the ('', chat_history) tuple shown below.
create_conversation("What's Zuckerberg talking about?")



('',
 [("What's Zuckerberg talking about?",
   ' Zuckerberg is discussing the CZI, or Chan Zuckerberg Initiative, and its goal to discover new pathways and cures for all human diseases through critical funding and artificial intelligence platforms.')])