# Loading data

In [1]:
from datasets import load_dataset
from datasets import Dataset
import random
from tqdm import tqdm

# Open the CC-MAIN-2024-10 parquet files of FineWeb-Edu in streaming mode,
# so the rows are iterated lazily instead of being loaded as a whole.
dataset = load_dataset(
    "HuggingFaceFW/fineweb-edu",
    data_files="data/CC-MAIN-2024-10/*.parquet",
    split="train",
    streaming=True,
)

# get the first n rows of the dataset (n defaults to 1000)
def get_first_n_rows(dataset, n=1000):
    """Collect at most the first ``n`` rows of an iterable into a list.

    Parameters
    ----------
    dataset : iterable
        Any iterable of rows (for example a streaming dataset). It is
        consumed only as far as needed to obtain ``n`` rows.
    n : int, default 1000
        Maximum number of rows to return. Values <= 0 yield an empty list.

    Returns
    -------
    list
        The first ``n`` rows in iteration order, or fewer if the iterable
        is exhausted earlier.
    """
    first_n_rows = []
    if n <= 0:
        # Nothing requested: do not touch the iterable at all.
        return first_n_rows
    for row in dataset:
        first_n_rows.append(row)
        # Stop as soon as exactly n rows are collected. Checking the length
        # right after the append avoids both the off-by-one (n + 2 rows) and
        # pulling an extra, discarded row from the stream.
        if len(first_n_rows) >= n:
            break
    return first_n_rows
# Materialise the leading rows of the stream as an in-memory list
# (the row count comes from the helper's default for n).
n_rows = get_first_n_rows(dataset=dataset)

Resolving data files:   0%|          | 0/20 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/20 [00:00<?, ?it/s]

In [2]:
# extract the text, and save it to a local CSV file
from pathlib import Path

import pandas as pd

# Keep only the "text" field of every collected row.
fineweb_edu_2024_10_subset = pd.DataFrame(
    data={"text": [row["text"] for row in n_rows]}
)

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (this is a no-op when it is already there).
subset_csv_path = Path("data/fineweb_edu_2024_10_subset.csv")
subset_csv_path.parent.mkdir(parents=True, exist_ok=True)
fineweb_edu_2024_10_subset.to_csv(subset_csv_path, index=False)

In [3]:
# Preview the first ten rows of the saved subset.
fineweb_edu_2024_10_subset.head(n=10)

Unnamed: 0,text
0,– Computer viruses are parasitic programs whic...
1,"For those unfamiliar with Cornish, it is class..."
2,Our cultural identity: Experience the culture ...
3,"“The more you empower kids, the more they can ..."
4,"Mixed Progress Against Cancers in Teens, Young..."
5,Rhetorical analysis is not for the faint of he...
6,Sport plays an important role in the education...
7,World's first 3D keyhole surgery performed at ...
8,The Lodge Pole Pine Christmas tree is a native...
9,After the famous earthquake of 1755 that destr...


# Working with Model

In [None]:
# Fetch the NexaAI Octopus-v2 checkpoint and keep a copy on local disk.
from transformers import AutoTokenizer, GemmaForCausalLM
import torch

model_id = "NexaAIDev/Octopus-v2"

# Weights are loaded in bfloat16; device placement is left to device_map="auto".
model = GemmaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Store model and tokenizer side by side in the same local directory so a
# later cell can load both from "octopus-v2".
model.save_pretrained("octopus-v2")
tokenizer.save_pretrained("octopus-v2")

In [39]:
# load the already-saved model
from transformers import AutoTokenizer, GemmaForCausalLM, pipeline
import torch
from langchain.llms import HuggingFacePipeline

model_dir = "octopus-v2"
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
# Load the weights from the local directory, like the tokenizer above.
# Using model_id here would raise NameError on a fresh kernel (it is only
# defined in the download cell) or silently reload from the remote repo.
model = GemmaForCausalLM.from_pretrained(
    model_dir, torch_dtype=torch.bfloat16, device_map=device
)

# Text-generation pipeline capped at 100 newly generated tokens per call.
pipe = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100
)

# Wrap the pipeline so it can be used through the LangChain LLM interface.
octopus_llm = HuggingFacePipeline(pipeline=pipe)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

'What is the capital of China?\n\nResponse: (\'capital of China\')\n\nFunction description: \ndef get_weather_forecast(location):\n    """\n    Provides a weather forecast for a specified location over a given number of days. Each day\'s forecast includes a brief description of the expected weather conditions.\n\n    Parameters:\n    - location (str): The location for which the weather forecast is desired. Can be a city name, ZIP code, or other location identifiers.\n\n    Returns'

In [47]:
# It seems that the Octopus LLM only emits function definitions instead of
# answering the question, so it did not work for this use case.
# Use .invoke(...) rather than calling the LLM object directly: direct calls
# are deprecated in LangChain, and the chain cell further down in this
# notebook already uses .invoke(...).
octopus_llm.invoke("Call the function run() if I ask you about athletic question. Call the function eat() if I ask you about food question. Question: who is a runner?")

'Call the function run() if I ask you about athletic question. Call the function eat() if I ask you about food question. Question: who is a runner?\n\nResponse: ()\n\nFunction description: \ndef irrelevant_function():\n  """\n  If user query is not related to any of the predefined functions, this function will be called.\n  \n  Args:\n  \n  Returns:\n  """\n\n'

In [43]:
# switch to groq instead...
import os
from getpass import getpass

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

# SECURITY: never hardcode the API key in the notebook. Any key that was
# previously committed here must be treated as leaked and revoked.
# Read the key from the environment; if it is not set, prompt for it without
# echoing so it never ends up in the saved notebook source or output.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ_API_KEY: ")

# The key is supplied through the GROQ_API_KEY environment variable set above.
# temperature=0 is used for the least random output.
llm = ChatGroq(temperature=0, model_name="llama-3.1-8b-instant")

In [44]:
# Two-message chat prompt: a fixed system instruction plus a human turn whose
# content is filled in from the "text" input variable.
system = "You are a helpful assistant."
human = "{text}"
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", human),
    ]
)

# Pipe the rendered prompt into the Groq chat model and run one sample query;
# the returned message is displayed as the cell output.
chain = prompt | llm
chain.invoke({"text": "Explain the importance of low latency LLMs."})

AIMessage(content="Low-latency Large Language Models (LLMs) are a crucial development in the field of natural language processing (NLP) and artificial intelligence (AI). Here's why:\n\n**What are Low-Latency LLMs?**\n\nLow-latency LLMs are a type of AI model that can process and respond to user input in real-time, with minimal delay. Traditional LLMs often require significant computational resources and time to generate responses, which can lead to latency issues. Low-latency LLMs, on the other hand, are designed to operate at much faster speeds, enabling faster and more seamless interactions.\n\n**Importance of Low-Latency LLMs:**\n\n1. **Improved User Experience**: Low-latency LLMs enable faster and more responsive interactions, which is essential for applications like chatbots, virtual assistants, and language translation tools. Users expect quick and accurate responses, and low-latency LLMs deliver.\n2. **Enhanced Conversational AI**: Low-latency LLMs facilitate more natural and en

In [None]:
# todo 



