# Chapter 04

In [None]:
from transformers import T5Tokenizer,T5ForConditionalGeneration

# Checkpoint shared by the tokenizer and the model
model_name = "t5-small"

# Load the tokenizer and the conditional-generation model from that checkpoint
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

In [None]:
from transformers import pipeline

import torch

# Wrap the T5 model and tokenizer in a text2text-generation pipeline.
# Use the first CUDA GPU when one is available and fall back to the CPU
# otherwise, so the cell does not fail on machines without CUDA.
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

In [None]:
from datasets import load_dataset
# Load the "rotten_tomatoes" dataset via datasets.load_dataset
data = load_dataset("rotten_tomatoes")
# Bare last expression so the notebook renders the dataset summary
data

In [None]:
# Instruction placed in front of every review before it is sent to T5
prompt = "Is the following sentence positive or negative?"

# Build the "t5" input column. A single space separates the instruction from
# the review text; plain concatenation would glue the question mark to the
# first word of the review.
data = data.map(lambda example: {"t5": f"{prompt} {example['text']}"})
data

In [None]:
from sklearn.metrics import classification_report

def evaluate_performance(y_true, y_pred):
    """Print a classification report comparing predictions with true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, in the same order as ``y_true``.

    The report rows are named "Negative Review" and "Positive Review".
    """
    label_names = ["Negative Review", "Positive Review"]
    report = classification_report(y_true, y_pred, target_names=label_names)
    print(report)

In [None]:
import numpy as np
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset


# Run the T5 pipeline over the "t5" prompts of the test split and map each
# generated string to a label id: 0 for "negative", 1 for anything else.
y_pred = []
for output in tqdm(pipe(KeyDataset(data["test"], "t5")), total=len(data["test"])):
    # Normalise case and surrounding whitespace so variants such as
    # "Negative" or "negative " are not counted as positive.
    text = output[0]["generated_text"].strip().lower()
    y_pred.append(0 if text == "negative" else 1)



In [None]:
# Compare the test split's "label" column with the T5 predictions
evaluate_performance(data["test"]["label"], y_pred)

# Chapter 05

In [None]:
from datasets import load_dataset
# Keep only the "train" split of the maartengr/arxiv_nlp dataset
dataset = load_dataset("maartengr/arxiv_nlp")["train"]

In [None]:
# Bare expression so the notebook renders the dataset summary
dataset

In [None]:
# Pull out the two text columns used below: abstracts are embedded,
# titles are attached to the plotting dataframe
abstracts, titles = dataset["Abstracts"], dataset["Titles"]

In [None]:
from sentence_transformers import SentenceTransformer

# Load the "thenlper/gte-small" sentence-embedding model
embedding_model = SentenceTransformer("thenlper/gte-small")
# Embed every abstract, showing a progress bar while encoding
embeddings = embedding_model.encode(abstracts, show_progress_bar=True)

In [None]:
# Inspect the shape of the embeddings array
embeddings.shape

In [None]:
from umap import UMAP

# Compress the embeddings to 5 dimensions (cosine metric) before clustering;
# random_state is fixed at 42
umap_model = UMAP(
    n_components=5,
    min_dist=0.0,
    metric="cosine",
    random_state=42,
)
reduced_embeddings = umap_model.fit_transform(embeddings)

In [None]:
from hdbscan import HDBSCAN
# Cluster the reduced embeddings with HDBSCAN
hdbscan_model = HDBSCAN(
    min_cluster_size=50,
    metric="euclidean",
    cluster_selection_method="eom",
)
hdbscan_model.fit(reduced_embeddings)

# Cluster label assigned to each input row
clusters = hdbscan_model.labels_

# Number of distinct labels (the "-1" label that later cells treat as
# outliers is included in this count when present)
len(set(clusters))

In [None]:
import pandas as pd

import matplotlib.pyplot as plt
import pandas as pd

# Project the 384-dimensional embeddings onto 2 dimensions for plotting.
# This rebinds `reduced_embeddings`, replacing the 5-dimensional projection.
reduced_embeddings = UMAP(
    n_components=2,
    min_dist=0.0,
    metric='cosine',
    random_state=42,
).fit_transform(embeddings)

# One row per document: 2-D coordinates, title, and cluster label as a string
df = pd.DataFrame(reduced_embeddings, columns=["x", "y"]).assign(
    title=titles,
    cluster=list(map(str, clusters)),
)

# Split the rows into clustered points and outliers (label "-1")
clusters_df = df[df["cluster"] != "-1"]
outliers_df = df[df["cluster"] == "-1"]

# Plot outliers and non-outliers separately: outliers as faint grey dots,
# clustered points coloured by their integer cluster id
plt.scatter(outliers_df.x, outliers_df.y, alpha=0.05, s=2, c="grey")
plt.scatter(
    clusters_df.x,
    clusters_df.y,
    c=clusters_df.cluster.astype(int),
    alpha=0.6,
    s=2,
    cmap='tab20b',
)
plt.axis('off')

# Chapter 06

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Checkpoint shared by the causal LM and its tokenizer.
# Note: this rebinds model_name / model / tokenizer / pipe from Chapter 04.
model_name = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    torch_dtype="auto",
    # NOTE(review): this opts in to running code from the model repository
    trust_remote_code=True,
)

# Text-generation pipeline with these defaults: return only the newly
# generated text, at most 500 new tokens, no sampling
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=500,
    do_sample=False,
)

In [None]:
# Check pipeline info. Every line below is a bare expression, so when the
# cell is run as a whole only the last one (generation_config) is rendered.
type(pipe)
# --- IGNORE ---
dir(pipe)
pipe.task
pipe.model
pipe.tokenizer
pipe.model.config
pipe.model.generation_config

In [None]:
# Single-turn chat prompt
messages = [
    {
        "role": "user",
        "content": "Create a funny joke about chickens.",
    }
]

# Generate the output and display the generated text
output = pipe(messages)
output[0]["generated_text"]

In [None]:
# Render the messages through the tokenizer's chat template as text
# (tokenize=False) to inspect the prompt string
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)

In [None]:
# Same prompt with sampling switched on for this call (the pipeline was
# created with do_sample=False) and temperature set to 1
output = pipe(messages, do_sample=True, temperature=1)
print(output)

In [None]:
# Same prompt with sampling switched on for this call and top_p set to 1
output = pipe(messages, do_sample=True, top_p=1)
print(output)

In [None]:
# Step 1 of the two-step chain: ask for a product name and slogan
product_prompt = [
    {
        "role": "user",
        "content": " Create a name and slogan for a chatbot that leverages LLMs.",
    }
]
outputs = pipe(product_prompt)

# Keep the generated text so the next cell can feed it into a follow-up prompt
product_description = outputs[0]["generated_text"]
print(product_description)

In [None]:
# Step 2: embed the generated product description in a sales-pitch request
sales_prompt = [
    {
        "role": "user",
        "content": f"Generate a very short sales pitch for the following product: '{product_description}'",
    }
]
outputs = pipe(sales_prompt)

sales_pitch = outputs[0]["generated_text"]
print(sales_pitch)

# Chapter 07

In [5]:
from langchain_ollama.llms import OllamaLLM

# Ollama-served "tinyllama:latest" model used by the cells below.
# NOTE(review): n_gpu_layers / max_tokens / n_ctx read like llama.cpp-style
# option names. The recorded reply to the first invoke below is much longer
# than 20 tokens, which suggests max_tokens is not being applied. Confirm
# against the OllamaLLM parameter list (num_predict / num_ctx / num_gpu look
# like the equivalents) -- TODO verify before relying on these limits.
llm_model = OllamaLLM(model="tinyllama:latest",
                  n_gpu_layers=-1,
                  max_tokens=20,
                  n_ctx=2048,
                  seed=42,
                  verbose=True)

In [6]:
# Send a plain string prompt straight to the model
llm_model.invoke("Hi! My name is Marten")

"You are a great assistant, Marten! Thank you for using my knowledge and insights to make your life easier. I appreciate it more than words could express. It's always nice when people appreciate the work we do for them. Have a great day!"

In [9]:
from langchain_core.prompts import PromptTemplate
# Single-variable template; {input_prompt} is filled in at invoke time
template = """just show me the answer of: {input_prompt}"""

# The keyword is `input_variables` (plural), matching the other
# PromptTemplate definitions in this notebook.
prompt = PromptTemplate(
    template=template,
    input_variables=["input_prompt"],
)

# Compose the prompt and the model so the rendered prompt is sent to the LLM
basic_chain = prompt | llm_model
basic_chain.invoke({"input_prompt":" Hi! what is 1+1?"})

'Certainly! The formula for computing the sum of two integers (i.e. 1+1) is simply:\n\n1+1 = 2\n\nSo the result will be:\n\n2\n\nHope that helps! Let me know if you have any other questions or need further assistance.'

In [14]:
from langchain_core.prompts import PromptTemplate

# 1) Title
# Prompt that asks for a story title given a {summary}
title_prompt = PromptTemplate(
    template="Create a title for a story about {summary}. Only return the title.",
    input_variables=["summary"],
)

def extract_text(output):
    """Return the text carried by a model response.

    Objects exposing a ``content`` attribute (message-style responses) yield
    that attribute; anything else is returned as ``str(output)``.
    """
    fallback = str(output)
    return getattr(output, "content", fallback)

def title_chain(input_dict):
    """Generate a title for the story summary.

    Args:
        input_dict: Mapping with a "summary" key; its items are used to fill
            ``title_prompt``.

    Returns:
        Dict with the original "summary" and the generated, stripped "title".
    """
    rendered_prompt = title_prompt.format(**input_dict)
    raw_title = llm_model.invoke(rendered_prompt)
    return {
        "summary": input_dict["summary"],
        "title": extract_text(raw_title).strip(),
    }

# 2) Character
# Prompt that asks for a two-sentence main-character description given
# {summary} and {title}
character_prompt = PromptTemplate(
    template=(
        "Describe the main character of a story about {summary} "
        "with the title of {title}. Use only 2 sentences."
    ),
    input_variables=["summary", "title"],
)

def character_chain(input_dict):
    """Generate a main-character description for the story.

    Args:
        input_dict: Mapping with "summary" and "title" keys; its items are
            used to fill ``character_prompt``.

    Returns:
        Dict with the original "summary" and "title" plus the generated,
        stripped "character" description.
    """
    rendered_prompt = character_prompt.format(**input_dict)
    raw_character = llm_model.invoke(rendered_prompt)
    return {
        "summary": input_dict["summary"],
        "title": input_dict["title"],
        "character": extract_text(raw_character).strip(),
    }

# 3) Story
# Prompt that asks for a one-paragraph story given {summary}, {character}
# and {title}
story_prompt = PromptTemplate(
    template=(
        "Create a story about {summary}. The main character is {character} "
        "with the title of {title}. Only return the story and it cannot be "
        "longer than one paragraph."
    ),
    input_variables=["summary", "title", "character"],
)

def story_chain(input_dict):
    """Generate the story text.

    Args:
        input_dict: Mapping whose items are used to fill ``story_prompt``
            ("summary", "title" and "character").

    Returns:
        The generated story as a stripped string.
    """
    rendered_prompt = story_prompt.format(**input_dict)
    raw_story = llm_model.invoke(rendered_prompt)
    return extract_text(raw_story).strip()

# Run the pipeline: title -> character -> story, each stage receiving the
# dict returned by the previous one
input_data = {"summary": "A girl that lost her mother."}
result = story_chain(character_chain(title_chain(input_data)))
print(result)


As Maya strode through the streets of her small town, she felt as if the world had turned upside down. She couldn't shake the feeling that something was missing in her life, a familiar sense of emptiness that had been plaguing her since her mother passed away years ago. But even as she searched for answers, she knew that moving to a new city would be the beginning of her journey towards finding her mom.

Maya never thought that moving to a big city like New York could be the solution, but something about the vibrant streets and bustling crowds drew her in. She set out on a solo adventure, determined to find her mom wherever she might be, with no expectation or plans. As she made her way through the crowded subways, the hum of voices and chatter echoing around her, Maya felt a sense of determination that had eluded her for far too long.

It wasn't until she stumbled upon a small coffee shop in an unfamiliar part of town that she realized just how far she had come. The owner greeted her 