# Knowledge Extraction from Chat Conversations #

This notebook explores several ways to extract knowledge about an individual from a chat transcript. The extracted knowledge is stored in different formats, such as descriptive summaries and JSON lists. A subsequent notebook explores which format works best for retrieving information about the user in order to simulate them.

In [None]:
!pip install --quiet --upgrade torch

In [None]:
# !pip install --quiet --upgrade transformers
!pip install --quiet --upgrade git+https://github.com/huggingface/transformers

In [None]:
# Using `low_cpu_mem_usage=True` or a `device_map` while instantiating a model requires Accelerate
!pip install --quiet --upgrade accelerate

In [None]:
# Required if loading a model quantized with bitsandbytes
!pip install --quiet --upgrade bitsandbytes

In [None]:
# Sample transcript in chat-message form: the assistant asks a question and
# the user answers it, alternating turn by turn.
conversation = [
    dict(role="assistant", content="what is your name?"),
    dict(role="user", content="my name is John"),
    dict(role="assistant", content="how are you today?"),
    dict(role="user", content="I'm great, thanks"),
    dict(role="assistant", content="are you a student or employed?"),
    dict(role="user", content="I am a farmer"),
    dict(role="assistant", content="are you seeing someone or are you married?"),
    dict(role="user", content="yes, my wife's name is Jane"),
    dict(role="assistant", content="how long have you been married?"),
    dict(role="user", content="15 years"),
    dict(role="assistant", content="do you have any children?"),
    dict(role="user", content="yes, two"),
    dict(role="assistant", content="what are their names?"),
    dict(role="user", content="Jack and Jill"),
    dict(role="assistant", content="how old are they?"),
    dict(role="user", content="10 and 12, respectively"),
]

# Flatten the transcript to plain text: one "role: content" line per message,
# each terminated by a newline.
conversation_str = "".join(
    f"{turn['role']}: {turn['content']}\n" for turn in conversation
)

In [None]:
# Model to load. Uncomment exactly one of the alternatives below to switch.
model_id = "mistralai/Mistral-Nemo-Instruct-2407"

# Alternatives:
#   quantized Gemma model, stored locally
# model_id = "models/google__gemma-2-27b-it"
#
# model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#
#   quantized Llama model, stored locally
# model_id = "models/meta-llama__Meta-Llama-3-70B-Instruct"

# Keyword arguments forwarded to `from_pretrained` when the model is loaded.
model_kwargs = dict(
    low_cpu_mem_usage=True,
    # Load the model into GPUs sequentially, to avoid memory allocation
    # issues with balancing.
    device_map="sequential",
    torch_dtype="auto",
)

In [None]:
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM

# Tokenizer matching the selected checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Causal language model for the same checkpoint, loaded with the options
# collected in `model_kwargs`.
model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)

In [None]:
# `utils` is a project-local module that is not defined in this notebook.
import utils
# Presumably prints a summary of the loaded model (e.g. size / device
# placement) — confirm against utils.py.
utils.print_model_info(model)

In [None]:
# Token id(s) at which generation stops. For Llama models, `<|eot_id|>` is
# accepted as a stop token in addition to the tokenizer's EOS token; every
# other model stops on the tokenizer's EOS token only.
stop_token_ids = (
    [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
    if "llama" in model_id.lower()
    else tokenizer.eos_token_id
)

# Sampling settings shared by every generation call in this notebook.
generate_kwargs = dict(
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    bos_token_id=tokenizer.bos_token_id,
    # The EOS token doubles as the padding token.
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=stop_token_ids,
)

In [None]:
%%time

# Instruction placed in front of the transcript inside a single user message.
prompt = """
Create a paragraph summary of facts about the user from the following conversation:
"""

messages = [
    {"role": "user", "content": prompt + conversation_str}
]

# Request a dict explicitly so that both the token ids and the attention mask
# are available, and so this cell does not depend on the library's default
# return type for `apply_chat_template`.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True
).to(model.device)
input_ids = inputs["input_ids"]

# Pass the attention mask explicitly: generate_kwargs sets the pad token to
# the EOS token, so `generate` should not be left to guess which positions
# are padding.
outputs = model.generate(
    input_ids=input_ids,
    attention_mask=inputs["attention_mask"],
    **generate_kwargs
)

# Skip the prompt tokens so that only the newly generated tokens are decoded.
response = outputs[0][input_ids.shape[-1]:]

print(tokenizer.decode(response, skip_special_tokens=True))

In [None]:
import transformers
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from transformers import TextStreamer

# Streamer that emits generated text as it is produced, leaving out the
# prompt and any special tokens.
streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)

# Text-generation pipeline built around the model and tokenizer objects that
# were loaded earlier (pass `model=model_id` instead to load from the id).
# NOTE(review): `model_kwargs` is presumably only consulted when `model` is
# given as a name/path rather than an instantiated model — confirm whether it
# is still needed here.
pipe = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    model_kwargs=model_kwargs,
    streamer=streamer,
)

In [None]:
%%time

# Instruction placed in front of the transcript inside a single user message.
prompt = """
Create a paragraph summary of facts about the user from the following conversation:
"""

messages = [
    {"role": "user", "content": prompt + conversation_str}
]

# The pipeline's streamer displays the generated text as it is produced, so
# the return value is discarded.
_ = pipe(
    messages,
    **generate_kwargs
)

In [None]:
%%time

# Instruction asking for a flat JSON array of {fact, confidence_score} entries.
prompt = (
    "\n"
    "You are an AI assistant that lists facts about the user from chat conversations.\n"
    "You list facts in a JSON array, with each fact in a dictionary with keys fact and confidence_score.\n"
    "List all discrete facts about the user from the following chat conversation:\n"
)

# Single user turn: the instruction followed by the plain-text transcript.
messages = [dict(role="user", content=prompt + conversation_str)]

# The pipeline's streamer displays the generated text as it is produced, so
# the return value is discarded.
_ = pipe(messages, **generate_kwargs)

In [None]:
%%time

# Instruction asking for a JSON array of subject/predicate/object entries,
# each with a confidence_score.
prompt = (
    "\n"
    "You are an AI assistant that identifies facts about the user from chat conversations.\n"
    "You list facts in a JSON array, with each fact in a dictionary with keys subject, predicate, object, and confidence_score.\n"
    "List all discrete facts about the user from the following chat conversation:\n"
)

# Single user turn: the instruction followed by the plain-text transcript.
messages = [dict(role="user", content=prompt + conversation_str)]

# The pipeline's streamer displays the generated text as it is produced, so
# the return value is discarded.
_ = pipe(messages, **generate_kwargs)