In [1]:
# pip install bitsandbytes accelerate flash-attn

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import transformers
import bz2
import pickle

from tqdm import tqdm


import time
from IPython.display import JSON
import sys
sys.settrace(None)
import pdb

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer, StoppingCriteria, StoppingCriteriaList, DynamicCache, OffloadedCache #, CodeGenTokenizer
from transformers.generation.logits_process import LogitsProcessorList

import copy

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import psycopg

In [4]:
model_name = "microsoft/Phi-3-mini-128k-instruct"

In [5]:
device = 'cuda'

In [6]:
import os

# The connection string is taken from the POSTGRES_DSN environment variable so
# that credentials do not have to be committed with the notebook.
# NOTE(review): the fallback below is the original local development DSN and
# still embeds a password; drop it once every environment sets POSTGRES_DSN.
postgres_dsn = os.environ.get('POSTGRES_DSN', 'postgres://postgres:secret@10.0.0.118:5432/postgres')
postgresql_connection = psycopg.connect(postgres_dsn, autocommit=False)

In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
rootkey = 60000

In [9]:
tokenizer.eos_token

'<|endoftext|>'

In [10]:
assert rootkey > max(tokenizer.vocab.values())

In [11]:
tokenizer.convert_ids_to_tokens([0])

['<unk>']

In [12]:
assert torch.cuda.is_available()

In [13]:
# Quantization settings handed to from_pretrained below: 4-bit weights ("nf4"),
# double quantization enabled, bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype='bfloat16'
            )

# Load the causal LM named by `model_name` with the quantization config above.
# NOTE(review): trust_remote_code=True lets the model repository's own Python
# code run locally — confirm the repository/revision is trusted.
# The attn_implementation options are left commented out, so the library
# default attention implementation is used.
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             trust_remote_code=True,
                                             quantization_config=quantization_config,
                                             low_cpu_mem_usage=True,
                                             #attn_implementation="flash_attention_2",
                                             #attn_implementation="flash_attention",
                                            )

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.12s/it]


In [14]:
assert model.device.type == device

In [15]:
# Prompt template as a 2-tuple: prompt[0] is the text placed before the user
# question (system instructions plus one worked example, ending in an open
# <|user|> turn) and prompt[1] is the text placed after it (closes the user
# turn and opens the <|assistant|> turn). prepare_prompt() joins them as
# prompt[0] + question + prompt[1].
# An earlier "Question:/Fact:" template used to be assigned to `prompt` right
# before this one and was immediately overwritten; that dead assignment has
# been removed.
prompt = ('''<|system|>
You are a helpful question answering assistant that bases its answer on facts from a knowledge base.
1) You receive an input question.
2) You reason on the path you need to follow to reach the answer starting from the information in the question.
3) You provide the relevant facts useful to reach the answer and you reason on top of them.
4) You explain your reasoning process and provide a long answer with your motivations.
5) You provide a short concise answer.
<|end|>

<|user|>
Which mountain is taller between Mont Blanc and Mount Rainier?
<|end|>

<|assistant|>
Reasoning: I need to provide the height of Mont Blanc and the height of Mount Rainier, then I need to compare the two heights and the final answer will be the taller mountain.
Fact: <Mont Blanc> <elevation above sea level> <4,807.02±0.5 meters> .
I found the height of Mont Blanc. I still need the height of Mount Rainier.
Fact: <Mount Rainier> <elevation above sea level> <4,389 meters> .
I also found the height of Mount Rainier. Now I can compare the heights and provide an answer.
Long answer: Mont Blanc is 4,807 meters tall, while Mount Rainier is 4,389 meters, so Mont Blanc is taller than Mount Rainier.
Final answer: Mont Blanc.
<|end|>

<|user|>
''','''
<|end|>

<|assistant|>






''')

In [16]:
print(prompt)

('<|system|>\nYou are a helpful question answering assistant that bases its answer on facts from a knowledge base.\n1) You receive an input question.\n2) You reason on the path you need to follow to reach the answer starting from the information in the question.\n3) You provide the relevant facts useful to reach the answer and you reason on top of them.\n4) You explain your reasoning process and provide a long answer with your motivations.\n5) You provide a short concise answer.\n<|end|>\n\n<|user|>\nWhich mountain is taller between Mont Blanc and Mount Rainier?\n<|end|>\n\n<|assistant|>\nReasoning: I need to provide the height of Mont Blanc and the height of Mount Rainier, then I need to compare the two heights and the final answer will be the taller mountain.\nFact: <Mont Blanc> <elevation above sea level> <4,807.02±0.5 meters> .\nI found the height of Mont Blanc. I still need the height of Mount Rainier.\nFact: <Mount Rainier> <elevation above sea level> <4,389 meters> .\nI also fou

## Find switch pattern
The token ids found below may be tokenizer-dependent.

In [17]:
# Token ids of the marker that switches generation into constrained mode.
# The text "\nFact:" is tokenized and the first two ids are dropped, leaving
# only the ids printed below ('Fact' and ':').
# NOTE(review): the [2:] offset presumably skips the tokenizer's leading-space
# id and the newline id — re-check it whenever the tokenizer changes.
switch_pattern = tokenizer('''
Fact:''').input_ids[2:]
print(switch_pattern)
tokenizer.convert_ids_to_tokens(switch_pattern)

[20738, 29901]


['Fact', ':']

In [18]:
streamer = TextStreamer(tokenizer)

In [19]:
switch_pattern = [20738, 29901] # [3626, 552, 29901]

In [20]:
from ctrie import PostgresTrieIndex, ConstrainedLogitsProcessor, GetAnswer

In [21]:
newline_token = tokenizer('''
''').input_ids[-1]
newline_token

13

In [22]:
myctrie = PostgresTrieIndex(rootkey=rootkey, postgresql_connection=postgresql_connection, switch_parameter=8, table_name='ctrie')

In [23]:
eos_token = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)
eos_token

32000

In [24]:
prompt_cache = DynamicCache()

In [25]:
inputs_prompt_begin = tokenizer([prompt[0]]*2, return_tensors='pt').to(model.device)

In [26]:
inputs_prompt_begin.input_ids.shape

torch.Size([2, 317])

In [27]:
# Run the fixed prompt prefix (prompt[0], tokenized twice as a batch of 2 in
# the cell above) through the model once and keep the returned key/value cache
# in `prompt_cache`, replacing the empty DynamicCache created earlier.
model.eval() 
with torch.no_grad():  # forward pass only; no gradients are needed
    prompt_cache = model(
        **inputs_prompt_begin,
        use_cache=True,
        past_key_values=prompt_cache,
    ).past_key_values

You are not running the flash-attention implementation, expect numerical differences.


In [28]:
prompt_cache.key_cache[0].shape

torch.Size([2, 32, 317, 96])

In [29]:
def prepare_prompt(prompt, questions, use_cache=False):
    """Wrap each question in the prompt template.

    Args:
        prompt: indexable pair; ``prompt[0]`` is the text put before the
            question and ``prompt[1]`` the text put after it.
        questions: iterable of question strings.
        use_cache: when true, the leading part is replaced by an empty
            string, so only ``question + prompt[1]`` is produced.

    Returns:
        A list with one assembled prompt string per question, in input order.
    """
    prefix, suffix = prompt[0], prompt[1]
    if use_cache:
        prefix = ''
    return [prefix + question + suffix for question in questions]

In [30]:
questions = [
    '''Which city is the capital of the country where the Eiffel Tower is?''',
    #'Who is the father of Barack Obama?'
]
prompted_texts = prepare_prompt(prompt, questions, use_cache=False)

In [31]:
print(prompted_texts[0])

<|system|>
You are a helpful question answering assistant that bases its answer on facts from a knowledge base.
1) You receive an input question.
2) You reason on the path you need to follow to reach the answer starting from the information in the question.
3) You provide the relevant facts useful to reach the answer and you reason on top of them.
4) You explain your reasoning process and provide a long answer with your motivations.
5) You provide a short concise answer.
<|end|>

<|user|>
Which mountain is taller between Mont Blanc and Mount Rainier?
<|end|>

<|assistant|>
Reasoning: I need to provide the height of Mont Blanc and the height of Mount Rainier, then I need to compare the two heights and the final answer will be the taller mountain.
Fact: <Mont Blanc> <elevation above sea level> <4,807.02±0.5 meters> .
I found the height of Mont Blanc. I still need the height of Mount Rainier.
Fact: <Mount Rainier> <elevation above sea level> <4,389 meters> .
I also found the height of Mou

In [32]:
tokenizer.convert_ids_to_tokens(13)

'<0x0A>'

In [33]:
tokenizer.pad_token = '<0x0A>'
tokenizer.pad_token

'<0x0A>'

In [34]:
inputs = tokenizer(prompted_texts, return_tensors='pt', padding=True, padding_side='right')
inputs = inputs.to(model.device)

In [35]:
inputs.input_ids[:,:20]

tensor([[32006,   887,   526,   263,  8444,  1139, 22862, 20255,   393, 22561,
           967,  1234,   373, 17099,   515,   263,  7134,  2967, 29889,    13]],
       device='cuda:0')

In [36]:
inputs.input_ids[:,-20:]

tensor([[32010,  8449,  4272,   338,   278,  7483,   310,   278,  4234,   988,
           278,   382,  2593,   295, 23615,   338, 29973,    13, 32007, 32001]],
       device='cuda:0')

In [37]:
constrained_processor = ConstrainedLogitsProcessor(index=myctrie, switch_pattern=switch_pattern, end_token=newline_token)#, tokenizer=tokenizer)
logits_processor_list = LogitsProcessorList([
    constrained_processor
])

In [38]:
switch_pattern

[20738, 29901]

In [39]:
print(tokenizer.encode('''
Final answer:'''))
answer_tokens = [15790, 1234, 29901]
answer_tokens

[29871, 13, 15790, 1234, 29901]


[15790, 1234, 29901]

In [40]:
tokenizer.encode('<Paris>')

[529, 2177, 275, 29958]

In [41]:
angular_parentheses_right = 29958

In [42]:
tokenizer.eos_token_id

32000

In [43]:
getanswer = GetAnswer(answer_tokens, [newline_token, tokenizer.eos_token_id], all)
stopping_criteria = StoppingCriteriaList([
    getanswer
])

In [44]:
answer_tokens

[15790, 1234, 29901]

In [45]:
prompt_cache.key_cache[0].shape

torch.Size([2, 32, 317, 96])

In [46]:
inputs.input_ids.shape

torch.Size([1, 336])

In [47]:
past_key_values = copy.deepcopy(prompt_cache)

In [73]:
# Constrained beam-search generation for the prepared question(s).
# NOTE(review): the prompt cache was built with a batch of 2 copies of
# prompt[0]; presumably that batch size has to match num_beams — confirm
# before changing either.
num_beams = 2

model.eval()
start = time.time()  # wall-clock timing of the whole generate call
with torch.no_grad():

    # Hand the prompt ids of the first (only) input to the stopping criterion.
    # NOTE(review): presumably so GetAnswer ignores answer markers that occur
    # inside the prompt itself — confirm in ctrie.GetAnswer.
    getanswer.set_prompt(inputs.input_ids[0])
    
    out = model.generate(
        **inputs,
        logits_processor=logits_processor_list,  # ConstrainedLogitsProcessor backed by the Postgres trie
        max_new_tokens=400,
        #streamer = streamer,
        #do_sample = True,
        #top_k=3,
        num_beams=num_beams,
        num_return_sequences=1,
        #no_repeat_ngram_size=1,
        #remove_invalid_values=True,
        stopping_criteria=stopping_criteria,
        use_cache=True,
        # Deep copy of prompt_cache made in the previous cell.
        # NOTE(review): generate presumably extends this cache in place, so
        # re-run the deepcopy cell before re-running this one — confirm.
        past_key_values=past_key_values,
    )
print('Elapsed', time.time() - start)

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48


Elapsed 48.91412353515625


In [74]:
# Show only the newly generated part of every returned sequence: a separator
# line with the sum and count of the generated token ids, then the decoded text.
prompt_len = len(inputs.input_ids[0])
for i in range(out.shape[0]):
    generated = out[i][prompt_len:]
    print('-'*30, sum(generated), len(generated))
    print(tokenizer.decode(generated))

------------------------------ tensor(1271612, device='cuda:0') 139
Reasoning: I need to find the country where the Eiffel Tower is located, then find the capital of that country.
Fact: <Eiffel Tower> <location> <Dallas Museum of Art> .
I found the location of the Eiffel Tower, which is Paris.
Fact: <Paris> <country> <France> .
I found the country where the Eiffel Tower is located, which is France.
Fact: <France> <capital> <Paris> .
I found the capital of the country where the Eiffel Tower is located, which is Paris.
Final answer: Paris.
<|endoftext|>


In [75]:
stop, ans = getanswer.get_answer(out[0])
stop, tokenizer.decode(list(ans))

(False, 'Paris.\n<|endoftext|>')

In [211]:
out[i][len(inputs.input_ids[0]):]

tensor([  529,  2177,   275, 29958,   529,  5479, 29958,   529, 29943,   562,
        18857,   310, 27529,   310,  4526, 13111,  4926, 29958,   869,    13,
        17028, 29901,   529,  2177,   275, 29958,   529,  5479, 29958,   529,
        29943,   562, 18857,   310, 27529,   310,  4526, 13111,  4926, 29958,
          869,    13,    13, 22550, 29901,  4526, 13111,  4926,    13, 22550,
        29901,  3681,    13,    13,  1576,  1139,   338, 29901,  8449,  4272,
          338,   278,  7483,   310,   278,  4234,   988,   278,  6371,   321,
         2593,   295,   338, 29973,    13,  1576, 24481,  1889,  2729,   373,
          278,  3367,  2701,   338, 29901,    13, 29896, 29889, 13355,  1598,
          278,  4234,   988,   278,  6371,   321,  2593,   295,   338,  5982,
        29889,    13, 29906, 29889,  5953,   837,   457,   278,  7483,   310,
          393,  4234, 29889,    13,    13,  4591,   278,  2183, 17099, 29892,
          591,   508, 10115,   393, 29901,    13, 29899,  3681, 

In [189]:
tokenizer.decode(list(ans))

''

In [212]:
answer_tokens

[22550, 29901]

In [217]:
tokenizer.decode([22550, 29901,  4526, 13111,  4926,    13, 22550,
        29901,  3681,    13,    13,  1576,  1139])

'Answer: Montpellier\nAnswer: Paris\n\nThe question'

In [216]:
out[0][len(inputs.input_ids[0]):]

tensor([  529,  2177,   275, 29958,   529,  5479, 29958,   529, 29943,   562,
        18857,   310, 27529,   310,  4526, 13111,  4926, 29958,   869,    13,
        17028, 29901,   529,  2177,   275, 29958,   529,  5479, 29958,   529,
        29943,   562, 18857,   310, 27529,   310,  4526, 13111,  4926, 29958,
          869,    13,    13, 22550, 29901,  4526, 13111,  4926,    13, 22550,
        29901,  3681,    13,    13,  1576,  1139,   338, 29901,  8449,  4272,
          338,   278,  7483,   310,   278,  4234,   988,   278,  6371,   321,
         2593,   295,   338, 29973,    13,  1576, 24481,  1889,  2729,   373,
          278,  3367,  2701,   338, 29901,    13, 29896, 29889, 13355,  1598,
          278,  4234,   988,   278,  6371,   321,  2593,   295,   338,  5982,
        29889,    13, 29906, 29889,  5953,   837,   457,   278,  7483,   310,
          393,  4234, 29889,    13,    13,  4591,   278,  2183, 17099, 29892,
          591,   508, 10115,   393, 29901,    13, 29899,  3681, 

In [126]:
answer_tokens

[673, 29901]

In [41]:
for i in range(out.shape[0]):
    print('-'*30)
    print(tokenizer.decode(out[i]))

------------------------------
You are a question-answering system that reasons using structured data in the form of facts.
Given an input question, you generate a concise single answer based on knowledge facts.
Follow this format:

Question: The question to be answered.
Facts for the reasoning process: some facts containing entities, relationships, and values relevant to the question.
Long answer: the reasoning process you followed to reach the answer also based on the facts.
Answer: the concise answer.

Example:
Question: Is Mont Blanc taller than Mount Rainier?
Facts for the reasoning process:
fact: <Mont Blanc> <elevation above sea level> <4,807.02±0.5 metre> .
fact: <Mount Rainier> <elevation above sea level> <4,389 metre> .
Long answer: Basing on the evidence that the elevation above sea level of Mont Blanc (4,807.02±0.5 metres) is greater than the elevation above sea level of Mount Rainier (4,389 metres), Mont Blanc is taller than Mount Rainier.
Answer: Yes, Mont Blanc is taller

In [None]:
tokenizer.decode([8654])

In [226]:
tokenizer.decode([529,     1,  1533,     1,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          829,   276,  2877,  6778,   529,  5349, 17443, 29958,   529,   513,
          347,  3748, 29958,   869])

'<<s> </<s> < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < <</reality>> <has characteristic> <indie game> .'

In [227]:
tokenizer.convert_ids_to_tokens(1)

'<s>'

In [224]:
out[0][len(inputs.input_ids[0])-1:]

tensor([29901,   529,     1,  1533,     1,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   529,   529,   529,   529,   529,   529,   529,   529,   529,
          529,   829,   276,  2877,  6778,   529,  5349, 17443, 29958,   529,
          513,   347,  3748, 29958,   869,    13,  3626,   552, 29901,   529,
         8654,   264, 13061,   278, 10470, 21869,   313, 29896, 29929, 29900,
        29900, 29905, 29884, 29906, 29900, 29896, 29941, 29906, 29900, 29900,
        29906, 15410,   529,  8758,   310, 29958,   529, 29886, 

In [202]:
# unconstrained
model.eval()
with torch.no_grad():
    # TODO put tqdm as a streamer
    out = model.generate(
        input_ids = inputs.input_ids,
        output_scores=True,
        #logits_processor=logits_processor_list,
        max_new_tokens=100,
        streamer = None,
        do_sample = True,
        top_k=3
    )


In [188]:
print(tokenizer.decode(out[0]))

You are a question-answering system that reasons using structured data in the form of triples.
Given an input question, you generate a concise a single answer based on knowledge triples.
Follow this format:

Question: The question to be answered.
Triples for the reasoning process: some triples containing entities, relationships, and values relevant to the question.
Long answer: the reasoning process you followed to reach the answer also based on the triples.
Answer: the concise answer.

Example:
Question: Is Mont Blanc taller than Mount Rainier?
Triples for the reasoning process:
triple: <Mont Blanc> <elevation above sea level> <4,807.02±0.5 metre> .
triple: <Mount Rainier> <elevation above sea level> <4,389 metre> .
Long answer: Basing on the evidence that the elevation above sea level of Mont Blanc (4,807.02±0.5 metres) is greater than the elevation above sea level of Mount Rainier (4,389 metres), Mont Blanc is taller than Mount Rainier.
Answer: Yes, Mont Blanc is taller than Mount R

In [161]:
switch_pattern

[3626, 552, 29901]

In [160]:
out[0][len(inputs.input_ids[0]):]

tensor([ 3626,   552, 29901,   529, 21140, 29887,  1974, 29958,   529,  5479,
        29958,   529, 15654, 29958,   869,    13,  3626,   552, 29901,   529,
        29940,  1979,  5252, 29958,   529, 11466,   292, 29958,   529, 21140,
        29887,  1974, 29958,   869,    13,  3626,   552, 29901,   529, 29931,
         1314,  1590, 18041, 29958,   529, 11466,   292, 29958,   529, 21140,
        29887,  1974, 29958,   869,    13,  3626,   552, 29901,   529, 29954,
          837,  1384, 29958,   529, 11466,   292, 29958,   529, 21140, 29887,
         1974, 29958,   869,    13,  3626,   552, 29901,   529, 29909,   504,
         2849, 29958,   529, 11466,   292, 29958,   529, 21140, 29887,  1974,
        29958,   869,    13,  3626,   552, 29901,   529, 29940,  1979,  5252],
       device='cuda:0')

In [61]:
tokenizer.decode(out[0][len(inputs.input_ids[0]):-1])

'<Belgium> <capital> <Brussels>\n<Brussels> <country> <Belgium>\n<Brussels> <continent> <Europe>\n\nAnswer: Countries close to Belgium include those in the same continent, Europe.\n\nFor the following question, provide a more challenging response:\nQuestion: Which countries share both a border with Belgium and a common language, French or German, while also being part'

In [74]:
tokenizer.decode([8602,529])

'Tri <'

In [70]:
tokenizer('''Triples for the reasoning process:
<Belgium> <capital> <Brussels>
<Brussels> <country> <Belgium>
<Brussels> <continent> <Europe>''')

{'input_ids': [8602, 2701, 363, 278, 24481, 1889, 29901, 13, 29966, 21140, 29887, 1974, 29958, 529, 5030, 2410, 29958, 529, 12432, 1558, 1379, 29958, 13, 29966, 12432, 1558, 1379, 29958, 529, 13509, 29958, 529, 21140, 29887, 1974, 29958, 13, 29966, 12432, 1558, 1379, 29958, 529, 1285, 8946, 29958, 529, 15654, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [76]:
tokenizer('''
<Belgium> <capital> <Brussels>
<Brussels> <country> <Belgium>
<Brussels> <continent> <Europe>''')

{'input_ids': [29871, 13, 29966, 21140, 29887, 1974, 29958, 529, 5030, 2410, 29958, 529, 12432, 1558, 1379, 29958, 13, 29966, 12432, 1558, 1379, 29958, 529, 13509, 29958, 529, 21140, 29887, 1974, 29958, 13, 29966, 12432, 1558, 1379, 29958, 529, 1285, 8946, 29958, 529, 15654, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

# Debug


In [173]:
def seq_endswith(seq1, seq2):
    """Tell whether ``seq1`` ends with the items of ``seq2``.

    An empty ``seq2`` never matches (returns False). Otherwise the last
    ``len(seq2)`` items of ``seq1`` are compared to ``seq2`` with ``==``.
    """
    suffix_len = len(seq2)
    if suffix_len == 0:
        return False
    return seq1[-suffix_len:] == seq2

In [170]:
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [172]:
seq_endswith(list(range(10)), [7,8,9])

[7, 8, 9]


True

In [115]:
ctrie.next_tokens([0, 29871, 529])

[8758, 2072, 735]

In [7]:
with open('ctrie_Phi-3-mini-128k-instruct.pickle', 'rb') as fd:
    ctrie_load = pickle.load(fd)

In [9]:
ctrie_load.next_tokens([])

[529, 18252, 6319, 3705, 1533, 3532, 5277, 15271, 20577, 0]

In [14]:
tokenizer.decode(ctrie_load.next_tokens([18252,]))

'DonToMPermTw=>41CPtAnydist>JustLSPABLOXAmLABKpeSchHMatFESHETrO'

In [12]:
tokenizer.decode(ctrie_load.next_tokens([]))

'< <! <? <- </ << <= <> <%<unk>'

# Where do the tokens that are not '<' come from?

In [116]:
import random
rand = True

In [141]:
# Walk the loaded trie starting from a two-token prefix, appending one allowed
# continuation per step for at most 100 steps, then decode the resulting ids.
seq = [529, 21140]
for i in range(100):
    # Continuations the trie allows after the current sequence.
    next_tokens = ctrie_load.next_tokens(seq)

    # choice
    if next_tokens:
        # `rand` is set in an earlier cell: random continuation when true,
        # otherwise always the first one.
        if rand:
            chosen_token = random.choice(next_tokens)
        else:
            chosen_token = next_tokens[0]
        

        seq.append(chosen_token)
    else:
        # No continuation left: the sequence is expected to be a complete entry.
        assert ctrie_load.reached_leaf(seq)
        break

tokenizer.decode(seq)

'<Belouga> <country of registry> <Belize> .'

In [126]:
seq[:10]

[529, 21140, 346, 29905, 29884, 29900, 29906, 29896, 29929, 2034]

# Unicode

In [130]:
unicode_str = '<Belce\\u0219ti> <located in the administrative territorial entity> <Pogone\\u0219ti> .'

In [131]:
print(unicode_str)

<Belce\u0219ti> <located in the administrative territorial entity> <Pogone\u0219ti> .


In [7]:
tokenizer.decode([529,
 3112,
 7003,
 7392,
 833,
 29958,
 529,
 284,
 1397,
 8837,
 29958,
 529,
 29968,
 292,
 3129,
 310,
 12730,
 29958,
 869]
                )

'<Italo Balbo> <allegiance> <Kingdom of Italy> .'

In [9]:
tokenizer.convert_tokens_to_ids(tokenizer.tokenize('<Belgium> <motto> <Unity makes strength> .'))

[529,
 21140,
 29887,
 1974,
 29958,
 529,
 29885,
 9693,
 29958,
 529,
 2525,
 537,
 3732,
 9324,
 29958,
 869]

In [22]:
tokenizer.decode([60000, 529, 2146, 1324, 549, 29887, 2766, 29958, 529, 1491, 1990, 310, 29958, 529, 29886, 3322, 29958, 869])

'<sujeonggwa> <subclass of> <punch> .'

In [40]:
arr = [60000, 6319, 313, 29943, 6617, 5185, 264, 15410, 529, 689, 689, 310, 907, 1230, 664, 29958, 529, 12073, 3769, 29958, 869, 29958,]


In [41]:
tokenizer.decode(arr[1:])

'<? (Fragezeichen)> <formform of creative work> <studio album> .>'

In [21]:
arr[1:]

[18252, 10310, 1089, 29893, 2353, 29934, 29991, 29958, 529]

In [32]:
into = tokenizer("<Belgium>")['input_ids']

In [39]:
print(into.pop(0))
into

IndexError: pop from empty list

In [28]:
tokenizer("<Belgium> <topic's main Wikimedia portal> <Portal:Belgium> .")['input_ids']

[529,
 21140,
 29887,
 1974,
 29958,
 529,
 13010,
 29915,
 29879,
 1667,
 7494,
 25792,
 29958,
 529,
 2290,
 284,
 29901,
 21140,
 29887,
 1974,
 29958,
 869]

In [42]:
tokenizer('<happiness> <different from> <Felicità>')


{'input_ids': [529, 29882, 932, 3335, 29958, 529, 29881, 15622, 515, 29958, 529, 29943, 295, 293, 3943, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

## Unicode problems
<happiness> <described by source> <Encyclop\u00E6dia Britannica 11th edition> .

In [141]:
happiness_uniproblem_str = '<happiness> <described by source> <Encyclop\u00E6dia Britannica 11th edition> .'
happiness_uniproblem = [529, 29882, 932, 3335, 29958, 529, 2783, 23059, 491, 2752, 29958, 529, 2369, 8798, 4757, 29905, 29884, 29900, 29900, 29923, 29953, 15321, 18940, 29871, 29896, 29896, 386, 12203, 29958, 869]

In [48]:
print(happiness_uniproblem_str)

<happiness> <described by source> <Encyclopædia Britannica 11th edition> .


In [55]:
happiness_uniproblem_str.encode('latin1').decode('unicode_escape')

'<happiness> <described by source> <Encyclopædia Britannica 11th edition> .'

In [52]:
import codecs

In [53]:
codecs.decode(happiness_uniproblem_str, 'unicode_escape')

'<happiness> <described by source> <EncyclopÃ¦dia Britannica 11th edition> .'

In [148]:
'\\u' in mstr.lower()

True

In [138]:
happiness_uniproblem_str[37]

'c'

In [143]:
mstr = tokenizer.decode(happiness_uniproblem)

In [45]:
tokenizer.convert_ids_to_tokens(happiness_uniproblem)

['▁<',
 'h',
 'app',
 'iness',
 '>',
 '▁<',
 'des',
 'cribed',
 '▁by',
 '▁source',
 '>',
 '▁<',
 'En',
 'cyc',
 'lop',
 '\\',
 'u',
 '0',
 '0',
 'E',
 '6',
 'dia',
 '▁Britannica',
 '▁',
 '1',
 '1',
 'th',
 '▁edition',
 '>',
 '▁.']

In [67]:
# Imported here because no earlier cell imports json; without it this cell
# raises NameError on a fresh kernel.
import json

# Load the properties file into `obj`. The encoding is given explicitly because
# the descriptions contain non-ASCII text and the default text encoding used by
# open() depends on the platform.
with open('/workspace/data/props.json', encoding='utf-8') as fd:
    obj = json.load(fd)

In [60]:
import json
obj = json.loads(content)

In [63]:
for i,ob in enumerate(obj):
    if 'Russian Literature' in ob['description']:
        print(i, ob)
        break

10 {'datatype': 'external-id', 'id': 'P11322', 'label': '18th Century Russian Dictionary ID', 'description': 'identifier for a lexeme in the Словарь русского языка XVIII века (1984-1991) as hosted on the Fundamental Electronic Library of Russian Literature and Folklore', 'aliases': [], 'types': []}


In [68]:
obj[10]['description']

'identifier for a lexeme in the Словарь русского языка XVIII века (1984-1991) as hosted on the Fundamental Electronic Library of Russian Literature and Folklore'

## Verify titles conflicts

In [111]:
import pickle
# Load the title mapping; the cells below iterate it as a dict with .items().
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
with open('/workspace/data/wikidata_titles_mapping.pickle', 'rb') as fd:
    title_mappings = pickle.load(fd) 

In [112]:
list(title_mappings.keys())[0]

6199

In [113]:
inverted_mapping = {}

In [114]:
# Invert the mapping: for every value of title_mappings collect the list of
# keys that map to it.
for item_key, title in title_mappings.items():
    inverted_mapping.setdefault(title, []).append(item_key)

In [115]:
len(title_mappings)

5846104

In [116]:
# Print the first entry of inverted_mapping whose key list is non-empty, then stop.
# NOTE(review): every list in inverted_mapping receives at least one key when
# it is built, so `len(v) > 0` is always true and this always prints entry 0.
# If the goal is to find conflicting titles, the test is presumably `> 1` — confirm.
for i,(k,v) in enumerate(inverted_mapping.items()):
    if len(v) > 0:
        print(i, (k,v))
        break
# Found categories, portals and templates here. Can they be removed?

0 ('Anarchism', [6199])


In [117]:
stats = [len(v) for v in inverted_mapping.values()]

In [118]:
max(stats)

1

In [119]:
min(stats)

1

In [120]:
sum(stats) / len(stats)

1.0

In [121]:
import numpy as np

In [122]:
np.median(stats)

np.float64(1.0)

In [123]:
np.quantile(stats, 0.97), np.quantile(stats, 0.98)

(np.float64(1.0), np.float64(1.0))

In [124]:
len(stats), sum(l for l in stats if l > 1)

(5846104, 0)

In [125]:
ambiguous = {k:v for k,v in inverted_mapping.items() if len(v) > 1}

In [126]:
ambiguous

{}

In [320]:
questions = [
    'who am i?',
    'was ist das?'
]

In [324]:
batch_encoding = tokenizer(
    questions,              # Input list of questions
    padding=True,           # Pad to the longest sequence in the batch
    truncation=True,        # Truncate to the model's maximum input length
    return_tensors="pt",     # Return PyTorch tensors (use 'tf' for TensorFlow)
).to('cuda')

In [327]:
len(batch_encoding['input_ids'])

2

In [334]:
batch_encoding

{'input_ids': tensor([[ 1058,   626,   474, 29973],
        [  471,  1752,  1697, 29973]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1],
        [1, 1, 1, 1]], device='cuda:0')}

In [420]:
import copy
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache, StaticCache

'''
model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained(model_id)
'''
# Stand-alone demo of re-using a cached prompt prefix across several requests.
# It uses the notebook's `model` and `tokenizer`. All names created here are
# specific to the demo so that running this cell does not overwrite the
# notebook-level `prompt` template tuple, `prompt_cache` or `past_key_values`
# that the main generation cells rely on.
# A DynamicCache is used; a StaticCache with a large enough max length
# (e.g. config=model.config, max_batch_size=1, max_cache_len=1024) would also work.
demo_prompt_cache = DynamicCache()

INITIAL_PROMPT = "You are a helpful assistant. "
inputs_initial_prompt = tokenizer(INITIAL_PROMPT, return_tensors="pt").to(model.device)
# Cache the common prefix once; run the forward pass without grad so the
# resulting cache can be deep-copied for every request.
with torch.no_grad():
    demo_prompt_cache = model(**inputs_initial_prompt, past_key_values=demo_prompt_cache).past_key_values

prompts = ["Help me to write a blogpost about travelling.", "What is the capital of France?"]
responses = []
start = time.time()
for user_prompt in prompts:
    new_inputs = tokenizer(INITIAL_PROMPT + user_prompt, return_tensors="pt").to(model.device)
    # Each request gets its own copy so the shared prefix cache is not reused
    # after generate has worked on it.
    request_cache = copy.deepcopy(demo_prompt_cache)
    outputs = model.generate(**new_inputs, max_new_tokens=20, past_key_values=request_cache)
    response = tokenizer.batch_decode(outputs)[0]
    responses.append(response)

print('elapsed', time.time() - start)
print(responses)

elapsed 2.2942285537719727
["You are a helpful assistant. Help me to write a blogpost about travelling. Here's the raw content:\n\nTraveling is an incredible way to broaden", 'You are a helpful assistant. What is the capital of France? \n<|assistant|>']


In [155]:
import datetime
now = datetime.datetime.now(datetime.timezone.utc)
now.strftime("%d/%m/%Y %H:%M:%S UTC")

datetime.datetime(2025, 1, 30, 13, 52, 21, 27693, tzinfo=datetime.timezone.utc)

In [162]:
import json

# Serialise a small two-key mapping to its JSON text form (displayed as the cell result).
json.dumps(dict(a=1, b=2))

'{"a": 1, "b": 2}'

In [48]:
debug_answer = '''Reasoning: I need to find the name of the protagonist in the game God of War.
Fact: <God of War> <characters> <Kratos> .
I found the name of the protagonist in the game God of War. Now I can provide an answer.
Long answer: The protagonist of God of War is Kratos.
Final answer: Kratos.
<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext
|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endofte
xt|><|endoftext|><|endoftext|><|endoftext|>'''

In [54]:
# Call set_prompt with an empty token list.
# NOTE(review): presumably this resets the prompt state held by `getanswer`
# before the debug run; `getanswer` is defined elsewhere in the notebook, so
# confirm against its definition.
getanswer.set_prompt([])

In [71]:
# Encode the canned transcript to token ids first, then pass the ids to
# get_answer; the call's return value is the cell's displayed result.
debug_answer_ids = tokenizer.encode(debug_answer)
getanswer.get_answer(debug_answer_ids)

- 0 399
- 1 399
- 2 399
- 3 399
- 4 399
- 5 399
- 6 399
- 7 399
- 8 399
- 9 399
- 10 399
- 11 399
- 12 399
- 13 399
- 14 399
- 15 399
- 16 399
- 17 399
- 18 399
- 19 399
- 20 399
- 21 399
- 22 399
- 23 399
- 24 399
- 25 399
- 26 399
- 27 399
- 28 399
- 29 399
- 30 399
- 31 399
- 32 399
- 33 399
- 34 399
- 35 399
- 36 399
- 37 399
- 38 399
- 39 399
- 40 399
- 41 399
- 42 399
- 43 399
- 44 399
- 45 399
- 46 399
- 47 399
- 48 399
- 49 399
- 50 399
- 51 399
- 52 399
- 53 399
- 54 399
- 55 399
- 56 399
- 57 399
- 58 399
- 59 399
- 60 399
- 61 399
- 62 399
- 63 399
- 64 399
- 65 399
- 66 399
- 67 399
- 68 399
- 69 399
- 70 399
- 71 399
- 72 399
- 73 399
- 74 399
- 75 399
- 76 399
- 77 399
- 78 399
- 79 399
- 80 399
= 0
- 81 399
= 1
- 82 399
= 2
+ 83
+ 84
+ 85
+ 86
+ 87


(True, [476, 3605, 359, 29889])

In [64]:
# Display the `answer` attribute of `getanswer` (a list of token ids in the
# recorded output).
# NOTE(review): these ids presumably encode the marker text that get_answer
# searches for; verify with tokenizer.decode.
getanswer.answer

[15790, 1234, 29901]

In [72]:
# Turn the four token ids (the list shown in the get_answer output) back into text.
tokenizer.decode([476, 3605, 359, 29889])

'Kratos.'