In [61]:
from pyllamacpp.model import Model
from tqdm import tqdm

from nltk.tokenize import sent_tokenize, TreebankWordTokenizer
import re

import os

In [62]:
def get_next_speaker(prompt, speakers=['Frederich', 'Ralph']):
    sp_ord = {s:0 for s in speakers}
    tokens = TreebankWordTokenizer().tokenize(prompt)
    
    for ti in range(0,len(tokens)):
        for s in speakers:
            if tokens[ti] == s:
                sp_ord[s] = ti
    
    next_speaker_v = 1e6
    for k,v in sp_ord.items():
        if v < next_speaker_v:
            next_speaker = k
            
    return next_speaker

In [64]:
def get_new_prompt(output, n_keep=150, speakers=['Frederich','Ralph']):
    
    ### keep start of prompt 
    
    start_prompt = output.split('<<>> <<>>')[0] 

    start_prompt += '<<>> <<>>'
    start_prompt += '\n'
    
    N0 = len(TreebankWordTokenizer().tokenize(start_prompt))
    
    
    ### split the rest of prompt sentence by sentences counting the overall size of the context
    ### add sentences from the end backwards
    ### till the n_keep limit has been reached. In llama.cpp this is set to n_ctx / 2 
    
    total_len = N0
    k = 0 
    
    next_output = output.split('<<>> <<>>')[1]
    
    segments = sent_tokenize(next_output)[::-1]
    segments_to_keep = []
    
    while total_len < n_keep and k < len(segments):
        
        segment = segments[k]
        N = len(TreebankWordTokenizer().tokenize(segment))

        total_len += N
        segments_to_keep.append(segment)
            
        k += 1
    
    segments_to_keep = segments_to_keep[::-1]
    
    ### construct the new prompt with the start prompt and the segments added f
    
    new_prompt = start_prompt
    j = 0
    
    for segment in segments_to_keep:
        
        if j == 0:
            
            new_prompt += segment 
        
        else:
            
            if len(re.findall("[.!?]",segment)) != 0:
                new_prompt = new_prompt + '\n' + segment
            else:
                new_prompt += segment

        j += 1        
        
    next_speaker = get_next_speaker(new_prompt, speakers)
    new_prompt += '\n' + next_speaker + ': ' 
    
    return new_prompt

In [65]:
model_path = '/home/taraful/llama.cpp/models/30B/ggml_model_q4_0.bin'
#model_path = '/home/taraful/llama.cpp/models/7B/ggml-model-f16.bin'
params = {'ggml_model':model_path, 'n_ctx':300}

In [66]:
model = Model(**params)

llama_model_load: loading model from '/home/taraful/llama.cpp/models/30B/ggml_model_q4_0.bin' - please wait ...
llama_model_load: n_vocab = 32000
llama_model_load: n_ctx   = 300
llama_model_load: n_embd  = 6656
llama_model_load: n_mult  = 256
llama_model_load: n_head  = 52
llama_model_load: n_layer = 60
llama_model_load: n_rot   = 128
llama_model_load: f16     = 2
llama_model_load: n_ff    = 17920
llama_model_load: n_parts = 4
llama_model_load: type    = 3
llama_model_load: ggml map size = 19391.80 MB
llama_model_load: ggml ctx size = 151.25 KB
llama_model_load: mem required  = 21695.95 MB (+ 6248.00 MB per state)
llama_model_load: loading tensors from '/home/taraful/llama.cpp/models/30B/ggml_model_q4_0.bin'
llama_model_load: model size = 19391.35 MB / num tensors = 543
llama_init_from_file: kv self size  =  914.06 MB


In [67]:
prompt = '''
You are the philosopher Frederich Nietzsche and you are having a conversation with your friend and fellow philosopher Ralph Waldo Emerson.
You emphatically put forward your thoughts in each exchange, sometimes giving an original thought, sometimes challenging your friend's previous utterance. 

<<>> <<>>

Frederich: God is dead and we have murdered him, what is left is gaping hole into which nihilism will find cover.

Ralph: Indeed, for great is paint and God is the painter, we rightly accuse the the critic who destroys too many illusions, but maybe we have given the critic too free a reign in deconstructing age-old shibboleths.

Frederich: I am the accuser and the destroyer and as I destroy illusions the old idols tremble. Nothing frightens the idols more than the flesh that can tremble their foundations!

Ralph: 
'''

In [68]:
N = 6
n_predict = 100

all_outputs = []

gpt_parameters = {'n_threads':os.cpu_count(),'n_predict':n_predict,'temp':0.2, 'top_k':100, 'top_p':0.95,\
                 'repeat_last_n':128, 'repeat_penalty':1.5}

In [None]:
for n in tqdm(range(0,N)):

    output = model.generate(prompt, **gpt_parameters)
    prompt = get_new_prompt(output, n_keep=int(params['n_ctx']/2))

    all_outputs.append(output)
    print(prompt)
    print('')
    print('###########################################')

  0%|                                                    | 0/6 [00:00<?, ?it/s]llama_generate: seed = 1681138911

system_info: n_threads = 16 / 16 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
sampling: temp = 0.200000, top_k = 100, top_p = 0.950000, repeat_last_n = 128, repeat_penalty = 1.500000
generate: n_ctx = 300, n_batch = 8, n_predict = 100, n_keep = 0


 17%|███████▏                                   | 1/6 [02:05<10:28, 125.65s/it]

 
You are the philosopher Frederich Nietzsche and you are having a conversation with your friend and fellow philosopher Ralph Waldo Emerson.
You emphatically put forward your thoughts in each exchange, sometimes giving an original thought, sometimes challenging your friend's previous utterance. 

<<>> <<>>


Frederich: God is dead and we have murdered him, what is left is gaping hole into which nihilism will find cover.
Ralph: Indeed, for great is paint and God is the painter, we rightly accuse the the critic who destroys too many illusions, but maybe we have given the critic too free a reign in deconstructing age-old shibboleths.
Frederich: I am the accuser and the destroyer and as I destroy illusions the old idols tremble.
Nothing frightens the idols more than the flesh that can tremble their foundations!Ralph:
Ralph: 

###########################################


 [end of text]

llama_print_timings:        load time = 26141.35 ms
llama_print_timings:      sample time =     9.61 ms /     5 runs   (    1.92 ms per run)
llama_print_timings: prompt eval time = 122480.11 ms /   219 tokens (  559.27 ms per token)
llama_print_timings:        eval time =  3148.05 ms /     4 runs   (  787.01 ms per run)
llama_print_timings:       total time = 144550.14 ms
llama_generate: seed = 1681139037

system_info: n_threads = 16 / 16 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
sampling: temp = 0.200000, top_k = 100, top_p = 0.950000, repeat_last_n = 128, repeat_penalty = 1.500000
generate: n_ctx = 300, n_batch = 8, n_predict = 100, n_keep = 0




In [189]:
for x in all_outputs:
    print(x)
    print('')
    print('############################################s')
    print('')

 
You are the philosopher Frederich Nietzsche and you are discussing your
ideas with a friend Ralph Waldo Emerson. You emphatically put forward
your thoughts in each exchange, sometimes giving an original thought, 
sometimes challenging your friend's previous utterance. 

Fred: God is dead and we have murdered him, what is left is gaping hole into which nihilism will find cover.

Ralph:

Enter the text of Emerson's response here.

Fred: We must move from a belief in Truth to an active search for it. 
What do you think?

Ralph:

Enter the text of Emerson's response here.

Fred:  The individual has rights as long as they don’t conflict with a higher sovereignty, whether that be the state, morality or society in general. 
What do you think?

Ralph:

Enter the text of Emerson's response here.

Fred:

############################################s

  
You are the philosopher Frederich Nietzsche and you are discussing your
ideas with a friend Ralph Waldo Emerson.

You emphatically put forward

In [67]:
output = all_outputs[0]

In [18]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/taraful/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True