In [1]:
import sys
import os

# Resolve the notebook's working directory. `__file__` is not defined inside a
# notebook kernel, so the working directory is used directly instead of
# resolving a placeholder file name against it.
current_dir = os.getcwd()

# Get the parent directory (presumably the project root that contains the
# `config` package imported below — verify against the repository layout).
parent_dir = os.path.dirname(current_dir)

# Add the parent directory to sys.path, guarding against duplicate entries when
# this cell is re-run in the same kernel.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

try:
    from config.CONFIG import PERSIST_DIRECTORY, MODEL_PATH
    print('successfully loaded custom CONFIG')
except Exception as e:
    # Best-effort fallback: report why the custom config could not be loaded,
    # then use the defaults. The confirmation is printed only after the
    # default import has actually succeeded.
    print(e)
    from config.DEFAULT_CONFIG import PERSIST_DIRECTORY, MODEL_PATH
    print('loaded default configuration')

successfully loaded custom CONFIG


In [2]:
from llama_cpp import Llama
import inspect

# Introspect the Llama constructor and list the name of every argument it accepts.
signature = inspect.signature(Llama.__init__)

for name in signature.parameters:
    print(f"Parameter: {name}")
    

Parameter: self
Parameter: model_path
Parameter: n_gpu_layers
Parameter: split_mode
Parameter: main_gpu
Parameter: tensor_split
Parameter: rpc_servers
Parameter: vocab_only
Parameter: use_mmap
Parameter: use_mlock
Parameter: kv_overrides
Parameter: seed
Parameter: n_ctx
Parameter: n_batch
Parameter: n_threads
Parameter: n_threads_batch
Parameter: rope_scaling_type
Parameter: pooling_type
Parameter: rope_freq_base
Parameter: rope_freq_scale
Parameter: yarn_ext_factor
Parameter: yarn_attn_factor
Parameter: yarn_beta_fast
Parameter: yarn_beta_slow
Parameter: yarn_orig_ctx
Parameter: logits_all
Parameter: embedding
Parameter: offload_kqv
Parameter: flash_attn
Parameter: last_n_tokens_size
Parameter: lora_base
Parameter: lora_scale
Parameter: lora_path
Parameter: numa
Parameter: chat_format
Parameter: chat_handler
Parameter: draft_model
Parameter: tokenizer
Parameter: type_k
Parameter: type_v
Parameter: spm_infill
Parameter: verbose
Parameter: kwargs


In [3]:


# Load the model at MODEL_PATH and run a single question/answer completion.
llm = Llama(
      model_path=MODEL_PATH,
      n_gpu_layers=-1, # GPU offload is enabled; -1 presumably offloads every layer — confirm against the installed llama-cpp-python build
      # seed=1337, # Uncomment to set a specific seed
      n_ctx=2048 # Context window size in tokens
)
output = llm(
      "Q: Name the planets in the solar system? A: ", # Prompt
      max_tokens=None, # No fixed token cap: None lets generation run up to the end of the context window
      stop=["Q:"], # Stop generating just before the model would generate a new question
      echo=True # Echo the prompt back in the output
) # Generate a completion, can also call create_completion
# Prints the full response dict (id, model path, choices, ...), not just the generated text
print(output)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from C:\Users\crossfire234\Desktop\Software\LLMs\.gguf\mistral-7b-instruct-v0.2.Q2_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128

{'id': 'cmpl-619298ba-2c6b-4a3b-84e2-8f25e76a50af', 'object': 'text_completion', 'created': 1730698644, 'model': 'C:\\Users\\crossfire234\\Desktop\\Software\\LLMs\\.gguf\\mistral-7b-instruct-v0.2.Q2_K.gguf', 'choices': [{'text': "Q: Name the planets in the solar system? A: 1. Mercury: The smallest and closest planet to the Sun.\n\n2. Venus: The second planet from the Sun, it's known as Earth's 'sister planet' due to their similar size and composition.\n\n3. Earth: Our home planet, located third from the Sun.\n\n4. Mars: The red planet, named for its reddish appearance due to iron oxide (rust) on its surface.\n\n5. Jupiter: The largest planet in our solar system, known for its Great Red Spot, a storm that is believed to be a raging jet wind.\n\n6. Saturn: Known for its prominent ring system.\n\n7. Uranus: A gas giant tilted on its side.\n\n8. Neptune: The furthest planet from the Sun.\n\n9. Pluto: A dwarf planet, known for its heart-shaped glacier on its surface.\n\nOther notable celest

In [4]:
# Drop the notebook's reference to the completion model before it is loaded again with different options.
del llm

In [5]:
# Reload the same model with embedding=True so create_embedding can be called on it.
llm = Llama(
      model_path=MODEL_PATH,
      n_gpu_layers=-1, # GPU offload is enabled; -1 presumably offloads every layer — confirm against the installed llama-cpp-python build
      # seed=1337, # Uncomment to set a specific seed
      n_ctx=2048, # Context window size in tokens
      embedding=True
)

embeddings = llm.create_embedding('hello world')
print('\nResult:\n')
# NOTE(review): this prints the entire response, including every embedding value,
# which produces a very large cell output — consider printing a summary instead.
print(embeddings)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from C:\Users\crossfire234\Desktop\Software\LLMs\.gguf\mistral-7b-instruct-v0.2.Q2_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128


Result:

{'object': 'list', 'data': [{'object': 'embedding', 'embedding': [[-1.9626355171203613, 1.9982613325119019, -2.662684917449951, 2.9058785438537598, 1.6578469276428223, -4.690700054168701, 3.3722028732299805, 1.975972056388855, -0.5039074420928955, -3.8630638122558594, -4.201977252960205, -3.294072389602661, 0.5889214873313904, 1.7289477586746216, -4.261417865753174, 4.910633563995361, -2.4925742149353027, 1.2609111070632935, -3.3125851154327393, 2.5527327060699463, -2.3813459873199463, 2.565943717956543, -2.1254687309265137, -0.9111056923866272, 1.8653067350387573, 3.9763596057891846, 5.258307456970215, 0.5806244611740112, -8.089441299438477, -5.601531028747559, 2.0355145931243896, -1.3711175918579102, 4.6413350105285645, 3.711745023727417, -4.380242347717285, -5.629105091094971, -2.597208261489868, -3.3076441287994385, -2.0729925632476807, -1.8507235050201416, 7.144933700561523, 3.9515562057495117, -2.553457736968994, -5.562294006347656, -4.734770774841309, 5.111012458801269

In [6]:
# Drop the notebook's reference to the embedding-mode model now that the demo is done.
del llm

In [7]:
import inspect

def get_params(method):
    """Print the name of each parameter in ``method``'s signature, one per line.

    Args:
        method: Any callable that ``inspect.signature`` can introspect.

    Returns:
        None. The names are written to stdout as ``Parameter: <name>``.
    """
    # Iterating the parameters mapping yields the names in declaration order.
    for name in inspect.signature(method).parameters:
        print(f"Parameter: {name}")

In [8]:

import chromadb
# Create a Chroma client with default settings (presumably an in-memory, non-persistent client — confirm for the installed chromadb version).
client = chromadb.Client()
# Fetch the collection by name, creating it if it does not exist yet, so this cell can be re-run safely.
collection = client.get_or_create_collection(name='apple_dog_screwdriver')
# List the parameter names accepted by collection.add
get_params(collection.add)


Parameter: ids
Parameter: embeddings
Parameter: metadatas
Parameter: documents
Parameter: images
Parameter: uris


In [9]:
# List the parameter names accepted by collection.query
get_params(collection.query)

Parameter: query_embeddings
Parameter: query_texts
Parameter: query_images
Parameter: query_uris
Parameter: n_results
Parameter: where
Parameter: where_document
Parameter: include


In [10]:
# Store two short documents under explicit ids. No embeddings are passed here, so
# Chroma presumably computes them with the collection's default embedding function — confirm.
collection.add(
    ids = ['doc1', 'doc2'],
    documents = ['An intense focus does more than one hundred lazy days', 'Laziness puts the burden on those who make things work']
               )

In [11]:
# Ask for the single stored document closest to the query text; the result dict is displayed as the cell output.
collection.query(query_texts=['Laziness'], n_results=1)

{'ids': [['doc2']],
 'distances': [[0.3173322379589081]],
 'metadatas': [[None]],
 'embeddings': None,
 'documents': [['Laziness puts the burden on those who make things work']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [12]:

# Load the model in vocab_only mode: only the tokenizer vocabulary is needed to tokenize text.
llm = Llama(
      model_path=MODEL_PATH,
      n_gpu_layers=-1, # GPU offload setting, same value as the earlier cells
      # seed=1337, # Uncomment to set a specific seed
      n_ctx=2048, # Context window size in tokens
      vocab_only=True,
)
text = 'green eggs and ham'
# Keep the token ids: the bare tokenize() call was not the last expression of the
# cell, so its result was silently discarded and nothing was displayed.
token_ids = llm.tokenize(text.encode('utf-8'))
del llm

# Last expression of the cell, so the notebook renders the token ids.
token_ids

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from C:\Users\crossfire234\Desktop\Software\LLMs\.gguf\mistral-7b-instruct-v0.2.Q2_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128

In [13]:
from datetime import datetime

# Capture the current local date and time once
now = datetime.now()

# Render it as a "YYYY/MM/DD-HH:MM:SS" string via the datetime format spec
timestamp = f"{now:%Y/%m/%d-%H:%M:%S}"

# Confirm the formatted value is a plain string
print(type(timestamp))

<class 'str'>


In [14]:
eggs = ['A','B','C']
# Use a distinct loop variable so the `eggs` list is not overwritten by its own
# elements; otherwise `eggs` is the string 'C' after the loop.
for i, egg in enumerate(eggs):
    print(i)

0
1
2


In [15]:

text = "Hello, how are you?"
# vocab_only=True: only the tokenizer vocabulary is needed to tokenize and detokenize
model = Llama(model_path=MODEL_PATH, vocab_only=True)
# Tokenize the text
tokens = model.tokenize(text.encode('utf-8'))

# Get the text for each token. A single token may hold only part of a multi-byte
# UTF-8 character, so undecodable bytes are replaced rather than raising
# UnicodeDecodeError. strip() removes the surrounding whitespace for display.
token_texts = [
    model.detokenize([token]).decode('utf-8', errors='replace').strip()
    for token in tokens
]

# Print the results
for token, token_text in zip(tokens, token_texts):
    print(f"Token ID: {token}, Text: '{token_text}'")
del model

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from C:\Users\crossfire234\Desktop\Software\LLMs\.gguf\mistral-7b-instruct-v0.2.Q2_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128

Token ID: 1, Text: ''
Token ID: 22557, Text: 'Hello'
Token ID: 28725, Text: ','
Token ID: 910, Text: 'how'
Token ID: 460, Text: 'are'
Token ID: 368, Text: 'you'
Token ID: 28804, Text: '?'


AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | 
Model metadata: {'general.name': 'mistralai_mistral-7b-instruct-v0.2', 'general.architecture': 'llama', 'llama.context_length': '32768', 'llama.rope.dimension_count': '128', 'llama.embedding_length': '4096', 'llama.block_count': '32', 'llama.feed_forward_length': '14336', 'llama.attention.head_count': '32', 'tokenizer.ggml.eos_token_id': '2', 'general.file_type': '10', 'llama.attention.head_count_kv': '8', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.freq_base': '1000000.000000', 'tokenizer.ggml.model': 'llama', 'general.quantization_version': '2', 'tokenizer.ggml.bos_token_id': '1', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.padding_token_id': '0', 'tokenizer.ggml.add_bos_token': 'true', 

In [17]:
def chunked(iterable, chunk_size):
    """Yield consecutive chunks of at most ``chunk_size`` items from ``iterable``.

    Sized, sliceable inputs (list, tuple, str, ...) are sliced, so each chunk has
    the same type as the input. Inputs without ``len()`` (generators, iterators)
    are consumed lazily and each chunk is yielded as a list.

    Args:
        iterable: The sequence or iterable to split.
        chunk_size: Maximum number of items per chunk; must be positive.

    Yields:
        Successive chunks; the last one may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    from itertools import islice

    # A negative step would make range() empty and silently yield nothing.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    try:
        total = len(iterable)
    except TypeError:
        # No len(): treat the input as a one-shot iterator and batch it lazily.
        iterator = iter(iterable)
        # iter(callable, sentinel) stops at the first empty batch.
        for chunk in iter(lambda: list(islice(iterator, chunk_size)), []):
            yield chunk
        return

    for start in range(0, total, chunk_size):
        yield iterable[start:start + chunk_size]

In [None]:
# Sample data: the integers 0 through 299
numbers = list(range(300))

# Split the sample into groups of at most 13 items and show each group on its own line
chunks = chunked(numbers, 13)
for chunk in chunks:
    print(chunk)

[0, 1, 2, 3, 4, 5, 6]
[7, 8, 9, 10, 11, 12, 13]
[14, 15, 16, 17, 18, 19, 20]
[21, 22, 23, 24, 25, 26, 27]
[28, 29, 30, 31, 32, 33, 34]
[35, 36, 37, 38, 39, 40, 41]
[42, 43, 44, 45, 46, 47, 48]
[49, 50, 51, 52, 53, 54, 55]
[56, 57, 58, 59, 60, 61, 62]
[63, 64, 65, 66, 67, 68, 69]
[70, 71, 72, 73, 74, 75, 76]
[77, 78, 79, 80, 81, 82, 83]
[84, 85, 86, 87, 88, 89, 90]
[91, 92, 93, 94, 95, 96, 97]
[98, 99, 100, 101, 102, 103, 104]
[105, 106, 107, 108, 109, 110, 111]
[112, 113, 114, 115, 116, 117, 118]
[119, 120, 121, 122, 123, 124, 125]
[126, 127, 128, 129, 130, 131, 132]
[133, 134, 135, 136, 137, 138, 139]
[140, 141, 142, 143, 144, 145, 146]
[147, 148, 149, 150, 151, 152, 153]
[154, 155, 156, 157, 158, 159, 160]
[161, 162, 163, 164, 165, 166, 167]
[168, 169, 170, 171, 172, 173, 174]
[175, 176, 177, 178, 179, 180, 181]
[182, 183, 184, 185, 186, 187, 188]
[189, 190, 191, 192, 193, 194, 195]
[196, 197, 198, 199, 200, 201, 202]
[203, 204, 205, 206, 207, 208, 209]
[210, 211, 212, 213, 214, 215,