In [1]:
import onnxruntime_genai as og
import numpy as np
import os
import time
import psutil

In [2]:
# Placeholder value: replace with the local directory that holds the
# DeepSeek-R1-Distill-Qwen-7B INT4 ONNX model before running the notebook.
# This string is passed straight to og.Model() further down.
model_folder = "Your DeepSeek-R1-Distill-Qwen-7B-INT4-ONNX location"

In [3]:

# Handle on the current kernel process; reused later for every memory reading.
process = psutil.Process(pid=os.getpid())

# Baseline resident set size, converted from bytes to MB, taken before the
# model is loaded.
mem_before_load = process.memory_info().rss / 2**20

In [4]:
# Load the model from model_folder. The memory readings taken immediately
# before and after this call are compared in the next cell.
model = og.Model(model_folder)

In [5]:

# Resident set size (MB) once the model is loaded; the difference from the
# baseline is reported as the memory added by loading.
mem_after_load = process.memory_info().rss / 2**20
print(
    f"Memory before model loading: {mem_before_load:.2f} MB, "
    f"Memory after loading: {mem_after_load:.2f} MB, "
    f"Added: {mem_after_load - mem_before_load:.2f} MB"
)

Memory before model loading: 80.36 MB, Memory after loading: 4379.05 MB, Added: 4298.69 MB


In [6]:
# Tokenizer built from the loaded model; used to encode the prompt.
tokenizer = og.Tokenizer(model)
# Streaming decoder object obtained from the tokenizer.
tokenizer_stream = tokenizer.create_stream()

In [7]:
# Options forwarded as keyword arguments to params.set_search_options().
# NOTE(review): max_length presumably caps the total sequence length
# (prompt + generated tokens) — confirm against the library documentation.
search_options = {
    'max_length': 2048,
    'past_present_share_buffer': False,
}

In [8]:
# Prompt wrapper: the user text is substituted for {input} via str.format().
# NOTE(review): confirm these role markers match the chat format the exported
# model/tokenizer expects — the DeepSeek-R1 distill models may define their
# own special tokens for the user/assistant turns.
chat_template = "<|user|>{input}<|assistant|>"

In [9]:
# Shorter alternative prompt for a quick run:
# text = """explain 1+1=2"""

# Question sent to the model.
text = (
    "Find all pairwise different isomorphism groups of order 147 "
    "that do not contain elements of order 49"
)

In [10]:
# Fill the chat template with the user question to get the final prompt string.
prompt = chat_template.format(input=text)

In [11]:
# Encode the formatted prompt; the result is later assigned to params.input_ids.
input_tokens = tokenizer.encode(prompt)

In [12]:
# Generation parameters object created for the loaded model.
params = og.GeneratorParams(model)

In [13]:
# Pass every entry of search_options as a keyword argument.
params.set_search_options(**search_options)
# Attach the encoded prompt as the generator's input.
params.input_ids = input_tokens

In [14]:
# Create the generator from the model and the configured parameters.
generator = og.Generator(model, params)

In [15]:

# Bookkeeping for the generation loop: number of tokens emitted so far and
# the wall-clock time of the first token (unset until one is produced).
token_count = 0
first_token_time = None

# Start the clock last so it sits as close as possible to the loop.
start_time = time.time()

In [16]:
# Token-by-token generation loop: produce one token per iteration, print its
# text as soon as it is available, and count the tokens for the final report.
while not generator.is_done():
    generator.compute_logits()
    generator.generate_next_token()

    # Index 0 takes the first entry of the returned tokens
    # (presumably the single sequence in the batch — confirm).
    new_token = generator.get_next_tokens()[0]

    # Decode through the streaming decoder instead of decoding each token in
    # isolation: a character whose bytes span several tokens cannot be
    # rendered correctly from any one of them alone, whereas the stream
    # object is the library's interface for incremental decoding.
    token_text = tokenizer_stream.decode(new_token)

    # First iteration only: report the latency from start_time to the moment
    # the first token's text is ready to print.
    if token_count == 0:
        first_token_time = time.time()
        first_response_latency = first_token_time - start_time
        print(f"firstly token delpay: {first_response_latency:.4f} s")

    print(token_text, end='', flush=True)
    token_count += 1

firstly token delpay: 2.2185 s
Okay, so I need to find all pairwise different isomorphism groups of order 147 that do not contain elements of order 49. Hmm, let me start by understanding the problem step by step.

First, the order of the group is 147. I know that 147 factors into prime numbers as 3 × 7². So, the group has order 3 × 49. Since 3 and 49 are coprime, any group of order 147 is solvable by Sylow's theorem. That means the group is either a direct product of its Sylow subgroups or a semidirect product.

Now, the Sylow theorems tell me that the number of Sylow p-subgroups for each prime p dividing the group order must satisfy certain conditions. Let's consider the Sylow 3-subgroups and Sylow 7-subgroups.

For the Sylow 3-subgroup, the number of such subgroups, n_3, must divide 49 and be congruent to 1 mod 3. So, n_3 can be 1 or 49. Similarly, for the Sylow 7-subgroups, n_7 must divide 3 and be congruent to 1 mod 7. Since 3 is less than 7, n_7 can only be 1.

If n_7 = 1, then th

In [17]:
# Stop the clock before any reporting so printing is not counted.
end_time = time.time()
total_time = end_time - start_time

print("\n--- Inference ends ---")
print(f"A total of  {token_count} tokens were output, taking {total_time:.4f} second")

# Throughput is only reported when at least one token was generated.
if token_count > 0:
    average_speed = token_count / total_time
    print(f"Average generation rate: {average_speed:.2f} tokens/second")

# Resident set size (MB) after generation, compared with the post-load reading.
mem_after_inference = process.memory_info().rss / 2**20
print(
    f"Memory usage after inference: {mem_after_inference:.2f} MB, "
    f"change from after loading {mem_after_inference - mem_after_load:.2f} MB"
)


--- Inference ends ---
A total of  2014 tokens were output, taking 1099.0207 second
Average generation rate: 1.83 tokens/second
Memory usage after inference: 4904.38 MB, change from after loading 525.32 MB
