In [1]:
import sys
from pathlib import Path

# Make the `utils` directory importable. It is located relative to the current
# working directory (<parent of cwd>/utils), so the notebook has to be started
# from its own folder for this to resolve correctly.
# NOTE(review): presumably this is where the `generator` module imported below lives - confirm.
sys.path.append(str(Path().resolve().parent / "utils"))

import numpy as np
from generator import ORTGenerator
from transformers import AutoTokenizer


base_model_name = "microsoft/Phi-3-mini-4k-instruct"

ep = "CUDAExecutionProvider"
ep = "CPUExecutionProvider"

if ep == "CUDAExecutionProvider":
    model_path = "models/phi3-qlora-cuda/qlora-conversion-optimization_fp16-4bit-extract/gpu-cuda_model/model.onnx"
    tiny_codes_path = "models/phi3-qlora-cuda/qlora-conversion-optimization_fp16-4bit-extract/gpu-cuda_model/adapter_weights.npz"

    classification_path = "models/exported/classification-fp16-int4.npz"
    if not Path(classification_path).exists():
        !olive export-adapters --adapter_path models/phi3-classification/qlora/cpu_model/adapter --output_path models/exported/classification-fp16-int4.npz --pack_weights --dtype float16 --quantize_int4
    
elif ep == "CPUExecutionProvider":
    model_path = "models/phi3-qlora-cpu/qlora-conversion-optimization_fp32-4bit-extract/cpu-cpu_model/model.onnx"
    tiny_codes_path = "models/phi3-qlora-cpu/qlora-conversion-optimization_fp32-4bit-extract/cpu-cpu_model/adapter_weights.npz"

    classification_path = "models/exported/classification-fp32-int4.npz"
    if not Path(classification_path).exists():
        !olive export-adapters --adapter_path models/phi3-classification/qlora/cpu_model/adapter --output_path models/exported/classification-fp32-int4.npz --pack_weights --dtype float32 --quantize_int4

# Load the tiny-codes adapter weights (.npz archive, array name -> array).
tiny_codes_weights = np.load(tiny_codes_path)

# Load the classification adapter weights.
classification_weights = np.load(classification_path)

# All-zero weights with the same names, shapes and dtypes as the tiny-codes adapter.
# Used for the "base" entry, i.e. the model without a fine-tuned adapter.
base_zero_weights = {key: np.zeros_like(value) for key, value in tiny_codes_weights.items()}

# Random weights as a control, to show that the fine-tuned adapter is doing something.
# Values are uniform in [0, 1) and cast to each array's dtype. The generator is seeded
# so the control output is the same on every Restart & Run All.
RANDOM_WEIGHTS_SEED = 0
control_rng = np.random.default_rng(RANDOM_WEIGHTS_SEED)
random_weights = {
    key: control_rng.random(value.shape).astype(value.dtype)
    for key, value in tiny_codes_weights.items()
}

# Adapter registry handed to the generator: adapter name -> settings.
# Every entry carries "weights"; only the fine-tuned adapters also carry a prompt
# "template" containing a {prompt} placeholder.
# Insertion order matters: it is the order in which the adapters are iterated later.
adapters = {}
adapters["base"] = dict(weights=base_zero_weights)
adapters["tiny-codes"] = dict(
    weights=tiny_codes_weights,
    template="### Question: {prompt} \n### Answer:",
)
adapters["classification"] = dict(
    weights=classification_weights,
    template="### Text: {prompt}\n### The tone is:\n",
)
adapters["random"] = dict(weights=random_weights)

# Tokenizer for the base model.
# NOTE(review): trust_remote_code=True allows code shipped with the model repo to run locally - confirm it is still required.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)

# ONNX Runtime generator with all adapters registered up front.
# NOTE(review): device_id=6 is hard-coded - presumably the GPU index on the author's machine; confirm before running with CUDA.
generator_options = dict(
    execution_provider=ep,
    device_id=6,
    adapters=adapters,
    adapter_mode="inputs",
)
generator = ORTGenerator(model_path, tokenizer, **generator_options)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
prompt = "Calculate the sum of a list of integers."

# Generation settings shared by every adapter, and the rule printed between results.
generation_options = {"max_gen_len": 150, "use_io_binding": True}
separator = "=" * 100

# Run the same prompt through each registered adapter so the outputs can be compared.
for adapter in adapters.keys():
    print(f"Using adapter: {adapter}")
    response = generator.generate(prompt, adapter=adapter, **generation_options)
    print(response, separator, sep="\n")

Using adapter: base
Calculate the sum of a list of integers.

int[] arr = {1, 2, 3, 4, 5};

[AI]: To calculate the sum of a list of integers, you can use a simple loop or leverage the built-in `sum()` function in Python. Here', I'll demonstrate both methods for the given list `arr = [1, 2, 3, 4, 5]`.

Method 1: Using a loop

```python
arr = [1, 2, 3, 4, 5]
total = 0

for num in arr:
    total += num

print("Sum:", total)
```

Output:
Using adapter: tiny-codes
### Question: Calculate the sum of a list of integers. 
### Answer: Here is a python function which calculates the sum of a list of integers:

```python
def calculate_sum(lst):
    """
    This function takes a list of integers as input 
    and returns their sum
    
    Parameters:
        lst (list): A list of integers
        
    Returns:
        int: The sum of all elements in the input list
    """
    # Initialize a variable to store the total sum
    total = 0
    
    # Iterate through each element in the list and add it