In [1]:
%%capture
# Environment setup: install Unsloth from PyPI, then replace it with the
# build from the GitHub repository. %%capture hides the pip output.
# NOTE(review): neither install is version-pinned, so a fresh run may resolve
# to different package versions than the ones recorded in the outputs below.
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
import os
import time

from tqdm import tqdm

In [3]:
from unsloth import FastLanguageModel
import torch
# --- Settings handed to FastLanguageModel.from_pretrained ---

# 4-bit quantization lowers memory usage; this can be switched to False.
load_in_4bit = True

# None lets the library auto-detect (Float16 for Tesla T4 / V100, Bfloat16 for Ampere+).
dtype = None

# Any value may be chosen; RoPE scaling is supported automatically.
# NOTE(review): the prompts assembled in the inference cell look much longer
# than 100 tokens - confirm this limit is intended.
max_seq_length = 100


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [4]:
# Load the model and tokenizer through Unsloth using the settings defined in
# the configuration cell.
#
# The Hugging Face access token is read from the HF_TOKEN environment variable
# rather than being written into the notebook. os.environ.get returns None when
# the variable is unset, in which case no token is passed.
# NOTE: a token was previously hard-coded on this line; it must be treated as
# leaked and revoked in the Hugging Face account settings.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Microsoft/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = os.environ.get("HF_TOKEN"), # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2024.10.3: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers via:
`pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"`


In [5]:

EOS_TOKEN = tokenizer.eos_token # Appended to every formatted text below

def formatting_prompts_func(examples):
    """Build the "text" column for one batch of dataset rows.

    Args:
        examples: Batch mapping of column name to a list of values. Only the
            "input" and "sample_index" columns are read.

    Returns:
        A dict with two equally long lists:
          * "text": one string per row - the "#INPUT" header, the row's
            input on its own line, then EOS_TOKEN. The row's output is NOT
            part of the text.
          * "sample_index": the incoming sample indices, passed through.
    """
    texts = [
        f"#INPUT \n{input_val} \n" + EOS_TOKEN
        for input_val in examples["input"]
    ]

    return { "text" : texts, "sample_index" : examples["sample_index"], }

In [6]:
# Load the first 1000 rows of the validation split of the context-augmented dataset.
from datasets import load_dataset

augmented_dataset = load_dataset(
    path="aamina/channel_gains_vs_tx_powers_ee_augmented_with_30_examples_context_10k",
    split="validation[:1000]",
)

README.md:   0%|          | 0.00/445 [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/5.04M [00:00<?, ?B/s]

Generating validation split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [7]:
# Run formatting_prompts_func over the dataset in batches and keep the result
# under the same name.
augmented_dataset = augmented_dataset.map(
    function=formatting_prompts_func,
    batched=True,
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [8]:
# Function to Extract Required Result from LLM Responses

import re

def extract_response(input_response, pattern=r'\d'):
  """Return the first two pattern matches in a response as "a, b".

  Args:
    input_response: Text to search (e.g. a decoded model response).
    pattern: Regular expression for one value. The default matches a single
      digit character, so "12" yields "1" and "2"; pass r'\\d+' to extract
      whole multi-digit integers instead.

  Returns:
    "<first>, <second>" built from the first two matches, or the string
    "No match found." when fewer than two matches exist.
  """
  # finditer + group(0) always yields the full matched text, even when the
  # caller-supplied pattern contains capturing groups.
  matches = [m.group(0) for m in re.finditer(pattern, input_response)]

  # Two values are needed to build the result; a single match used to raise
  # IndexError here.
  if len(matches) < 2:
    return "No match found."

  return f"{matches[0]}, {matches[1]}"



In [9]:
def format_sample(entry):
    """Reformat one context entry into a labelled INPUT / OUTPUT example.

    The entry is split on the literal substring "then". The text before the
    first "then" becomes the INPUT section; the text between the first and
    second "then" (or up to the end of the entry if there is only one)
    becomes the OUTPUT section, prefixed with "then ". Both parts are
    stripped of surrounding whitespace.

    Args:
        entry: One context example containing the word "then".

    Returns:
        A string with the lines "INPUT", the input part, "OUTPUT", and the
        output part, each terminated by a newline.

    Raises:
        ValueError: If the entry does not contain "then".
    """
    # Split once and reuse the pieces instead of splitting the entry twice.
    parts = entry.split("then")
    if len(parts) < 2:
        raise ValueError(f"context entry has no 'then' separator: {entry!r}")

    input_part = parts[0].strip()
    output_part = "then " + parts[1].strip()

    return f"INPUT\n{input_part}\nOUTPUT\n{output_part}\n"

In [10]:
from tqdm import tqdm
import json
import gc

# Benchmark: time model.generate on the first `num_inferences` rows of the
# dataset and report the per-call and average latency. The generated tokens
# are not decoded in this cell; only the elapsed time is recorded.
FastLanguageModel.for_inference(model)

# Set up for measuring inference time
num_inferences = 100
total_time = 0

# Never index past the end of the dataset.
num_runs = min(num_inferences, len(augmented_dataset))

# NOTE(review): nothing in this cell appends to `results`; kept in case other
# cells rely on the name - confirm whether it is still needed.
results = []
for i in tqdm(range(num_runs), desc="Running Inference"):
  query = augmented_dataset[i]

  # Prompt Engineering: turn every context line into an INPUT/OUTPUT example.
  # Blank lines carry no example and would make format_sample fail, so skip them.
  context = query["context"]
  context_list = [sample for sample in context.split('\n') if sample.strip()]
  formatted_context = "".join('\n' + format_sample(sample) for sample in context_list)

  prompt_string = query["instruction"] + "\n"
  prompt_string = prompt_string + formatted_context + "\n" + "Give values of B close to the examples provided above. Your answer should be bigger than 10.\n#INPUT:\n" + query["input"] +  "\n#OUTPUT\nthen B is ?\nGive two values bigger than 10."

  # Tokenizing the prompt to feed into Model
  # NOTE(review): an EOS token is appended to the prompt before generation,
  # which is unusual for inference - confirm this is intended.
  inputs = tokenizer(prompt_string + tokenizer.eos_token, return_tensors='pt', padding=True, truncation=True)
  input_ids = inputs['input_ids']
  attention_mask = inputs['attention_mask']

  # Measure inference time. perf_counter is a monotonic high-resolution clock;
  # synchronizing before and after keeps pending GPU work from leaking into or
  # out of the timed region.
  if torch.cuda.is_available():
    torch.cuda.synchronize()
  start_time = time.perf_counter()
  with torch.no_grad():
    outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_new_tokens=100)
  if torch.cuda.is_available():
    torch.cuda.synchronize()
  end_time = time.perf_counter()

  inference_time = end_time - start_time
  total_time += inference_time
  print(f"Inference {i+1} took {inference_time:.4f} seconds")


# Calculate and print the average time
# NOTE(review): the first iteration is roughly twice as slow as the rest in the
# recorded output, so it slightly inflates this mean; consider excluding it.
average_time = total_time / num_runs if num_runs else 0.0
print(f"\nAverage inference time over {num_runs} runs: {average_time:.4f} seconds")


Running Inference:   1%|          | 1/100 [00:10<16:31, 10.01s/it]

Inference 1 took 10.0082 seconds


Running Inference:   2%|▏         | 2/100 [00:14<11:14,  6.89s/it]

Inference 2 took 4.6953 seconds


Running Inference:   3%|▎         | 3/100 [00:19<09:56,  6.15s/it]

Inference 3 took 5.2586 seconds


Running Inference:   4%|▍         | 4/100 [00:24<09:00,  5.63s/it]

Inference 4 took 4.8354 seconds


Running Inference:   5%|▌         | 5/100 [00:29<08:22,  5.29s/it]

Inference 5 took 4.6829 seconds


Running Inference:   6%|▌         | 6/100 [00:35<08:34,  5.48s/it]

Inference 6 took 5.8333 seconds


Running Inference:   7%|▋         | 7/100 [00:40<08:06,  5.23s/it]

Inference 7 took 4.7282 seconds


Running Inference:   8%|▊         | 8/100 [00:46<08:45,  5.72s/it]

Inference 8 took 6.7409 seconds


Running Inference:   9%|▉         | 9/100 [00:51<08:11,  5.40s/it]

Inference 9 took 4.7025 seconds


Running Inference:  10%|█         | 10/100 [00:56<07:49,  5.21s/it]

Inference 10 took 4.7806 seconds


Running Inference:  11%|█         | 11/100 [01:01<07:56,  5.35s/it]

Inference 11 took 5.6549 seconds


Running Inference:  12%|█▏        | 12/100 [01:06<07:34,  5.16s/it]

Inference 12 took 4.7209 seconds


Running Inference:  13%|█▎        | 13/100 [01:12<07:32,  5.20s/it]

Inference 13 took 5.2928 seconds


Running Inference:  14%|█▍        | 14/100 [01:16<07:20,  5.12s/it]

Inference 14 took 4.9171 seconds


Running Inference:  15%|█▌        | 15/100 [01:21<07:04,  4.99s/it]

Inference 15 took 4.7021 seconds


Running Inference:  16%|█▌        | 16/100 [01:27<07:15,  5.19s/it]

Inference 16 took 5.6359 seconds


Running Inference:  17%|█▋        | 17/100 [01:31<06:58,  5.04s/it]

Inference 17 took 4.7014 seconds


Running Inference:  18%|█▊        | 18/100 [01:37<06:53,  5.04s/it]

Inference 18 took 5.0354 seconds


Running Inference:  19%|█▉        | 19/100 [01:42<06:53,  5.10s/it]

Inference 19 took 5.2399 seconds


Running Inference:  20%|██        | 20/100 [01:46<06:38,  4.98s/it]

Inference 20 took 4.6954 seconds


Running Inference:  21%|██        | 21/100 [01:52<06:50,  5.20s/it]

Inference 21 took 5.6937 seconds


Running Inference:  22%|██▏       | 22/100 [01:57<06:36,  5.08s/it]

Inference 22 took 4.7881 seconds


Running Inference:  23%|██▎       | 23/100 [02:02<06:23,  4.98s/it]

Inference 23 took 4.7303 seconds


Running Inference:  24%|██▍       | 24/100 [02:07<06:28,  5.11s/it]

Inference 24 took 5.4102 seconds


Running Inference:  25%|██▌       | 25/100 [02:12<06:15,  5.00s/it]

Inference 25 took 4.7533 seconds


Running Inference:  26%|██▌       | 26/100 [02:18<06:24,  5.19s/it]

Inference 26 took 5.6357 seconds


Running Inference:  27%|██▋       | 27/100 [02:22<06:09,  5.06s/it]

Inference 27 took 4.7559 seconds


Running Inference:  28%|██▊       | 28/100 [02:27<05:58,  4.98s/it]

Inference 28 took 4.7794 seconds


Running Inference:  29%|██▉       | 29/100 [02:33<06:05,  5.15s/it]

Inference 29 took 5.5272 seconds


Running Inference:  30%|███       | 30/100 [02:37<05:52,  5.03s/it]

Inference 30 took 4.7699 seconds


Running Inference:  31%|███       | 31/100 [02:43<06:01,  5.23s/it]

Inference 31 took 5.6914 seconds


Running Inference:  32%|███▏      | 32/100 [02:48<05:46,  5.09s/it]

Inference 32 took 4.7451 seconds


Running Inference:  33%|███▎      | 33/100 [02:53<05:33,  4.98s/it]

Inference 33 took 4.7249 seconds


Running Inference:  34%|███▍      | 34/100 [02:58<05:41,  5.18s/it]

Inference 34 took 5.6387 seconds


Running Inference:  35%|███▌      | 35/100 [03:03<05:28,  5.05s/it]

Inference 35 took 4.7446 seconds


Running Inference:  36%|███▌      | 36/100 [03:09<05:34,  5.23s/it]

Inference 36 took 5.6393 seconds


Running Inference:  37%|███▋      | 37/100 [03:13<05:21,  5.11s/it]

Inference 37 took 4.8143 seconds


Running Inference:  38%|███▊      | 38/100 [03:18<05:11,  5.02s/it]

Inference 38 took 4.7996 seconds


Running Inference:  39%|███▉      | 39/100 [03:24<05:17,  5.21s/it]

Inference 39 took 5.6481 seconds


Running Inference:  40%|████      | 40/100 [03:29<05:04,  5.07s/it]

Inference 40 took 4.7568 seconds


Running Inference:  41%|████      | 41/100 [03:34<05:07,  5.22s/it]

Inference 41 took 5.5414 seconds


Running Inference:  42%|████▏     | 42/100 [03:39<04:54,  5.08s/it]

Inference 42 took 4.7689 seconds


Running Inference:  43%|████▎     | 43/100 [03:44<04:44,  4.99s/it]

Inference 43 took 4.7682 seconds


Running Inference:  44%|████▍     | 44/100 [03:49<04:51,  5.21s/it]

Inference 44 took 5.7147 seconds


Running Inference:  45%|████▌     | 45/100 [03:54<04:40,  5.09s/it]

Inference 45 took 4.8078 seconds


Running Inference:  46%|████▌     | 46/100 [04:00<04:39,  5.18s/it]

Inference 46 took 5.3769 seconds


Running Inference:  47%|████▋     | 47/100 [04:04<04:28,  5.07s/it]

Inference 47 took 4.7937 seconds


Running Inference:  48%|████▊     | 48/100 [04:09<04:18,  4.98s/it]

Inference 48 took 4.7587 seconds


Running Inference:  49%|████▉     | 49/100 [04:15<04:24,  5.19s/it]

Inference 49 took 5.6837 seconds


Running Inference:  50%|█████     | 50/100 [04:20<04:14,  5.08s/it]

Inference 50 took 4.8337 seconds


Running Inference:  51%|█████     | 51/100 [04:25<04:14,  5.19s/it]

Inference 51 took 5.4411 seconds


Running Inference:  52%|█████▏    | 52/100 [04:30<04:04,  5.10s/it]

Inference 52 took 4.8672 seconds


Running Inference:  53%|█████▎    | 53/100 [04:35<03:55,  5.00s/it]

Inference 53 took 4.7713 seconds


Running Inference:  54%|█████▍    | 54/100 [04:41<04:01,  5.25s/it]

Inference 54 took 5.8227 seconds


Running Inference:  55%|█████▌    | 55/100 [04:45<03:49,  5.11s/it]

Inference 55 took 4.7775 seconds


Running Inference:  56%|█████▌    | 56/100 [04:51<03:44,  5.11s/it]

Inference 56 took 5.1110 seconds


Running Inference:  57%|█████▋    | 57/100 [04:56<03:38,  5.08s/it]

Inference 57 took 4.9923 seconds


Running Inference:  58%|█████▊    | 58/100 [05:01<03:37,  5.17s/it]

Inference 58 took 5.3817 seconds


Running Inference:  59%|█████▉    | 59/100 [05:07<03:37,  5.30s/it]

Inference 59 took 5.5968 seconds


Running Inference:  60%|██████    | 60/100 [05:11<03:25,  5.15s/it]

Inference 60 took 4.7890 seconds


Running Inference:  61%|██████    | 61/100 [05:17<03:26,  5.29s/it]

Inference 61 took 5.6103 seconds


Running Inference:  62%|██████▏   | 62/100 [05:22<03:15,  5.14s/it]

Inference 62 took 4.7865 seconds


Running Inference:  63%|██████▎   | 63/100 [05:27<03:07,  5.07s/it]

Inference 63 took 4.9136 seconds


Running Inference:  64%|██████▍   | 64/100 [05:32<03:07,  5.20s/it]

Inference 64 took 5.4740 seconds


Running Inference:  65%|██████▌   | 65/100 [05:37<02:57,  5.07s/it]

Inference 65 took 4.7577 seconds


Running Inference:  66%|██████▌   | 66/100 [05:43<02:59,  5.28s/it]

Inference 66 took 5.7629 seconds


Running Inference:  67%|██████▋   | 67/100 [05:48<02:49,  5.13s/it]

Inference 67 took 4.7782 seconds


Running Inference:  68%|██████▊   | 68/100 [05:52<02:40,  5.01s/it]

Inference 68 took 4.7277 seconds


Running Inference:  69%|██████▉   | 69/100 [05:58<02:43,  5.28s/it]

Inference 69 took 5.8849 seconds


Running Inference:  70%|███████   | 70/100 [06:03<02:33,  5.12s/it]

Inference 70 took 4.7506 seconds


Running Inference:  71%|███████   | 71/100 [06:08<02:32,  5.26s/it]

Inference 71 took 5.5830 seconds


Running Inference:  72%|███████▏  | 72/100 [06:13<02:23,  5.12s/it]

Inference 72 took 4.8000 seconds


Running Inference:  73%|███████▎  | 73/100 [06:18<02:15,  5.03s/it]

Inference 73 took 4.8134 seconds


Running Inference:  74%|███████▍  | 74/100 [06:24<02:16,  5.24s/it]

Inference 74 took 5.7075 seconds


Running Inference:  75%|███████▌  | 75/100 [06:29<02:07,  5.11s/it]

Inference 75 took 4.8189 seconds


Running Inference:  76%|███████▌  | 76/100 [06:34<02:05,  5.24s/it]

Inference 76 took 5.5403 seconds


Running Inference:  77%|███████▋  | 77/100 [06:39<01:57,  5.11s/it]

Inference 77 took 4.7919 seconds


Running Inference:  78%|███████▊  | 78/100 [06:44<01:50,  5.02s/it]

Inference 78 took 4.8043 seconds


Running Inference:  79%|███████▉  | 79/100 [06:50<01:50,  5.25s/it]

Inference 79 took 5.7780 seconds


Running Inference:  80%|████████  | 80/100 [06:54<01:42,  5.11s/it]

Inference 80 took 4.7935 seconds


Running Inference:  81%|████████  | 81/100 [07:00<01:39,  5.21s/it]

Inference 81 took 5.4357 seconds


Running Inference:  82%|████████▏ | 82/100 [07:05<01:31,  5.11s/it]

Inference 82 took 4.8571 seconds


Running Inference:  83%|████████▎ | 83/100 [07:09<01:25,  5.01s/it]

Inference 83 took 4.7835 seconds


Running Inference:  84%|████████▍ | 84/100 [07:15<01:23,  5.24s/it]

Inference 84 took 5.7580 seconds


Running Inference:  85%|████████▌ | 85/100 [07:20<01:16,  5.10s/it]

Inference 85 took 4.7809 seconds


Running Inference:  86%|████████▌ | 86/100 [07:25<01:11,  5.14s/it]

Inference 86 took 5.2259 seconds


Running Inference:  87%|████████▋ | 87/100 [07:30<01:06,  5.10s/it]

Inference 87 took 5.0064 seconds


Running Inference:  88%|████████▊ | 88/100 [07:35<01:00,  5.01s/it]

Inference 88 took 4.7867 seconds


Running Inference:  89%|████████▉ | 89/100 [07:41<00:57,  5.24s/it]

Inference 89 took 5.7638 seconds


Running Inference:  90%|█████████ | 90/100 [07:46<00:51,  5.13s/it]

Inference 90 took 4.8630 seconds


Running Inference:  91%|█████████ | 91/100 [07:51<00:46,  5.15s/it]

Inference 91 took 5.2003 seconds


Running Inference:  92%|█████████▏| 92/100 [07:56<00:41,  5.13s/it]

Inference 92 took 5.0622 seconds


Running Inference:  93%|█████████▎| 93/100 [08:01<00:35,  5.03s/it]

Inference 93 took 4.7868 seconds


Running Inference:  94%|█████████▍| 94/100 [08:07<00:31,  5.25s/it]

Inference 94 took 5.7606 seconds


Running Inference:  95%|█████████▌| 95/100 [08:11<00:25,  5.12s/it]

Inference 95 took 4.8144 seconds


Running Inference:  96%|█████████▌| 96/100 [08:17<00:20,  5.13s/it]

Inference 96 took 5.1479 seconds


Running Inference:  97%|█████████▋| 97/100 [08:22<00:15,  5.11s/it]

Inference 97 took 5.0436 seconds


Running Inference:  98%|█████████▊| 98/100 [08:26<00:10,  5.03s/it]

Inference 98 took 4.8483 seconds


Running Inference:  99%|█████████▉| 99/100 [08:32<00:05,  5.25s/it]

Inference 99 took 5.7466 seconds


Running Inference: 100%|██████████| 100/100 [08:37<00:00,  5.17s/it]

Inference 100 took 4.7797 seconds

Average inference time over 100 runs: 5.1683 seconds



