In [1]:
%%capture
# Install Unsloth with pip; the %%capture cell magic above hides the installer output.
!pip install unsloth
# Also get the latest nightly Unsloth!
# The released package is removed and replaced by a build of the GitHub repository.
# NOTE(review): neither install is version-pinned, so a rerun may pull different code.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
from tqdm import tqdm
import time

In [3]:
from unsloth import FastLanguageModel
import torch
# Sequence length handed to the model loader. Any value may be chosen; the
# original note states that RoPE scaling is supported internally.
max_seq_length = 100

# None requests auto detection. Float16 suits Tesla T4 / V100, Bfloat16 suits Ampere+.
dtype = None

# 4-bit quantization reduces memory usage; this may be set to False.
load_in_4bit = True


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [4]:
import os

# Read the Hugging Face access token from the environment instead of embedding
# it in the notebook. A token is only needed for gated models such as
# meta-llama/Llama-2-7b-hf; when HF_TOKEN is unset, None is passed to the loader.
# NOTE(review): any token that was ever committed in this notebook must be
# treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

# Load the model and its tokenizer using the configuration defined above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Microsoft/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token,
)

==((====))==  Unsloth 2024.10.0: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [5]:

EOS_TOKEN = tokenizer.eos_token  # appended to every formatted sample

def formatting_prompts_func(examples):
    """Build the "text" column for a batch of dataset rows.

    Args:
        examples: Batch mapping of column name to a list of values. Only the
            "input" and "sample_index" columns are read.

    Returns:
        Dict with two keys: "text", holding one string per row made of the
        "#INPUT" header, the row's input and EOS_TOKEN; and "sample_index",
        the incoming column passed through unchanged.
    """
    # Only the input goes into the text. The instruction, context and output
    # columns are intentionally not read: they were never used in the result,
    # and indexing the first instruction failed on an empty batch.
    texts = [
        f"#INPUT \n{input_val} \n" + EOS_TOKEN
        for input_val in examples["input"]
    ]

    return { "text" : texts, "sample_index" : examples["sample_index"], }

In [6]:
# Load the context-augmented dataset, keeping the first 1000 rows of its
# validation split.
from datasets import load_dataset

augmented_dataset = load_dataset(
    "aamina/channel_gains_vs_tx_powers_ee_augmented_with_300_examples_context",
    split="validation[:1000]",
)

In [7]:
# Apply the prompt formatter batch-wise; the mapped dataset replaces the loaded one.
augmented_dataset = augmented_dataset.map(
    function=formatting_prompts_func,
    batched=True,
)

In [8]:
# Function to Extract Required Result from LLM Responses

import re

def extract_response(input_response, pattern=r'\d'):
    r"""Return the first two regex matches found in an LLM response.

    Args:
        input_response: Text produced by the model.
        pattern: Regular expression (without capturing groups) whose matches
            are extracted. The default matches single digit characters, so
            "15" yields "1" and "5"; pass r'\d+' to extract whole integers.

    Returns:
        The string "<first>, <second>" when at least two matches exist,
        otherwise the string "No match found.".
    """
    # Find all matches in the input string
    matches = re.findall(pattern, input_response)

    # Two values are required. Checking the count up front avoids the
    # IndexError that a response with exactly one match would trigger.
    if len(matches) < 2:
        return "No match found."

    return f"{matches[0]}, {matches[1]}"



In [9]:
def format_sample(entry):
    """Reformat one context line into labelled INPUT / OUTPUT sections.

    Args:
        entry: A single context line. The text before the first "then" is
            treated as the input; "then" and everything after it is the output.

    Returns:
        A string with an INPUT header, the stripped input, an OUTPUT header
        and the output (prefixed with "then "), each on its own line.
        A blank entry yields an empty string, and an entry without "then"
        yields only the INPUT section.
    """
    # Split once at the first "then". partition never raises, and it keeps the
    # whole remainder even if "then" occurs again later in the line.
    input_part, separator, remainder = entry.partition("then")
    input_part = input_part.strip()

    if not separator:
        # No output clause available: emit the input alone, or nothing at all
        # for a blank line, instead of failing with IndexError.
        return f"INPUT\n{input_part}\n" if input_part else ""

    output_part = "then " + remainder.strip()

    # Format the new output
    return f"INPUT\n{input_part}\nOUTPUT\n{output_part}\n"

In [10]:
from tqdm import tqdm
import json
import gc

# Switch the Unsloth model into inference mode before generating.
FastLanguageModel.for_inference(model)

# Set up for measuring inference time.
# The run count is capped by the dataset size so indexing can never go past
# the last row.
num_inferences = min(100, len(augmented_dataset))
total_time = 0.0


def _sync_device():
    """Wait for queued CUDA work so wall-clock timings cover the full generation."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()


# One record per inference: sample index, generated text and elapsed seconds.
results = []
for i in tqdm(range(num_inferences), desc="Running Inference"):
    query = augmented_dataset[i]

    # Prompt Engineering: instruction, reformatted context examples, then the query.
    # Blank context lines (e.g. from a trailing newline) carry no example and
    # are skipped; the pieces are joined once instead of concatenated in a loop.
    formatted_context = "".join(
        "\n" + format_sample(sample)
        for sample in query["context"].split("\n")
        if sample.strip()
    )
    prompt_string = (
        query["instruction"] + "\n"
        + formatted_context + "\n"
        + "Give values of B close to the examples provided above. Your answer should be bigger than 10.\n#INPUT:\n"
        + query["input"]
        + "\n#OUTPUT\nthen B is ?\nGive two values bigger than 10."
    )

    # Tokenizing the prompt to feed into Model.
    # NOTE(review): an EOS token is appended to the prompt before generation;
    # confirm this is intended, since it marks the prompt as finished.
    inputs = tokenizer(prompt_string + tokenizer.eos_token, return_tensors='pt', padding=True, truncation=True)
    # Place the tensors on the model's device explicitly, outside the timed region.
    inputs = inputs.to(model.device)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Measure inference time with a monotonic clock, synchronizing the GPU on
    # both sides so no asynchronous work leaks into or out of the measurement.
    _sync_device()
    start_time = time.perf_counter()
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_new_tokens=100)
    _sync_device()
    inference_time = time.perf_counter() - start_time
    total_time += inference_time

    # Keep only the newly generated tokens (the prompt tokens are sliced off)
    # so the model output is not thrown away after timing.
    response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
    results.append({
        "sample_index": query["sample_index"],
        "response": response,
        "inference_time": inference_time,
    })

    # tqdm.write prints the message without breaking the progress bar.
    tqdm.write(f"Inference {i+1} took {inference_time:.4f} seconds")


# Calculate and print the average time
if num_inferences:
    average_time = total_time / num_inferences
    print(f"\nAverage inference time over {num_inferences} runs: {average_time:.4f} seconds")
else:
    print("\nNo inferences were run: the dataset is empty.")


Running Inference:   1%|          | 1/100 [00:20<33:06, 20.06s/it]

Inference 1 took 20.0392 seconds


Running Inference:   2%|▏         | 2/100 [00:35<27:59, 17.14s/it]

Inference 2 took 15.0686 seconds


Running Inference:   3%|▎         | 3/100 [00:50<26:02, 16.11s/it]

Inference 3 took 14.8678 seconds


Running Inference:   4%|▍         | 4/100 [01:05<25:30, 15.95s/it]

Inference 4 took 15.6809 seconds


Running Inference:   5%|▌         | 5/100 [01:21<25:00, 15.80s/it]

Inference 5 took 15.5201 seconds


Running Inference:   6%|▌         | 6/100 [01:36<24:30, 15.65s/it]

Inference 6 took 15.3229 seconds


Running Inference:   7%|▋         | 7/100 [01:51<24:05, 15.54s/it]

Inference 7 took 15.2901 seconds


Running Inference:   8%|▊         | 8/100 [02:07<23:41, 15.45s/it]

Inference 8 took 15.2254 seconds


Running Inference:   9%|▉         | 9/100 [02:22<23:29, 15.49s/it]

Inference 9 took 15.5534 seconds


Running Inference:  10%|█         | 10/100 [02:38<23:11, 15.46s/it]

Inference 10 took 15.3995 seconds


Running Inference:  11%|█         | 11/100 [02:53<22:57, 15.48s/it]

Inference 11 took 15.4977 seconds


Running Inference:  12%|█▏        | 12/100 [03:09<22:42, 15.48s/it]

Inference 12 took 15.4619 seconds


Running Inference:  13%|█▎        | 13/100 [03:24<22:19, 15.39s/it]

Inference 13 took 15.1615 seconds


Running Inference:  14%|█▍        | 14/100 [03:39<22:08, 15.44s/it]

Inference 14 took 15.5418 seconds


Running Inference:  15%|█▌        | 15/100 [03:55<21:49, 15.41s/it]

Inference 15 took 15.3162 seconds


Running Inference:  16%|█▌        | 16/100 [04:10<21:31, 15.38s/it]

Inference 16 took 15.2827 seconds


Running Inference:  17%|█▋        | 17/100 [04:25<21:17, 15.39s/it]

Inference 17 took 15.3877 seconds


Running Inference:  18%|█▊        | 18/100 [04:41<21:12, 15.52s/it]

Inference 18 took 15.8119 seconds


Running Inference:  19%|█▉        | 19/100 [04:58<21:28, 15.91s/it]

Inference 19 took 16.7566 seconds


Running Inference:  20%|██        | 20/100 [05:13<20:52, 15.66s/it]

Inference 20 took 15.0373 seconds


Running Inference:  21%|██        | 21/100 [05:28<20:20, 15.45s/it]

Inference 21 took 14.9473 seconds


Running Inference:  22%|██▏       | 22/100 [05:44<20:03, 15.43s/it]

Inference 22 took 15.3710 seconds


Running Inference:  23%|██▎       | 23/100 [05:59<19:44, 15.38s/it]

Inference 23 took 15.2526 seconds


Running Inference:  24%|██▍       | 24/100 [06:15<19:53, 15.70s/it]

Inference 24 took 16.4187 seconds


Running Inference:  25%|██▌       | 25/100 [06:31<19:33, 15.64s/it]

Inference 25 took 15.4807 seconds


Running Inference:  26%|██▌       | 26/100 [06:46<19:14, 15.60s/it]

Inference 26 took 15.4929 seconds


Running Inference:  27%|██▋       | 27/100 [07:02<18:54, 15.54s/it]

Inference 27 took 15.3781 seconds


Running Inference:  28%|██▊       | 28/100 [07:17<18:38, 15.54s/it]

Inference 28 took 15.5102 seconds


Running Inference:  29%|██▉       | 29/100 [07:33<18:22, 15.53s/it]

Inference 29 took 15.4788 seconds


Running Inference:  30%|███       | 30/100 [07:48<18:06, 15.52s/it]

Inference 30 took 15.4698 seconds


Running Inference:  31%|███       | 31/100 [08:04<17:47, 15.47s/it]

Inference 31 took 15.3449 seconds


Running Inference:  32%|███▏      | 32/100 [08:19<17:30, 15.45s/it]

Inference 32 took 15.3986 seconds


Running Inference:  33%|███▎      | 33/100 [08:35<17:17, 15.48s/it]

Inference 33 took 15.5321 seconds


Running Inference:  34%|███▍      | 34/100 [08:50<16:59, 15.45s/it]

Inference 34 took 15.3476 seconds


Running Inference:  35%|███▌      | 35/100 [09:05<16:41, 15.40s/it]

Inference 35 took 15.2625 seconds


Running Inference:  36%|███▌      | 36/100 [09:21<16:24, 15.39s/it]

Inference 36 took 15.3267 seconds


Running Inference:  37%|███▋      | 37/100 [09:36<16:10, 15.41s/it]

Inference 37 took 15.4319 seconds


Running Inference:  38%|███▊      | 38/100 [09:51<15:56, 15.43s/it]

Inference 38 took 15.4538 seconds


Running Inference:  39%|███▉      | 39/100 [10:07<15:40, 15.42s/it]

Inference 39 took 15.3719 seconds


Running Inference:  40%|████      | 40/100 [10:22<15:19, 15.32s/it]

Inference 40 took 15.0700 seconds


Running Inference:  41%|████      | 41/100 [10:37<15:04, 15.32s/it]

Inference 41 took 15.3113 seconds


Running Inference:  42%|████▏     | 42/100 [10:52<14:45, 15.27s/it]

Inference 42 took 15.1193 seconds


Running Inference:  43%|████▎     | 43/100 [11:08<14:27, 15.22s/it]

Inference 43 took 15.0928 seconds


Running Inference:  44%|████▍     | 44/100 [11:23<14:15, 15.27s/it]

Inference 44 took 15.3770 seconds


Running Inference:  45%|████▌     | 45/100 [11:38<14:03, 15.34s/it]

Inference 45 took 15.4855 seconds


Running Inference:  46%|████▌     | 46/100 [11:54<13:49, 15.37s/it]

Inference 46 took 15.3889 seconds


Running Inference:  47%|████▋     | 47/100 [12:09<13:36, 15.41s/it]

Inference 47 took 15.5001 seconds


Running Inference:  48%|████▊     | 48/100 [12:25<13:23, 15.46s/it]

Inference 48 took 15.5418 seconds


Running Inference:  49%|████▉     | 49/100 [12:40<13:05, 15.40s/it]

Inference 49 took 15.2531 seconds


Running Inference:  50%|█████     | 50/100 [12:56<12:50, 15.41s/it]

Inference 50 took 15.4021 seconds


Running Inference:  51%|█████     | 51/100 [13:11<12:29, 15.30s/it]

Inference 51 took 15.0096 seconds


Running Inference:  52%|█████▏    | 52/100 [13:26<12:17, 15.36s/it]

Inference 52 took 15.4635 seconds


Running Inference:  53%|█████▎    | 53/100 [13:42<12:03, 15.38s/it]

Inference 53 took 15.4314 seconds


Running Inference:  54%|█████▍    | 54/100 [13:57<11:51, 15.46s/it]

Inference 54 took 15.6253 seconds


Running Inference:  55%|█████▌    | 55/100 [14:13<11:34, 15.43s/it]

Inference 55 took 15.3259 seconds


Running Inference:  56%|█████▌    | 56/100 [14:28<11:16, 15.37s/it]

Inference 56 took 15.2003 seconds


Running Inference:  57%|█████▋    | 57/100 [14:43<10:56, 15.27s/it]

Inference 57 took 15.0288 seconds


Running Inference:  58%|█████▊    | 58/100 [14:58<10:43, 15.32s/it]

Inference 58 took 15.3807 seconds


Running Inference:  59%|█████▉    | 59/100 [15:14<10:28, 15.34s/it]

Inference 59 took 15.3614 seconds


Running Inference:  60%|██████    | 60/100 [15:29<10:14, 15.35s/it]

Inference 60 took 15.3817 seconds


Running Inference:  61%|██████    | 61/100 [15:44<09:55, 15.28s/it]

Inference 61 took 15.0809 seconds


Running Inference:  62%|██████▏   | 62/100 [15:59<09:38, 15.22s/it]

Inference 62 took 15.0703 seconds


Running Inference:  63%|██████▎   | 63/100 [16:15<09:25, 15.28s/it]

Inference 63 took 15.3996 seconds


Running Inference:  64%|██████▍   | 64/100 [16:30<09:11, 15.31s/it]

Inference 64 took 15.3522 seconds


Running Inference:  65%|██████▌   | 65/100 [16:46<08:56, 15.34s/it]

Inference 65 took 15.3890 seconds


Running Inference:  66%|██████▌   | 66/100 [17:01<08:42, 15.38s/it]

Inference 66 took 15.4519 seconds


Running Inference:  67%|██████▋   | 67/100 [17:16<08:28, 15.40s/it]

Inference 67 took 15.4202 seconds


Running Inference:  68%|██████▊   | 68/100 [17:32<08:12, 15.39s/it]

Inference 68 took 15.3542 seconds


Running Inference:  69%|██████▉   | 69/100 [17:47<07:58, 15.44s/it]

Inference 69 took 15.5310 seconds


Running Inference:  70%|███████   | 70/100 [18:03<07:42, 15.41s/it]

Inference 70 took 15.3111 seconds


Running Inference:  71%|███████   | 71/100 [18:18<07:27, 15.42s/it]

Inference 71 took 15.4380 seconds


Running Inference:  72%|███████▏  | 72/100 [18:34<07:12, 15.45s/it]

Inference 72 took 15.4828 seconds


Running Inference:  73%|███████▎  | 73/100 [18:49<06:58, 15.49s/it]

Inference 73 took 15.5669 seconds


Running Inference:  74%|███████▍  | 74/100 [19:05<06:43, 15.50s/it]

Inference 74 took 15.5031 seconds


Running Inference:  75%|███████▌  | 75/100 [19:20<06:27, 15.49s/it]

Inference 75 took 15.4306 seconds


Running Inference:  76%|███████▌  | 76/100 [19:36<06:12, 15.50s/it]

Inference 76 took 15.5164 seconds


Running Inference:  77%|███████▋  | 77/100 [19:51<05:55, 15.47s/it]

Inference 77 took 15.3782 seconds


Running Inference:  78%|███████▊  | 78/100 [20:07<05:39, 15.44s/it]

Inference 78 took 15.3498 seconds


Running Inference:  79%|███████▉  | 79/100 [20:22<05:25, 15.49s/it]

Inference 79 took 15.5908 seconds


Running Inference:  80%|████████  | 80/100 [20:38<05:09, 15.50s/it]

Inference 80 took 15.4713 seconds


Running Inference:  81%|████████  | 81/100 [20:53<04:54, 15.49s/it]

Inference 81 took 15.4421 seconds


Running Inference:  82%|████████▏ | 82/100 [21:08<04:37, 15.44s/it]

Inference 82 took 15.3132 seconds


Running Inference:  83%|████████▎ | 83/100 [21:24<04:22, 15.47s/it]

Inference 83 took 15.5142 seconds


Running Inference:  84%|████████▍ | 84/100 [21:39<04:06, 15.43s/it]

Inference 84 took 15.3275 seconds


Running Inference:  85%|████████▌ | 85/100 [21:55<03:51, 15.41s/it]

Inference 85 took 15.3312 seconds


Running Inference:  86%|████████▌ | 86/100 [22:10<03:36, 15.43s/it]

Inference 86 took 15.4574 seconds


Running Inference:  87%|████████▋ | 87/100 [22:26<03:21, 15.48s/it]

Inference 87 took 15.5775 seconds


Running Inference:  88%|████████▊ | 88/100 [22:41<03:05, 15.49s/it]

Inference 88 took 15.4852 seconds


Running Inference:  89%|████████▉ | 89/100 [22:57<02:50, 15.52s/it]

Inference 89 took 15.5641 seconds


Running Inference:  90%|█████████ | 90/100 [23:12<02:34, 15.46s/it]

Inference 90 took 15.3061 seconds


Running Inference:  91%|█████████ | 91/100 [23:28<02:19, 15.45s/it]

Inference 91 took 15.4021 seconds


Running Inference:  92%|█████████▏| 92/100 [23:43<02:03, 15.41s/it]

Inference 92 took 15.2929 seconds


Running Inference:  93%|█████████▎| 93/100 [23:58<01:48, 15.45s/it]

Inference 93 took 15.5164 seconds


Running Inference:  94%|█████████▍| 94/100 [24:14<01:32, 15.46s/it]

Inference 94 took 15.4664 seconds


Running Inference:  95%|█████████▌| 95/100 [24:29<01:16, 15.38s/it]

Inference 95 took 15.1606 seconds


Running Inference:  96%|█████████▌| 96/100 [24:45<01:01, 15.42s/it]

Inference 96 took 15.5040 seconds


Running Inference:  97%|█████████▋| 97/100 [25:00<00:46, 15.45s/it]

Inference 97 took 15.4808 seconds


Running Inference:  98%|█████████▊| 98/100 [25:16<00:30, 15.42s/it]

Inference 98 took 15.3416 seconds


Running Inference:  99%|█████████▉| 99/100 [25:30<00:15, 15.27s/it]

Inference 99 took 14.9010 seconds


Running Inference: 100%|██████████| 100/100 [25:46<00:00, 15.46s/it]

Inference 100 took 15.3155 seconds

Average inference time over 100 runs: 15.4403 seconds



