# Imports & Setup

Download all the necessary dependencies. These should be exactly the ones present in the `environment.yaml` file.


In [1]:
!pip -q install numpy tqdm pandas transformers accelerate bitsandbytes

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
!git clone https://github.com/prundeanualin/ATCS-project.git

Cloning into 'ATCS-project'...
remote: Enumerating objects: 87, done.[K
remote: Counting objects: 100% (87/87), done.[K
remote: Compressing objects: 100% (69/69), done.[K
remote: Total 87 (delta 22), reused 46 (delta 13), pack-reused 0[K
Receiving objects: 100% (87/87), 43.01 KiB | 1.72 MiB/s, done.
Resolving deltas: 100% (22/22), done.
fatal: not a git repository (or any of the parent directories): .git


In [8]:
# IF YOU WANT TO TEST THINGS FROM YOUR OWN BRANCH, UNCOMMENT BELOW
# ! git checkout <your_own_branch>

Branch 'first_run' set up to track remote branch 'first_run' from 'origin'.
Switched to a new branch 'first_run'


In [10]:
! git status

On branch first_run
Your branch is up to date with 'origin/first_run'.

nothing to commit, working tree clean


In [11]:
%cd /content/ATCS-project

/content/ATCS-project


In [12]:
import argparse

from get_datasets import SCAN_EXAMPLES_FILEPATH, EXAMPLE_CATEGORIES
from prompt_templates.analogy import ANALOGY_TEMPLATE_SIMPLE_INFERENCE, ANALOGY_TEMPLATE_SIMPLE_FULL
from model import LLMObj
import torch
from tqdm import tqdm
from transformers import BitsAndBytesConfig
import pickle
from datasets import ScanDataset
import os

from utils import seed_experiments

os.environ['HF_TOKEN'] = "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"
os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'

torch.set_default_device('cuda')

# Inference

`LLMObj` is a HF wrapper that contains the LLM model, tokenizer, and text generation wrapper.

Below the class code, several LLMs that are available on HF are initialized.

For some models like LLama, you need to authenticate your HF account, so add your [HF access token](https://huggingface.co/docs/hub/security-tokens) to the secrets on secrets as `HF_TOKEN`.

## Model arguments

In [13]:
# Since ArgParser does not work in colab, we just construct a custom class with all our neccessary arguments
class Args(argparse.Namespace):
  model = "microsoft/Phi-3-mini-128k-instruct"
  tokenizer = "microsoft/Phi-3-mini-128k-instruct"
  quantization = "4bit"
  low_cpu_mem_usage = True
  seed=1234

args = Args()

seed_experiments(args.seed)

## Load the dataset

In [14]:
# Load the dataset
dataset = ScanDataset(
    shuffle=False,
    analogy_sentence_infer=ANALOGY_TEMPLATE_SIMPLE_INFERENCE,
    analogy_sentence_full=ANALOGY_TEMPLATE_SIMPLE_FULL,
    examples_file=SCAN_EXAMPLES_FILEPATH.format(EXAMPLE_CATEGORIES[0]),
    examples_start_idx=0,
    examples_shot_nr=1
)

SCAN dataset file downloaded successfully.


## Load the model

In [15]:
quantization = None
if args.quantization == '4bit':
    quantization = BitsAndBytesConfig(load_in_4bit=True)

model_kwargs = {
    "torch_dtype": torch.bfloat16,
    "low_cpu_mem_usage": args.low_cpu_mem_usage,
    "quantization_config": quantization
}
LLMObj_args = {
    'model': args.model,
    'model_kwargs': model_kwargs,
    'tokenizer_name': args.tokenizer
}
print("LLMObj Arguments are:")
print(LLMObj_args)

# Load the model
LLM = LLMObj(**LLMObj_args)

LLMObj Arguments are:
{'model': 'microsoft/Phi-3-mini-128k-instruct', 'model_kwargs': {'torch_dtype': torch.bfloat16, 'low_cpu_mem_usage': True, 'quantization_config': BitsAndBytesConfig {
  "_load_in_4bit": true,
  "_load_in_8bit": false,
  "bnb_4bit_compute_dtype": "float32",
  "bnb_4bit_quant_storage": "uint8",
  "bnb_4bit_quant_type": "fp4",
  "bnb_4bit_use_double_quant": false,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}
}, 'tokenizer_name': 'microsoft/Phi-3-mini-128k-instruct'}


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/3.35k [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

## Run the inference pipeline

In [16]:
# Stop at just 3 generations, just to see it in action
stop_at_datapoint_idx = 3

# Run inference
generated_prompts = []
for i, sample in tqdm(enumerate(dataset)):
  output = LLM.generate(sample['inference'])
  generated_prompts.append([sample, output])
  if i >= stop_at_datapoint_idx:
    break

with open(f'{args.model.split("/")[1]}_generated_prompts.pl', 'wb') as f:
    pickle.dump(generated_prompts, f)


447it [02:14,  3.31it/s]


In [22]:
import pandas as pd
obj = pd.read_pickle(f'{args.model.split("/")[1]}_generated_prompts.pl')
for el in obj:
  print(el)

[{'inference': 'If atom is like solar system, then electron is like...', 'label': 'planet', 'alternatives': [], 'analogy_type': 'science'}, " If an atom is like the solar system, then an electron is like the planets orbiting the\nsun.\n\nExplanise: This analogy helps to understand the behavior of electrons in an atom. Just as\nthe planets revolve around the sun in the solar system, electrons move in specific orbits\nor energy levels around the nucleus of an atom. However, it's important to note that this\nanalogy has its limitations, as electrons do not follow classical orbits but rather exist\nin probabilistic distributions described by quantum mechanics. If an atom is like a solar\nsystem, then the nucleus would be like the sun, and the electrons would be like the\nplanets orbiting it. If an atom is like a solar system, then the nucleus would be like the\nsun, and the electrons would be like the planets orbiting it. However, it's important to\nnote that this analogy has its limitatio