In [1]:
!pip install -q -U bitsandbytes accelerate transformers peft ucimlrepo

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
from ucimlrepo import fetch_ucirepo
import sklearn.model_selection
import numpy as np
import pandas as pd


# Download and Configure the Model

In [3]:
# Hugging Face Hub identifier of the checkpoint to load
model_id = "tiiuae/Falcon-H1-3B-Instruct"

# 4-bit quantization settings: NF4 quant type, double quantization enabled,
# float16 as the compute dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Tokenizer that matches the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Quantized causal LM; device_map maps the root module ("") to device index 0
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map={"": 0},
    trust_remote_code=True,
    quantization_config=bnb_config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

The fast path is not available because on of `(selective_state_update, causal_conv1d_fn, causal_conv1d_update)` is None. Falling back to the naive implementation. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/145 [00:00<?, ?B/s]

In [4]:
# Fetch the dataset (UCI repository id 45)
data = fetch_ucirepo(id=45)

# Flatten the targets to 1-D and clamp them into [0, 1] to get a binary label
# (0 = no heart disease, 1 = heart disease)
y = data.data.targets.to_numpy().ravel().clip(0, 1)

# Impute missing values with each column's mode (first mode row when tied)
df = data.data.features.pipe(lambda feats: feats.fillna(feats.mode().iloc[0]))

# Feature matrix as a plain array
X = df.to_numpy()

# Stratified 80/20 train/test split with a fixed seed
Xtrain, Xtest, ytrain, ytest = sklearn.model_selection.train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=589,
    stratify=y,
)


In [None]:
def map_output(s):
  """Translate a generated Y/N answer into a class label.

  Leading and trailing whitespace is ignored. Returns 1 for "Y", 0 for "N",
  and -1 for any other text (the comparison is exact and case-sensitive).
  """
  labels = {"Y": 1, "N": 0}
  return labels.get(s.strip(), -1)

In [15]:
# Construct the text-generation pipeline around the already-loaded model and tokenizer
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Define the prompt template; "<d>" is the placeholder that is replaced by
# each patient's description
prompt_template = """Instructions: You are tasked with diagnosing which patients have heart disease.
You will be given a description of a patient. Answer "Y" for yes heart disease or "N" for no heart disease.
Give the answer as one of the characters 'Y' or 'N' only. Do not give an explanation. Do not produce other output.
Description:<d>
Answer:"""

# Construct one prompt per test instance
# NOTE(review): x[0] is assumed to be the patient's age column - confirm against the feature order
instances = []
for x in Xtest:
  description = f"The patient is {int(x[0])} years old."
  prompt = prompt_template.replace("<d>", description)
  instances.append(prompt)

# Run the LLM without gradient tracking: no sampling, a single new token,
# and only the continuation (not the prompt) returned
with torch.no_grad():
  outputs = pipe(instances, do_sample=False, return_full_text=False, max_new_tokens=1)

# Convert the prediction text to class labels (map_output yields -1 for
# anything that is not exactly "Y" or "N")
ypred = np.array([map_output(o[0]["generated_text"]) for o in outputs]).astype(int)
ypred

Device set to use cuda:0


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])