In [5]:
# Load the merged Llama2-7b checkpoint and query it through a text-generation pipeline
import os
from dataclasses import dataclass, field
from typing import Optional

import torch
from datasets import load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments

from trl import SFTTrainer
from trl.trainer import ConstantLengthDataset

import textwrap

In [6]:
# Quantization settings handed to from_pretrained: 4-bit loading, "nf4" quant
# type, bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16)

# Keyword arguments for loading the merged checkpoint from the local results
# directory.
# NOTE(review): device_map={"": 0} presumably pins the whole model to device 0 - confirm.
# NOTE(review): use_auth_token may be deprecated in newer transformers releases - confirm
# against the installed version before changing it.
load_kwargs = dict(
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
    use_auth_token=True,
)
model = AutoModelForCausalLM.from_pretrained("./results/final_merged_checkpoint/", **load_kwargs)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
from transformers import pipeline

# Text-generation pipeline around the already-loaded `model`; the tokenizer is
# referenced by its identifier "meta-llama/Llama-2-7b-hf" rather than passed as
# an object.
pipe = pipeline("text-generation", model=model, tokenizer="meta-llama/Llama-2-7b-hf")

In [69]:
def qa(text: str, full: bool = False) -> str:
    """Ask the model behind ``pipe`` a SUNY Brockport question.

    The question is inserted into an instruction/response prompt template and
    sampled through the module-level ``pipe`` text-generation pipeline.

    Args:
        text: The question placed under ``### Instruction:``. May span
            several lines.
        full: When True, return the pipeline's entire ``generated_text``
            (prompt included). When False (default), return only the text
            that follows the prompt.

    Returns:
        The generated text, with or without the prompt depending on ``full``.
    """
    # Dedent the template *before* inserting the question. Interpolating first
    # lets an unindented continuation line of a multi-line question reduce the
    # common margin to nothing, leaving the whole template indented.
    template = textwrap.dedent("""
        Below is an inquiry related to SUNY Brockport - from academics, admissions, and faculty support to student life. Prioritize accuracy and brevity.

        ### Instruction:
        {instruction}

        ### Response:
        """)
    # str.format does not re-parse the substituted value, so braces inside the
    # question are safe.
    prompt = template.format(instruction=text)

    # max_new_tokens limits only the reply; max_length would also count the
    # prompt tokens, so longer questions would leave little or no room to answer.
    outputs = pipe(prompt, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95, temperature=1.0)
    generated = outputs[0]['generated_text']
    if full:
        return generated

    # The pipeline is expected to echo the prompt at the start of its output;
    # removing that exact prefix stays correct even when the question itself
    # contains the "### Response:" marker.
    if generated.startswith(prompt):
        return generated[len(prompt):]

    # Fallback when the prompt is not echoed verbatim: cut after the first
    # marker if there is one, otherwise return the text unchanged.
    _, marker_found, answer = generated.partition("### Response:\n")
    return answer if marker_found else generated


In [70]:
# Demo call: with full=True the prompt template is printed together with the reply.
full_reply = qa("How do I apply?", full=True)
print(full_reply)


Below is an inquiry related to SUNY Brockport - from academics, admissions, and faculty support to student life. Prioritize accuracy and brevity.

### Instruction:
How do I apply?

### Response:
You can apply online for admission to the School of Health Sciences and Human Services at SUNY Brockport.
