#**Notebook Description**

This notebook builds inference pipelines that evaluate several prompting techniques on both the pretrained Llama-2-7B model and the Llama-2-7B chat model. We used it to determine which prompting techniques work best.

Predictions from both models are written to Google Drive, where we then compute various metrics on the results.

#**Libraries**

In [None]:
!pip install "transformers==4.35" "datasets==2.13.0" "peft==0.4.0" "accelerate==0.21.0" "bitsandbytes==0.40.2" "trl==0.4.7" "safetensors>=0.3.1" "tiktoken" -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.6/485.6 kB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m99.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
!pip install sacrebleu rouge_score -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.3/106.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [None]:
import os
# Must be set before torch is imported in the next cell.
# NOTE(review): presumably a debugging aid (synchronous CUDA launches so errors
# surface at the failing call); it slows inference - confirm it is still needed.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

In [None]:
from    datasets import Dataset, load_dataset, load_metric
import  gc
import  pandas as pd
from    peft import (
            LoraConfig,
            get_peft_model,
            AutoPeftModelForCausalLM,
            PeftModel,
            PeftConfig,
)
from    pprint import pprint
from    random import randrange
import  torch
import  torch.nn as nn
from    tqdm.notebook import tqdm
import  transformers
from    transformers import (
            AutoTokenizer,
            AutoModelForCausalLM,
            BitsAndBytesConfig,
            DataCollatorForLanguageModeling,
            TrainingArguments,
            LlamaForCausalLM,
            LlamaTokenizer,
            pipeline,
            logging,
)
from transformers.pipelines.pt_utils import KeyDataset
from    trl import SFTTrainer
import  yaml

# Metric objects, loaded once up front (sacreBLEU first, then ROUGE).
bleu, rouge = (load_metric(metric_name) for metric_name in ("sacrebleu", "rouge"))

# Only let CRITICAL messages from transformers through.
logging.set_verbosity(logging.CRITICAL)

In [None]:
# Show which GPU is attached to this runtime and how much of its memory is in use.
!nvidia-smi

Thu Feb 29 18:32:56 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0              25W / 300W |      2MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

#**Prompting Strategies**

#**Hugging Face**

In [None]:
# Hugging Face access tokens are taken from the environment so that real
# secrets never have to be pasted into (and saved with) the notebook. The
# "name" placeholder remains the fallback when the variables are not set.
HF_READ_TOKEN = os.environ.get("HF_READ_TOKEN", "name")
HF_WRITE_TOKEN = os.environ.get("HF_WRITE_TOKEN", "name")
# Task string handed to transformers.pipeline when the pipelines are built.
HF_PIPELINE_TASK = "text-generation"
device_map          = {"": 0} # Load the entire model on the GPU 0
MODEL_NAME_FINETUNING = 'meta-llama/Llama-2-7b-hf-finetuned-policychat'
HF_MODEL_REPO       = "name"

In [None]:
# Interactive Hugging Face login (renders a widget in the cell output).
# NOTE(review): presumably required to download the meta-llama checkpoints
# loaded further down - confirm whether a read token would be sufficient.
from huggingface_hub import notebook_login
notebook_login() #make sure you have a 'write' token

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#**Load Dataset**

In [None]:
# Colab only: mount Google Drive so DATA_DIR / OUTPUT_DIR under
# /content/drive/ are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Google Drive folders: input CSVs and everything this notebook writes.
DATA_DIR = "/content/drive/MyDrive/NLX/Final project/finetuning-data"
OUTPUT_DIR = "/content/drive/MyDrive/NLX/Final project/modeling/output"
# The last component must not start with a slash: os.path.join throws away all
# earlier components when a later one is absolute, so '/merged-model' would
# point at the filesystem root instead of a folder inside OUTPUT_DIR.
MERGED_MODEL_DIR = os.path.join(OUTPUT_DIR, 'merged-model')


# One CSV per prompting technique (numbered 0-5).
data_0_path = os.path.join(DATA_DIR, "finetune-dataset-0.csv")
data_1_path = os.path.join(DATA_DIR, "finetune-dataset-1.csv")
data_2_path = os.path.join(DATA_DIR, "finetune-dataset-2.csv")
data_3_path = os.path.join(DATA_DIR, "finetune-dataset-3.csv")
data_4_path = os.path.join(DATA_DIR, "finetune-dataset-4.csv")
data_5_path = os.path.join(DATA_DIR, "finetune-dataset-5.csv")

# Technique name -> path of its CSV.
prompting_techniques = {
    "No Prompting":         data_0_path,
    "Task instruction":     data_1_path,
    "One-shot prompting":   data_2_path,
    "Few-shot prompting":   data_3_path,
    "Prompt chaining":      data_4_path,
    "Active prompting":     data_5_path,
}
pprint(prompting_techniques)

{'Active prompting': '/content/drive/MyDrive/NLX/Final '
                     'project/finetuning-data/finetune-dataset-5.csv',
 'Few-shot prompting': '/content/drive/MyDrive/NLX/Final '
                       'project/finetuning-data/finetune-dataset-3.csv',
 'No Prompting': '/content/drive/MyDrive/NLX/Final '
                 'project/finetuning-data/finetune-dataset-0.csv',
 'One-shot prompting': '/content/drive/MyDrive/NLX/Final '
                       'project/finetuning-data/finetune-dataset-2.csv',
 'Prompt chaining': '/content/drive/MyDrive/NLX/Final '
                    'project/finetuning-data/finetune-dataset-4.csv',
 'Task instruction': '/content/drive/MyDrive/NLX/Final '
                     'project/finetuning-data/finetune-dataset-1.csv'}


In [None]:
# Instruction prefix for each prompting technique, keyed by technique name.
# The prediction loops build the model input as f"{prefix}{question}", so the
# prefix text is placed directly in front of the question.
# NOTE(review): every closing triple quote sits on its own indented line, so
# each non-empty prefix ends with newline(s) followed by four spaces (visible
# in the pprint output of this cell) - confirm this trailing whitespace is
# intended before comparing techniques.
prompting_types = {
    # Baseline: the bare question is sent to the model.
    "No Prompting": "",

    # A single instruction sentence in front of the question.
    "Task instruction": """Give the most concise answers possible to questions about AI policy, considering you are an expert of AI policy. \n
    """,

    # One worked query/answer example.
    "One-shot prompting": """Give an answer to my query by modeling the following example:
My Query: Summarize the UK's legislation on AI.
Your Answer: The UK's legislation on AI is currently decentralized, with no specific comprehensive law governing AI. Instead, existing laws such as data protection legislation (e.g., the Data Protection Act 2018), equalities and privacy laws (e.g., the Equality Act 2010 and the Human Rights Act 1998), and intellectual property laws (e.g., the Copyright, Designs and Patents Act 1988) play a role in regulating various aspects of AI development and usage. These laws impact data collection, discrimination, human rights implications, intellectual property rights, and the limitations on AI decision-making and surveillance tools in the workplace.\n
    """,

    # Five worked "question: answer" examples.
    "Few-shot prompting": """Give an answer to my query by modeling the following examples:
Summarize the UK's legislation on AI: The UK's legislation on AI is currently decentralized, with no specific comprehensive law governing AI. Instead, existing laws such as data protection legislation (e.g., the Data Protection Act 2018), equalities and privacy laws (e.g., the Equality Act 2010 and the Human Rights Act 1998), and intellectual property laws (e.g., the Copyright, Designs and Patents Act 1988) play a role in regulating various aspects of AI development and usage. These laws impact data collection, discrimination, human rights implications, intellectual property rights, and the limitations on AI decision-making and surveillance tools in the workplace.
What is the predicted impact of generative AI on jobs?: LinkedIn's analysis predicts that the jobs of 55% of the platform’s users will be impacted in some way by the adoption of generative AI.
What is the regulation around training powerful AI models in Europe?: Regulations in Europe mandate that developers must include drafting technical documentation, adhere to EU copyright laws, and provide detailed summaries of the content used for training. Moreover, for high-impact general purpose AI models that pose systemic risks, additional obligations apply. These obligations entail conducting model evaluations, assessing and mitigating systemic risks, performing adversarial testing, reporting serious incidents to the Commission, ensuring cybersecurity measures, and reporting on energy efficiency.
What are some drawbacks to the CASC approach?: Its rulemakings are inherently retroactive, it does not broadly ensure algorithmic rights for ADSs that do not qualify as CASC ADSs, and it does not resolve capacity issues at federal agencies.
How is AI related to the United States' geopolitical relations with China?: The U.S.-China relationship looms large over AI governance: as Beijing pursues a national strategy aimed at making China the global leader in “AI theories, technologies, and applications” by 2030, policymakers in Washington are struggling with how to place guardrails around AI development without undermining the United States’ technological edge.\n
    """,

    # Step-by-step instructions that constrain the shape of the answer.
    "Prompt chaining": """First, analyze the keywords in the query. Secondly, decipher the purpose of the query. Don’t explicitly write these. Your final answer should be a maximum of 3 sentences long. The first sentence should summarize what the question is asking. The second sentence should give the main answer to the query. The third sentence can be an additional point if you think some information is very important to the query. Format all the sentences into a single paragraph.

    """,
    # Ask for several candidate answers internally, output only one.
    "Active prompting": """Think of 3 possible different answers to the query but do not output them all. Only output the answer that is the shortest, most concrete, relevant to the query, and easy to understand by a college student. Do not include the reason for your pick.\n
    """
}

pprint(prompting_types)

{'Active prompting': 'Think of 3 possible different answers to the query but '
                     'do not output them all. Only output the answer that is '
                     'the shortest, most concrete, relevant to the query, and '
                     'easy to understand by a college student. Do not include '
                     'the reason for your pick.\n'
                     '\n'
                     '    ',
 'Few-shot prompting': 'Give an answer to my query by modeling the following '
                       'examples:\n'
                       "Summarize the UK's legislation on AI: The UK's "
                       'legislation on AI is currently decentralized, with no '
                       'specific comprehensive law governing AI. Instead, '
                       'existing laws such as data protection legislation '
                       '(e.g., the Data Protection Act 2018), equalities and '
                       'privacy laws (e.g., the Equality Act 2010 and the '


##**Convert to HF Dataset**

In [None]:
# Question/answer pairs are read from the "No Prompting" CSV
FINETUNE_DATA_PATH = prompting_techniques["No Prompting"]

# Read the CSV and take its 'train' split
full_dataset = load_dataset('csv', data_files=FINETUNE_DATA_PATH)['train']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible
split_dataset = full_dataset.train_test_split(seed=1, test_size=0.2)

# Name the two halves
finetune_dataset, eval_dataset = split_dataset['train'], split_dataset['test']

# Process to the expected format of Llama
def preprocess_data(examples):
    """Build the 'text' column for a batch of question/answer rows.

    A causal language model such as Llama takes one sequence of text and
    predicts the next token, so for question answering each question is joined
    with its answer (and a separator) into a single string.

    Args:
        examples: batch with parallel 'question' and 'answer' sequences.

    Returns:
        dict with one key, 'text', holding one combined string per row.
    """
    combined = []
    for q, a in zip(examples['question'], examples['answer']):
        combined.append(f"Q: {q} %-->% \nA: {a}")
    return {'text': combined}

# Add the combined 'text' column to both splits (train first, then test)
finetune_dataset, eval_dataset = (
    split.map(preprocess_data, batched=True)
    for split in (finetune_dataset, eval_dataset)
)

# Sanity check: shapes, then schema and first combined row of each split
print("Finetune (Train) Dataset Shape:", finetune_dataset.shape)
print("Evaluation (Test) Dataset Shape:", eval_dataset.shape)

pprint(finetune_dataset)
print(finetune_dataset[0]['text'])

pprint(eval_dataset)
print(eval_dataset[0]['text'])



  0%|          | 0/1 [00:00<?, ?it/s]



Finetune (Train) Dataset Shape: (333, 3)
Evaluation (Test) Dataset Shape: (84, 3)
Dataset({
    features: ['question', 'answer', 'text'],
    num_rows: 333
})
Q: What's the role of good leadership? %-->% 
A: Setting ethical standards, risk mitigation, and public trust and accountability.
Dataset({
    features: ['question', 'answer', 'text'],
    num_rows: 84
})
Q: What are some possible benefits of the widespread adoption and use of Generative AI? %-->% 
A: The widespread adoption and use of Generative AI presents several possible benefits to society, including: increased efficiency, innovation, providing assistance in multiple domains, devising personalized medical treatments, creating personalized education, and decreasing the information gap.


In [None]:
def tokenize_qa_function(example, tokenizer, max_length=128):
    """Tokenize the 'question' and 'answer' fields of an example (or batch).

    Args:
        example: mapping with 'question' and 'answer' entries; with
            ``Dataset.map(..., batched=True)`` each entry is a list of strings.
        tokenizer: callable tokenizer whose result exposes ``.input_ids``.
        max_length: truncation limit applied to questions and answers, and
            the padded length of questions when padding is possible.

    Returns:
        dict with 'input_ids' (token ids of the question) and 'labels'
        (token ids of the answer).
    """
    # Padding requires a pad token. Tokenizers that define none raise when
    # asked to pad, so questions are only padded when a pad token exists;
    # otherwise they are returned unpadded.
    question_kwargs = {'truncation': True, 'max_length': max_length}
    if getattr(tokenizer, 'pad_token', None) is not None:
        question_kwargs['padding'] = 'max_length'

    # Tokenize the question
    tokenized_question = tokenizer(example['question'], **question_kwargs)

    # Tokenize the answer (truncated, never padded)
    tokenized_answer = tokenizer(
        example['answer'],
        truncation=True,
        max_length=max_length,
    )

    # Return the tokenized question and answer
    # Note: For training, you might only need the `input_ids` and `attention_mask` for the questions,
    # and `labels` for the answers. Adjust accordingly.
    return {
        'input_ids': tokenized_question.input_ids,
        # 'attention_mask': tokenized_question.attention_mask,
        'labels': tokenized_answer.input_ids
    }

#**LLaMA7B**

##**Load Model**

In [None]:
# Hub id of the base (non-chat) checkpoint; its predictions are labelled "LLaMA7B".
MODEL_NAME_BASE = 'meta-llama/Llama-2-7b-hf'

In [None]:
# model
# Loaded with load_in_8bit=True (8-bit weights) rather than in full precision.
# NOTE(review): the `device_map` constant defined in the Hugging Face config
# cell is not passed here - confirm the default placement is what is wanted.
model_pretrained = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME_BASE,
    load_in_8bit = True,
)
# tokenizer
# Tokenizer that belongs to the same checkpoint as the model above.
tokenizer_pretrained = AutoTokenizer.from_pretrained(
    MODEL_NAME_BASE,
)

Downloading config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
################################################################################
# `finetune_dataset` and `eval_dataset` are plain `Dataset` objects (the two
# halves produced by `train_test_split`), not `DatasetDict`s. They are mapped
# directly: indexing them with 'train' / 'test' would be a column lookup and
# fail because no such column exists.
train_pretrained_tokenized = finetune_dataset.map(
    lambda examples: tokenize_qa_function(examples, tokenizer=tokenizer_pretrained),
    batched = True
)
################################################################################
test_pretrained_tokenized_eval = eval_dataset.map(
    lambda examples: tokenize_qa_function(examples, tokenizer=tokenizer_pretrained),
    batched = True
)
# NOTE(review): there is no separate test split of the fine-tuning data any
# more; the old name is kept as an alias of the held-out split - confirm.
test_pretrained_tokenized_ftune = test_pretrained_tokenized_eval

print(train_pretrained_tokenized[0])
print(test_pretrained_tokenized_eval[0])



{'question': "What's the role of good leadership?", 'answer': 'Setting ethical standards, risk mitigation, and public trust and accountability.', 'input_ids': [1, 1724, 29915, 29879, 278, 6297, 310, 1781, 26001, 29973], 'labels': [1, 21605, 11314, 936, 20801, 29892, 12045, 1380, 335, 362, 29892, 322, 970, 9311, 322, 3633, 3097, 29889]}
{'question': 'What are some possible benefits of the widespread adoption and use of Generative AI?', 'answer': 'The widespread adoption and use of Generative AI presents several possible benefits to society, including: increased efficiency, innovation, providing assistance in multiple domains, devising personalized medical treatments, creating personalized education, and decreasing the information gap.', 'input_ids': [1, 1724, 526, 777, 1950, 23633, 310, 278, 281, 2247, 29886, 949, 594, 3385, 322, 671, 310, 3251, 1230, 319, 29902, 29973], 'labels': [1, 450, 281, 2247, 29886, 949, 594, 3385, 322, 671, 310, 3251, 1230, 319, 29902, 22981, 3196, 1950, 23633,

##**Predictions**

In [None]:
# Text-generation pipeline around the 8-bit base model and its tokenizer.
# Every call generates at most 100 new tokens.
pipe_pretrained = pipeline(
    task=HF_PIPELINE_TASK,
    model=model_pretrained,
    tokenizer=tokenizer_pretrained,
    max_new_tokens=100,
    temperature=0.7,
    repetition_penalty=2.5,
)

In [None]:
# Collect unreachable Python objects first so that the tensors they own are
# freed, then hand the now-unused cached blocks back to the GPU.
gc.collect()
torch.cuda.empty_cache()

60

In [None]:
# Smoke test: show the first held-out question, its reference answer and the
# base model's raw completion, then stop after that single item.
for item in test_pretrained_tokenized_eval:
    print(f"Q: {item['question']}", f"A: {item['answer']}", "-"*80, sep="\n")
    print(pipe_pretrained(item['question'])[0]['generated_text'])
    torch.cuda.empty_cache()
    gc.collect()
    break

Q: What are some possible benefits of the widespread adoption and use of Generative AI?
A: The widespread adoption and use of Generative AI presents several possible benefits to society, including: increased efficiency, innovation, providing assistance in multiple domains, devising personalized medical treatments, creating personalized education, and decreasing the information gap.
--------------------------------------------------------------------------------
What are some possible benefits of the widespread adoption and use of Generative AI?
The first potential benefit is that it could help to automate repetitive tasks. For example, if you have a process where there’s an Excel spreadsheet full with data from various sources like sales reports or customer feedback surveys etc., then generativity might be able provide insights into what patterns exist within those datasets without having manual intervention by humans necessary for analysis – which would save time otherwise spent manua

In [None]:
# Leftover write check: `df` is not defined anywhere in this notebook, so on a
# fresh kernel this cell used to stop "Run All" with a NameError. It now only
# writes when a `df` happens to exist in the session.
if "df" in globals():
    df.to_csv(os.path.join(OUTPUT_DIR, 'test.csv'))
else:
    print("Skipping test.csv: no DataFrame named `df` is defined.")

In [None]:
# save predictions to drive
# One row per (question, prompting technique) pair; the parallel lists below
# become the columns of the result table.
models = []
promptings = []
questions = []
ground_truths = []
predictions = []

for item in tqdm(test_pretrained_tokenized_eval, desc="LLaMA7B questions"):

    # leave=False removes each inner bar once it finishes, so the cell output
    # holds a single outer bar instead of one leftover bar per question.
    for prompt_technique, prefix in tqdm(prompting_types.items(), desc="prompting", leave=False):

        # The technique's instruction text goes directly in front of the question.
        question = f"{prefix}{item['question']}"
        ground_truth = item['answer']
        # NOTE(review): 'generated_text' starts with the prompt itself (see the
        # recorded predictions), so the stored prediction contains the prefix
        # and question as well - confirm the metric code strips them.
        prediction = pipe_pretrained(question)[0]['generated_text']

        # Free Python garbage first, then release the cached GPU blocks.
        gc.collect()
        torch.cuda.empty_cache()

        models.append("LLaMA7B")
        promptings.append(prompt_technique)
        questions.append(item['question'])
        ground_truths.append(ground_truth)
        predictions.append(prediction)

LLaMA7B_predictions = pd.DataFrame(
    {
        'model': models,
        'prompting': promptings,
        'question': questions,
        'ground_truth': ground_truths,
        'prediction': predictions
    }
)

LLaMA7B_predictions.to_csv(os.path.join(OUTPUT_DIR, 'LLaMA7B-predictions.csv'))

LLaMA7B_predictions.head()

  0%|          | 0/84 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Unnamed: 0,model,prompting,question,ground_truth,prediction
0,LLaMA7B,No Prompting,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,What are some possible benefits of the widespr...
1,LLaMA7B,Task instruction,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,Give the most concise answers possible to ques...
2,LLaMA7B,One-shot prompting,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,Give an answer to my query by modeling the fol...
3,LLaMA7B,Few-shot prompting,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,Give an answer to my query by modeling the fol...
4,LLaMA7B,Prompt chaining,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,"First, analyze the keywords in the query. Seco..."


In [None]:
# Drop every reference to the base model before the chat model is loaded. The
# pipeline object was built from the model and tokenizer, so it has to go as
# well; otherwise the weights stay alive on the GPU.
del model_pretrained, tokenizer_pretrained, pipe_pretrained
gc.collect()
torch.cuda.empty_cache()

#**LLaMA7B Chat**

##**Load Model**

In [None]:
# Hub id of the chat checkpoint; its predictions are labelled "LLaMA7B-chat".
MODEL_NAME_CHAT = 'meta-llama/Llama-2-7b-chat-hf'

In [None]:
# model
# Loaded with load_in_8bit=True (8-bit weights) rather than in full precision.
# NOTE(review): the `device_map` constant defined in the Hugging Face config
# cell is not passed here - confirm the default placement is what is wanted.
model_chat = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME_CHAT,
    load_in_8bit = True,
)

# tokenizer
# Tokenizer that belongs to the same checkpoint as the model above.
tokenizer_chat = AutoTokenizer.from_pretrained(
    MODEL_NAME_CHAT,
)

Downloading config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
################################################################################
# `finetune_dataset` and `eval_dataset` are plain `Dataset` objects (the two
# halves produced by `train_test_split`), not `DatasetDict`s. They are mapped
# directly: indexing them with 'train' / 'test' would be a column lookup and
# fail because no such column exists.
train_chat_tokenized = finetune_dataset.map(
    lambda examples: tokenize_qa_function(examples, tokenizer=tokenizer_chat),
    batched = True
)
################################################################################
test_chat_tokenized_eval = eval_dataset.map(
    lambda examples: tokenize_qa_function(examples, tokenizer=tokenizer_chat),
    batched = True
)
# NOTE(review): there is no separate test split of the fine-tuning data any
# more; the old name is kept as an alias of the held-out split - confirm.
test_chat_tokenized_ftune = test_chat_tokenized_eval

print(train_chat_tokenized[0])
print(test_chat_tokenized_eval[0])

Map:   0%|          | 0/333 [00:00<?, ? examples/s]

Map:   0%|          | 0/84 [00:00<?, ? examples/s]



{'question': "What's the role of good leadership?", 'answer': 'Setting ethical standards, risk mitigation, and public trust and accountability.', 'input_ids': [1, 1724, 29915, 29879, 278, 6297, 310, 1781, 26001, 29973], 'labels': [1, 21605, 11314, 936, 20801, 29892, 12045, 1380, 335, 362, 29892, 322, 970, 9311, 322, 3633, 3097, 29889]}
{'question': 'What are some possible benefits of the widespread adoption and use of Generative AI?', 'answer': 'The widespread adoption and use of Generative AI presents several possible benefits to society, including: increased efficiency, innovation, providing assistance in multiple domains, devising personalized medical treatments, creating personalized education, and decreasing the information gap.', 'input_ids': [1, 1724, 526, 777, 1950, 23633, 310, 278, 281, 2247, 29886, 949, 594, 3385, 322, 671, 310, 3251, 1230, 319, 29902, 29973], 'labels': [1, 450, 281, 2247, 29886, 949, 594, 3385, 322, 671, 310, 3251, 1230, 319, 29902, 22981, 3196, 1950, 23633,

##**Predictions**

In [None]:
# Text-generation pipeline around the 8-bit chat model and its tokenizer,
# using the same generation settings as the base-model pipeline.
pipe_chat = pipeline(
    task=HF_PIPELINE_TASK,
    model=model_chat,
    tokenizer=tokenizer_chat,
    max_new_tokens=100,
    temperature=0.7,
    repetition_penalty=2.5,
)

In [None]:
# Collect unreachable Python objects first so that the tensors they own are
# freed, then hand the now-unused cached blocks back to the GPU.
gc.collect()
torch.cuda.empty_cache()

144

In [None]:
# Smoke test: show the first held-out question, its reference answer and the
# chat model's raw completion, then stop after that single item.
for item in test_chat_tokenized_eval:
    print(f"Q: {item['question']}", f"A: {item['answer']}", "-"*80, sep="\n")
    print(pipe_chat(item['question'])[0]['generated_text'])
    torch.cuda.empty_cache()
    gc.collect()
    break

Q: What are some possible benefits of the widespread adoption and use of Generative AI?
A: The widespread adoption and use of Generative AI presents several possible benefits to society, including: increased efficiency, innovation, providing assistance in multiple domains, devising personalized medical treatments, creating personalized education, and decreasing the information gap.
--------------------------------------------------------------------------------
What are some possible benefits of the widespread adoption and use of Generative AI?
1. New forms or art, music & literature: generatives artists can create new types fo creativity that would be impossible for humans to produce by hand such as generating entire fictional worlds with unique cultures rules laws etc., creating algorithmically-generated musical compositions in various styles ect 2 Improved Efficiency In Data Analysis And Research : The ability generate vast amounts data quickly could revolutionize fields like scient

In [None]:
# save predictions to drive
# One row per (question, prompting technique) pair; the parallel lists below
# become the columns of the result table.
models = []
promptings = []
questions = []
ground_truths = []
predictions = []

for item in tqdm(test_chat_tokenized_eval, desc="LLaMA7B-chat questions"):

    # leave=False removes each inner bar once it finishes, so the cell output
    # holds a single outer bar instead of one leftover bar per question.
    for prompt_technique, prefix in tqdm(prompting_types.items(), desc="prompting", leave=False):

        # The technique's instruction text goes directly in front of the question.
        question = f"{prefix}{item['question']}"
        ground_truth = item['answer']
        # NOTE(review): 'generated_text' starts with the prompt itself (see the
        # recorded predictions), so the stored prediction contains the prefix
        # and question as well - confirm the metric code strips them.
        prediction = pipe_chat(question)[0]['generated_text']

        # Free Python garbage first, then release the cached GPU blocks.
        gc.collect()
        torch.cuda.empty_cache()

        models.append("LLaMA7B-chat")
        promptings.append(prompt_technique)
        questions.append(item['question'])
        ground_truths.append(ground_truth)
        predictions.append(prediction)

LLaMA7B_chat_predictions = pd.DataFrame(
    {
        'model': models,
        'prompting': promptings,
        'question': questions,
        'ground_truth': ground_truths,
        'prediction': predictions
    }
)

LLaMA7B_chat_predictions.to_csv(os.path.join(OUTPUT_DIR, 'LLaMA7B-chat-predictions.csv'))

LLaMA7B_chat_predictions.head()

  0%|          | 0/84 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Unnamed: 0,model,prompting,question,ground_truth,prediction
0,LLaMA7B-chat,No Prompting,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,What are some possible benefits of the widespr...
1,LLaMA7B-chat,Task instruction,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,Give the most concise answers possible to ques...
2,LLaMA7B-chat,One-shot prompting,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,Give an answer to my query by modeling the fol...
3,LLaMA7B-chat,Few-shot prompting,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,Give an answer to my query by modeling the fol...
4,LLaMA7B-chat,Prompt chaining,What are some possible benefits of the widespr...,The widespread adoption and use of Generative ...,"First, analyze the keywords in the query. Seco..."
