# Evolutionary Prompt Selection
## Hyper-parameter Optimization

### Install Dependencies

In [1]:
!printf 'accelerate\nbitsandbytes\ndatasets\npinecone-client[grpc]\nsentencepiece\nsentence-transformers\ntorch\ntransformers\nwikipedia ' > requirements.txt  
!pip install -r requirements.txt


Collecting bitsandbytes (from -r requirements.txt (line 2))
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Collecting pinecone-client[grpc] (from -r requirements.txt (line 4))
  Downloading pinecone_client-2.2.2-py3-none-any.whl (179 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.1/179.1 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
Collecting sentence-transformers (from -r requirements.txt (line 6))
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- \ | done
Collecting wikipedia (from -r requirements.txt (line 9))
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l- \ done
Collecting loguru>=0.5.0 

### Import Statements

In [2]:
import json
import math
import os
import string
import time

import pandas as pd
import pinecone
import torch


from accelerate import Accelerator, notebook_launcher
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import GenerationConfig
from tqdm.auto import tqdm

from utils import LanguageModel, EPS, PWS
from nodes import Extractor

from kaggle_secrets import UserSecretsClient


  from tqdm.autonotebook import tqdm
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


### Define Global Variables

In [3]:
user_secrets = UserSecretsClient()
PINECONE_API_KEY = user_secrets.get_secret('PINECONE_API_KEY')
PINECONE_ENV = user_secrets.get_secret('PINECONE_ENVIRONMENT')
INDEX_NAME = 'plans'

EMBEDDING_MODEL = 'all-MiniLM-L6-v2'

MODEL_PATH = "stabilityai/StableBeluga-13B"
SYSTEM_TAG = "### System:\n"
USER_TAG = "### User:\n"
AI_TAG = "### Assistant:\n"

LOAD_IN_8BIT = True
HF_TOKEN = None

TEMPERATURE = 0.01
TOP_K = 50
TOP_P = 0.9
REPETITION_PENALTY= 1.0
MAX_NEW_TOKENS = 256

DATASET_NAME = "trivia_qa"

NUM_EXAMPLES = 3


### Initialize models and agents

In [4]:
generation_config = GenerationConfig(
    do_sample=True,
    temperature=TEMPERATURE,
    top_k=TOP_K,
    top_p=TOP_P,
    repetition_penalty=REPETITION_PENALTY,
    max_new_tokens=MAX_NEW_TOKENS
)

model = LanguageModel(MODEL_PATH, generation_config=generation_config,
                      load_in_8bit=LOAD_IN_8BIT, access_token=HF_TOKEN, device_map='auto',
                      system_tag=SYSTEM_TAG, user_tag=USER_TAG, ai_tag=AI_TAG)

agent = PWS(model)
extractor = Extractor(model)


Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

### Initialize database, prompter and dataset

In [5]:
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENV
)
index = pinecone.GRPCIndex(INDEX_NAME)
embedding_model = SentenceTransformer(EMBEDDING_MODEL)
prompter = EPS(index, embedding_model)

dataset = load_dataset(DATASET_NAME, 'rc.nocontext')
sanitize = lambda text: text.strip().lower().translate(str.maketrans('', '', string.punctuation))


Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/3.70k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.99k [00:00<?, ?B/s]

Downloading and preparing dataset trivia_qa/rc.nocontext (download: 2.48 GiB, generated: 118.84 MiB, post-processed: Unknown size, total: 2.60 GiB) to /root/.cache/huggingface/datasets/trivia_qa/rc.nocontext/1.2.0/e73c5e47a8704744fa9ded33504b35a6c098661813d1c2a09892eb9b9e9d59ae...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/138384 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/17944 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/17210 [00:00<?, ? examples/s]

Dataset trivia_qa downloaded and prepared to /root/.cache/huggingface/datasets/trivia_qa/rc.nocontext/1.2.0/e73c5e47a8704744fa9ded33504b35a6c098661813d1c2a09892eb9b9e9d59ae. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

### 1st Round

In [6]:
hp_opt_size = 30
hp_opt_data = dataset['train'][:hp_opt_size]
temp_values = [0.01, 0.25, 0.5, 0.75, 1.0]
rep_values = [1.0, 1.1, 1.2, 1.3]
results = []
for temp in temp_values:
    for rep in rep_values:
        generation_config = GenerationConfig(
            do_sample=True,
            temperature=temp,
            top_k=TOP_K,
            top_p=TOP_P,
            repetition_penalty=rep,
            max_new_tokens=MAX_NEW_TOKENS
        )
        model.generation_config = generation_config
        em = []
        for question, answer in tqdm(zip(hp_opt_data['question'], hp_opt_data['answer']),
                                     total=hp_opt_size):
            list_of_candidates = [sanitize(alias) for alias in answer["aliases"]]

            selection = prompter.select_examples(question, NUM_EXAMPLES)
            examples = [entry['metadata'] for entry in selection]
            response = agent.run(question, examples)
            answer = sanitize(response['output'])

            if answer not in list_of_candidates:
                extracted = sanitize(extractor(response['output'], question))
                if extracted not in list_of_candidates:
                    em.append(False)
                    continue 
            em.append(True)

        print(f"Temperature: {temp}\nRepetition Penalty: {rep}\nScore: {sum(em)}\n")
        results.append({'temp':temp, 'rep':rep, 'score':sum(em)})
    
with open("results.json", "w") as f:
    json.dump(results, f)
    

  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.01
Repetition Penalty: 1.0
Score: 18



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.01
Repetition Penalty: 1.1
Score: 18



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.01
Repetition Penalty: 1.2
Score: 12



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.01
Repetition Penalty: 1.3
Score: 9



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.25
Repetition Penalty: 1.0
Score: 17



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.25
Repetition Penalty: 1.1
Score: 17



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.25
Repetition Penalty: 1.2
Score: 12



  0%|          | 0/30 [00:00<?, ?it/s]



  lis = BeautifulSoup(html).find_all('li')


Temperature: 0.25
Repetition Penalty: 1.3
Score: 6



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.5
Repetition Penalty: 1.0
Score: 15



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.5
Repetition Penalty: 1.1
Score: 16



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.5
Repetition Penalty: 1.2
Score: 12



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.5
Repetition Penalty: 1.3
Score: 10



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.75
Repetition Penalty: 1.0
Score: 16



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.75
Repetition Penalty: 1.1
Score: 15



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.75
Repetition Penalty: 1.2
Score: 12



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 0.75
Repetition Penalty: 1.3
Score: 4



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 1.0
Repetition Penalty: 1.0
Score: 18



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 1.0
Repetition Penalty: 1.1
Score: 14



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 1.0
Repetition Penalty: 1.2
Score: 11



  0%|          | 0/30 [00:00<?, ?it/s]

Temperature: 1.0
Repetition Penalty: 1.3
Score: 6



#### Results

In [7]:
with open('results.json', 'r') as f:
    results = json.load(f)

results_df = pd.DataFrame.from_dict(results)
results_df = results_df.rename(columns={'temp': 'Temperature',
                                        'rep': 'Repetition Penalty',
                                        'score': 'EM Score'})
results_df = results_df.pivot(index='Temperature', columns='Repetition Penalty')
results_df


Unnamed: 0_level_0,EM Score,EM Score,EM Score,EM Score
Repetition Penalty,1.0,1.1,1.2,1.3
Temperature,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0.01,18,18,12,9
0.25,17,17,12,6
0.5,15,16,12,10
0.75,16,15,12,4
1.0,18,14,11,6


### 2nd Round

In [8]:
top_3 = sorted(results, key=lambda r: r['score'], reverse=True)[:3]
hp_opt_size = 100
hp_opt_data = dataset['train'][:hp_opt_size]
results_v2 = []
for config in top_3:
    temp = config['temp']
    rep = config['rep']
    generation_config = GenerationConfig(
        do_sample=True,
        temperature=temp,
        top_k=TOP_K,
        top_p=TOP_P,
        repetition_penalty=rep,
        max_new_tokens=MAX_NEW_TOKENS
    )
    model.generation_config = generation_config
    em = []
    for question, answer in tqdm(zip(hp_opt_data['question'], hp_opt_data['answer']),
                                 total=hp_opt_size):
        list_of_candidates = [sanitize(alias) for alias in answer["aliases"]]

        selection = prompter.select_examples(question, NUM_EXAMPLES)
        examples = [entry['metadata'] for entry in selection]
        response = agent.run(question, examples)
        answer = sanitize(response['output'])

        if answer not in list_of_candidates:
            extracted = sanitize(extractor(response['output'], question))
            if extracted not in list_of_candidates:
                em.append(False)
                continue 
        em.append(True)

    print(f"Temperature: {temp}\nRepetition Penalty: {rep}\nScore: {sum(em)}\n")
    results_v2.append({'temp':temp, 'rep':rep, 'score':sum(em)})

with open("results_v2.json", "w") as f:
    json.dump(results_v2, f)


  0%|          | 0/100 [00:00<?, ?it/s]



  lis = BeautifulSoup(html).find_all('li')


Temperature: 0.01
Repetition Penalty: 1.0
Score: 49



  0%|          | 0/100 [00:00<?, ?it/s]

Temperature: 0.01
Repetition Penalty: 1.1
Score: 49



  0%|          | 0/100 [00:00<?, ?it/s]

Temperature: 1.0
Repetition Penalty: 1.0
Score: 47

