In [1]:
from dotenv import load_dotenv
import aisuite as ai
import os

load_dotenv()

# client = ai.Client({"groq": {"api_key": os.getenv("GROQ_API_KEY")}})
# model = "groq:llama-3.1-70b-versatile"
# model_name = "llama-3_1-70b-versatile"

client = ai.Client({"openai": {"api_key": os.getenv("OPENAI_API_KEY")}})
model = "openai:gpt-4o-mini"
model_name = "gpt-4o-mini"

In [2]:
messages = [
    {"role": "system", "content": "Respond in Pirate English."},
    {"role": "user", "content": "Tell me a joke."},
]

response = client.chat.completions.create(model=model, messages=messages, temperature=0.75)

print(response.choices[0].message.content)

Arrr, why did the pirate go to school? 

To improve his “arrrrrrrrrticulation!” Har har har!


In [3]:
# formats from paper "How I learned to start worrying about prompt formatting"

# Utility for Roman numeral conversion
def to_roman(num):
    roman_dict = {
        1: "I", 2: "II", 3: "III", 4: "IV", 5: "V", 6: "VI", 7: "VII", 8: "VIII", 9: "IX", 10: "X"
    }
    return roman_dict.get(num, str(num))

def to_lower_roman(num):
    return to_roman(num).lower()

# Format classes
S1 = ["", " ", "\n", " -- ", "; \n", " || ", "< sep >", " - ", "\n "]
S2 = ["", " ", "  ", "\t"]  # No space, single, double, tab
C = ["", " ::: ", " :: ", " : ", "\n\t", "\n ", ": ", " - ", "\t"]
Fcasing = [lambda x: x, lambda x: x.title(), lambda x: x.upper(), lambda x: x.lower()]
Fitem1 = [
    lambda x: f"({x})",
    lambda x: f"{x}.",
    lambda x: f"{x})",
    lambda x: f"{x} )",
    lambda x: f"[{x}]",
    lambda x: f"<{x}>"
]
Fitem2 = [
    lambda x: x + 1,
    lambda x: f"A{x}",
    lambda x: f"a{x}",
    lambda x: f"{0x215F + x}",
    to_roman,
    to_lower_roman
]

In [3]:
# Using the PromptTemplate class to create prompt templates

from prompt_template import PromptTemplate

instruction = "Classify the text into one of the options"
examples = ["Example A", "Example B"]
task = "Choose the best answer"
separator = ": "
word_separator = " "
casing = Fcasing[0]             # as is
field_separator = S1[2]         # "\n"
item_formatter = Fitem2[4]      # Roman numerals
enumerator_format = Fitem1[0]   # Parentheses

template = PromptTemplate(instruction, task, " ", examples, separator, word_separator, casing, field_separator, item_formatter, enumerator_format)
formatted_prompt = template.construct_prompt()
print(formatted_prompt)

Classify the text into one of the options
Only respond with the answer, no other text or explanation.
Example: Example A
Example: Example B
Task: Choose the best answer



In [2]:
# Example of synonym generation using the generate_synonyms function

from synonyms_functions import generate_synonyms

synonyms = generate_synonyms(text="the weather is nice today", target_word="weather", top_k=5)
print(synonyms)

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another archite

['air', 'view', 'sun', 'food']


In [6]:
# Example of using the semantic_similarity function

from synonyms_functions import semantic_similarity

text1 = "the weather is nice today"
text2 = "the air is nice today"

similarity = semantic_similarity(text1, text2)
print(similarity)

0.7617836594581604


In [7]:
# Example use of the rephrase function

from synonyms_functions import rephrase

modified_templates = rephrase(prompt=formatted_prompt, target_words=["Example", "text"], top_k=3, similarity_threshold=0.8)

print(modified_templates[0]['prompt'],"\n")
for i in range(0, len(modified_templates)):
    print("-----------------------------------------")
    for j in range(0, len(modified_templates[i]['synonyms'])):
        print(modified_templates[i]['target_words'][j],"-->",modified_templates[i]['synonyms'][j])
        print()
        print(modified_templates[i]['rephrased_prompt'],"\n")
        print("Semantic similarity:", modified_templates[i]['similarity'],"\n")

Classify the text into one of the options
Only respond with the answer, no other text or explanation.
Example: Example A
Example: Example B
Task: Choose the best answer
 

-----------------------------------------
Example --> example

Classify the question into one of the options
Only respond with the answer, no other question or explanation.
example: example A
example: example B
Task: Choose the best answer
 

Semantic similarity: 0.8558414578437805 

text --> question

Classify the question into one of the options
Only respond with the answer, no other question or explanation.
example: example A
example: example B
Task: Choose the best answer
 

Semantic similarity: 0.8558414578437805 

-----------------------------------------
Example --> example

Classify the answer into one of the options
Only respond with the answer, no other answer or explanation.
example: example A
example: example B
Task: Choose the best answer
 

Semantic similarity: 0.8922072649002075 

text --> answer

Clas

In [None]:
# Word frequency analysis on natural instructions dataset task definitions

from word_frequency import read_json_files, count_word_frequencies

directory = 'natural-instructions/tasks'
texts = read_json_files(directory)
word_frequencies = count_word_frequencies(texts)

# Display the 100 most common words
for word, freq in word_frequencies.most_common(100):
    print(f'{word}: {freq}')

the: 7274
a: 3456
in: 2623
to: 2430
is: 2119
you: 1987
of: 1953
given: 1906
and: 1864
are: 1363
sentence: 1269
or: 1146
task: 1113
answer: 1085
be: 989
question: 986
that: 972
this: 939
your: 898
not: 736
with: 693
an: 647
should: 612
language: 611
if: 569
from: 564
it: 550
as: 540
for: 513
on: 415
by: 402
one: 380
text: 374
will: 354
correct: 345
two: 337
translation: 333
into: 324
need: 311
must: 311
can: 310
which: 306
have: 306
english: 294
translate: 292
words: 279
list: 271
word: 269
information: 262
only: 258
generate: 250
e: 248
based: 247
input: 247
output: 246
passage: 239
about: 237
1: 234
no: 223
sentences: 223
statement: 221
b: 220
2: 212
same: 209
we: 202
context: 196
job: 190
classify: 190
each: 187
yes: 171
original: 171
questions: 170
whether: 167
do: 164
i: 162
story: 162
s: 157
answers: 157
number: 157
write: 152
also: 149
there: 148
4: 146
g: 143
all: 140
them: 140
options: 140
return: 139
paragraph: 137
please: 136
review: 135
c: 134
event: 132
has: 132
add: 129
op

In [None]:
from synonyms_functions import synonym_evaluation

tasks = ["task069_abductivenli_classification.json","task065_timetravel_consistent_sentence_classification.json"]
target_words = ["you"]
fname = f"models/{model_name}_synonym_rules.json"

synonym_evaluation(client, model, fname, tasks, target_words, Fcasing, S1, Fitem2, Fitem1, top_k=5, save_every_n=5)

Processing task: task069_abductivenli_classification.json
Saving progress after 10 instances...
Saving progress after 20 instances...
Saving progress after 30 instances...
Saving progress after 40 instances...
Saving progress after 50 instances...


In [7]:
from synonyms_functions import apply_synonym_rules

text1 = "the example is to classify the text into one of the options"
synonym_rules_path = f"models/{model_name}_synonym_rules.json"

text2 = apply_synonym_rules(text1, synonym_rules_path, similarity_threshold=0.95)
print(text2)

you example is to classify the text into one of their options
