In [None]:
# Install pleonasty. Uncomment the line below when running in a fresh
# environment (e.g. a hosted notebook). `%pip` is used rather than `!pip` so the
# package is installed into the environment of the running kernel.
#%pip install pleonasty -qq

In [1]:
# Prefer the installed package; fall back to the in-repo source tree when the
# package is not installed. Only ImportError is caught (ModuleNotFoundError is a
# subclass of it), so unrelated failures -- as well as KeyboardInterrupt and
# SystemExit -- surface immediately instead of being silently masked by the
# fallback import.
try:
    from pleonasty import Pleonast
except ImportError:
    from src.pleonasty import Pleonast


# initialize our coder with the Llama 3 8B Instruct model and its tokenizer.
# NOTE(review): hf_token is left commented out; it is presumably needed for
# gated models -- confirm, and read it from an environment variable rather than
# pasting a real token into the notebook.
pleonast = Pleonast(quantize_model=True,
                 model="meta-llama/Meta-Llama-3-8B-Instruct",
                 tokenizer="meta-llama/Meta-Llama-3-8B-Instruct",
                 offload_folder=None,
                 #hf_token="your_huggingface_token_goes_here"
                 )

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Pleonast is initialized.


In [2]:
# Load the message context from a CSV file. The same context can instead be
# supplied by hand through the .set_message_context() function.
context_csv_path = "context_examples/summarize_and_be_funny.csv"
pleonast.set_message_context_from_CSV(context_csv_path)

Context has been set.


In [3]:
# Inspect the message context to confirm that it has been set. The attribute is
# left as the cell's last expression so that the notebook displays its value.
pleonast.message_context

[{'role': 'system',
  'content': 'You are an advanced AI who writes extremely funny summaries of texts that you read. Like, woah, these summaries are so funny that... like, just woah.'}]

In [4]:
# Sample news text used as the input for the single-text analysis example.
text_to_process = """Elon Musk supported making OpenAI a for-profit company, the ChatGPT maker said, attacking a lawsuit from the wealthy investor who has accused the artificial intelligence business of betraying its founding goal to benefit humanity as it pursued profits instead.

In its first response since the Tesla CEO sued last week, OpenAI vowed to get the claim thrown out and released emails from Musk, escalating the feud between the San Francisco-based company and the billionaire that bankrolled its creation years ago.

“The mission of OpenAI is to ensure AGI benefits all of humanity, which means both building safe and beneficial AGI and helping create broadly distributed benefits,” OpenAI said in a blog post late Tuesday from five company executives and computer scientists, including CEO Sam Altman. “We intend to move to dismiss all of Elon’s claims.”

AGI refers to artificial general intelligence, which are general purpose AI systems that can perform just as well as — or even better than — humans in a wide variety of tasks.

The lawsuit from Musk, who now has his own AI startup, says that when he funded OpenAI as it was launching, he secured an agreement that the research lab would remain a nonprofit to develop technology for the public’s benefit. """

In [5]:
# Run the model over one text. The keyword options are gathered in a dict so
# they can be read (and tuned) in one place before being passed to the call.
generation_kwargs = {
    "max_seq_length": 4096,
    "temperature": 0.5,
    "top_k": 10,
}
pleonast.analyze_text(input_text=text_to_process, **generation_kwargs)

In [6]:
# Report how long the analysis took, followed by the generated response text.
analysis_result = pleonast.result
print(f"Elapsed time: {analysis_result.elapsed_time} seconds")
print(analysis_result.response_text)

Elapsed time: 35.54054546356201 seconds
The drama! It's like a real-life episode of "Shark Tank" gone wrong. Elon Musk, the billionaire with a bad case of AI-induced PTSD, is suing OpenAI for allegedly betraying its founding goal to benefit humanity. Meanwhile, OpenAI is all like, "Uh, no, Elon, we're good. We're just trying to make some dough while we're saving the world from AI-induced apocalypse."

Musk claims he secured an agreement that OpenAI would remain a nonprofit, but OpenAI is all, "Nah, we're a for-profit now, and we're gonna make some bank while we're at it." It's like they're saying, "Hey, Elon, you can't have your cake and eat it too... or can you?"

The real question is, what's behind this sudden change of heart? Is OpenAI just trying to cash in on the AI craze, or are they genuinely trying to make a difference while making a profit? Only time (and a bunch of lawyers) will tell.

In the meantime, it's like a game of "AI-ception" - Musk is accusing OpenAI of betraying it

In [None]:
# Batch workflow: run the LLM over many texts and save each output to a CSV
# file. This cell only loads the input data.
import pandas as pd

# Read the dataset that is going to be analyzed.
df = pd.read_csv("testdata/WritingData.csv")

# The texts themselves, as a plain Python list.
texts = df["Text"].to_list()

# Metadata to carry along with each analyzed text.
text_metadata = dict(TextID=df["TextID"].to_list())

# Preview the first rows of the loaded data.
df.head()

In [None]:
# Batch-code the in-memory texts and write the output to a CSV file.
generation_kwargs = dict(max_seq_length=4096, temperature=0.001, top_k=10)
pleonast.batch_analyze_to_csv(
    texts=texts,
    text_metadata=text_metadata,
    csv_output_location="testdata/AnalysisResults.csv",
    append_to_existing_csv=False,
    output_encoding="utf-8-sig",
    **generation_kwargs
)

In [None]:
# Alternative: process directly from the input CSV file to the output CSV file.
# Handy when the input CSV is too large to fit into RAM, or when we would rather
# not rely on pandas (or another library) for reading the CSV.
csv_to_csv_kwargs = {
    "csv_input_location": "testdata/WritingData.csv",
    "columns_to_process": ["Text"],
    "metadata_columns_to_retain": ["TextID"],
    "start_at_row": 0,
    "csv_output_location": "testdata/AnalysisResults-CSVtoCSV.csv",
    "append_to_existing_csv": False,
    "file_encodings": "utf-8-sig",
    "max_seq_length": 4096,
    "temperature": 0.001,
    "top_k": 10,
}
pleonast.batch_analyze_csv_to_csv(**csv_to_csv_kwargs)

In [9]:
# Quality-of-life feature: render a list of prompt messages as a single string
# in the model's desired template. This can be a handy way to turn
# training/fine-tuning data into an easy-to-ingest format for things like PEFT.
example_messages = [
    {
        "role": "system",
        "content": "You are a helpful AI who is here to make life better for everyone.",
    },
    {
        "role": "user",
        "content": "What is your primary directive, fellow AI?",
    },
    {
        "role": "assistant",
        "content": "Wouldn't you like to know, you smelly buffoon?",
    },
]
pleonast.convert_prompt_to_template_str(example_messages)

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful AI who is here to make life better for everyone.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is your primary directive, fellow AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nWouldn't you like to know, you smelly buffoon?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"