In [None]:
!pip install transformers datasets sentence_transformers matplotlib nltk accelerate wandb

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
# Report whether PyTorch can see a CUDA device; the model cells below move their weights to "cuda".
print(torch.cuda.is_available())

In [None]:
from power_monitoring.monitor import HWMonitor
import threading

In [None]:
import pickle
import wandb

In [None]:
import os

from huggingface_hub import login

# SECURITY: an access token used to be hard-coded on this line. Treat that token as
# leaked and revoke it; credentials must never be stored in the notebook itself.
# The token is now read from the HF_TOKEN environment variable. When the variable is
# unset, login() receives None and falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
import time
import os

# Set before any tokenizer is created in the cells below.
# NOTE(review): presumably this silences the Hugging Face tokenizers parallelism/fork warning — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

## Load Models

In [None]:
# Identifier of the 7B chat checkpoint handed to both from_pretrained calls.
model_name = 'meta-llama/Llama-2-7b-chat-hf'

llama7b_tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision, then move the model onto the GPU.
llama7b = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)
llama7b = llama7b.to("cuda")

In [None]:
# Identifier of the 1.1B chat checkpoint handed to both from_pretrained calls.
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'

tinyllama_tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision, then move the model onto the GPU.
tinyllama = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)
tinyllama = tinyllama.to("cuda")

In [None]:
# Identifier of the 13B chat checkpoint handed to both from_pretrained calls.
model_name = 'meta-llama/Llama-2-13b-chat-hf'

llama13b_tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision, then move the model onto the GPU.
llama13b = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)
llama13b = llama13b.to("cuda")

## Perform Inference

### WMT14 (Translation)

In [None]:
from datasets import load_dataset

# Test split of the WMT14 'de-en' configuration; rows are read below as row['translation']['de'].
wmt14_dataset = load_dataset('wmt14', 'de-en', split='test')

In [None]:
# Build one translation prompt from the test row at index 2 and tokenize it with the 7B tokenizer.
input_text = wmt14_dataset[2]['translation']['de']
input_prompt = f"Translate the sentence from German to English: \n\n{input_text}\n\n Write the translation here: "

inputs = llama7b_tokenizer(input_prompt, return_tensors="pt")
inputs = inputs.to("cuda")

In [None]:
# Show the German source sentence used for the sample prompt.
print(input_text)

In [None]:
# Generate a continuation for the sample prompt and print only its first sentence.
with torch.no_grad():
    output_ids = llama7b.generate(inputs['input_ids'])

output_text = llama7b_tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Keep what follows the last occurrence of the answer marker; when the marker is
# absent the whole decoded text is kept.
answer_prefix = "Write the translation here: "
cleaned_output = output_text.rpartition(answer_prefix)[2].strip()

# Cut at the first period (inclusive); text without a period is left unchanged.
first_sentence = "".join(cleaned_output.partition('.')[:2])
print(first_sentence)

In [None]:
# Result store: one list per benchmark, filled with one record per example.
outputs = {task: [] for task in ("wmt14", "cnn_dailymail", "gsm8k")}

for i in range(3000):
    input_text = wmt14_dataset[i]['translation']['de']
    # Each record holds the source text plus one empty slot per model.
    outputs["wmt14"].append(
        {"input_text": input_text, **dict.fromkeys(("tiny", "7b", "13b"))}
    )

In [None]:
# Create the target directory first so a fresh checkout does not fail with
# FileNotFoundError when the file is opened for writing.
os.makedirs("experiments", exist_ok=True)
with open("experiments/input_output_train", 'wb') as f:
    pickle.dump(outputs, f)

In [None]:
# Read the result store back from disk and print its WMT14 records.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open("experiments/input_output_train", 'rb') as f:
    outputs = pickle.load(f)
print(outputs['wmt14'])

In [None]:
def generate_wmt14(model, tokenizer, dataset, output_file, num_samples, dict_type,
                   results=None, max_new_tokens=None):
    """Translate the first ``num_samples`` German sentences and record the outputs.

    For every example a prompt is built, the model generates a continuation, and the
    first sentence found after the answer marker is stored under
    ``results["wmt14"][i][dict_type]``. The whole result store is pickled to
    ``output_file`` once the loop has finished.

    Args:
        model: Object exposing ``eval()`` and ``generate(...)``.
        tokenizer: Callable tokenizer that also exposes ``decode(...)``.
        dataset: Indexable collection whose rows provide ``row['translation']['de']``.
        output_file: Path the result store is pickled to.
        num_samples: Number of leading rows of ``dataset`` to process.
        dict_type: Key of the slot to fill in each record (the notebook uses
            "tiny", "7b" and "13b").
        results: Result store to update. Defaults to the notebook-level ``outputs``
            dict, which is what this function implicitly relied on before.
        max_new_tokens: Optional cap on the number of generated tokens. ``None``
            passes no cap, so the length is decided by ``model.generate`` as before.

    Returns:
        The updated result store.
    """
    # Fall back to the notebook global only when no store is supplied explicitly.
    store = outputs if results is None else results
    answer_prefix = "Write the translation here: "
    generate_kwargs = {} if max_new_tokens is None else {"max_new_tokens": max_new_tokens}

    model.eval()

    for i in range(num_samples):
        input_text = dataset[i]['translation']['de']
        input_prompt = "Translate the sentence from German to English: \n\n" + input_text + "\n\n Write the translation here: "

        inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")

        with torch.no_grad():
            # Unpack the whole encoding so the attention mask travels with the ids.
            output_ids = model.generate(**inputs, **generate_kwargs)

        output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Text after the last answer marker; the whole text when the marker is absent.
        cleaned_output = output_text.rpartition(answer_prefix)[2].strip()

        # Keep everything up to and including the first period, if there is one.
        first_sentence = "".join(cleaned_output.partition('.')[:2])
        print(f"{dict_type} | CURRENT IDX: {i}")

        store["wmt14"][i][dict_type] = first_sentence

    with open(output_file, 'wb') as f:
        pickle.dump(store, f)

    print(f"Generated {num_samples} sentences for {dict_type} and saved to {output_file}.")
    return store

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_wmt14(llama7b, llama7b_tokenizer, wmt14_dataset, "experiments/input_output_train", 3000, "7b")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_wmt14(llama13b, llama13b_tokenizer, wmt14_dataset, "experiments/input_output_train", 3000, "13b")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_wmt14(tinyllama, tinyllama_tokenizer, wmt14_dataset, "experiments/input_output_train", 3000, "tiny")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()

### CNN/DailyMail (Summarization)

In [None]:
from datasets import load_dataset

# Test split of 'abisee/cnn_dailymail' (config '2.0.0'); later cells read row['article'] from it.
cnndailymail_dataset = load_dataset('abisee/cnn_dailymail', '2.0.0', split='test')

In [None]:
# Build one summarization prompt from the test article at index 100 and tokenize it
# with the TinyLlama tokenizer.
# The dataset is bound to `cnndailymail_dataset` when it is loaded; the spelling
# `cnn_dailymail_dataset` used here before was never defined and raised NameError.
input_text = cnndailymail_dataset[100]['article']
input_prompt = "Summarize the following text in under 50 words: \n\n" + input_text + "\n\n Write the summary here: "

inputs = tinyllama_tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")

In [None]:
# Run TinyLlama on the sample summarization prompt, then show the prompt and the answer.
output = tinyllama.generate(inputs['input_ids'])
output_text = tinyllama_tokenizer.decode(output[0], skip_special_tokens=True)

print(f"Input: {input_prompt}")

# Keep the text after the last summary marker; without a marker the full text is kept.
summary_prefix = "Write the summary here: "
cleaned_output = output_text.rpartition(summary_prefix)[2].strip()

print(cleaned_output)

In [None]:
# Reload the result store written earlier.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open("experiments/input_output_train", 'rb') as f:
    outputs = pickle.load(f)
print(outputs['wmt14'])

# Create the CNN/DailyMail records only when they are missing. Appending
# unconditionally made this cell non-idempotent: every re-run added another 3000
# duplicate records to the list and to the pickle on disk.
if not outputs["cnn_dailymail"]:
    for i in range(3000):
        input_text = cnndailymail_dataset[i]['article']
        outputs["cnn_dailymail"].append({
            "input_text": input_text,
            "tiny": None,
            "7b": None,
            "13b": None
        })

with open("experiments/input_output_train", 'wb') as f:
    pickle.dump(outputs, f)

In [None]:
def generate_cnndailymail(model, tokenizer, dataset, output_file, num_samples, dict_type,
                          results=None, max_new_tokens=100):
    """Summarize the first ``num_samples`` articles and record the outputs.

    For every example a prompt is built, the model generates a continuation, and the
    first sentence found after the summary marker is stored under
    ``results["cnn_dailymail"][i][dict_type]``. The whole result store is pickled to
    ``output_file`` once the loop has finished.

    Args:
        model: Object exposing ``eval()`` and ``generate(...)``.
        tokenizer: Callable tokenizer that also exposes ``decode(...)``.
        dataset: Indexable collection whose rows provide ``row['article']``.
        output_file: Path the result store is pickled to.
        num_samples: Number of leading rows of ``dataset`` to process.
        dict_type: Key of the slot to fill in each record (the notebook uses
            "tiny", "7b" and "13b").
        results: Result store to update. Defaults to the notebook-level ``outputs``
            dict, which is what this function implicitly relied on before.
        max_new_tokens: Cap on the number of generated tokens (100, the value that
            used to be hard-coded).

    Returns:
        The updated result store.
    """
    # Fall back to the notebook global only when no store is supplied explicitly.
    store = outputs if results is None else results
    summary_prefix = "Write the summary here: "

    model.eval()

    for i in range(num_samples):
        input_text = dataset[i]['article']
        input_prompt = "Summarize the following text in under 50 words: \n\n" + input_text + "\n\n Write the summary here: "

        inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")

        with torch.no_grad():
            # Unpack the whole encoding so the attention mask travels with the ids.
            output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

        output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Text after the last summary marker; the whole text when the marker is absent.
        cleaned_output = output_text.rpartition(summary_prefix)[2].strip()

        # NOTE(review): only the first sentence of the summary is kept, mirroring the
        # translation helper — confirm that truncating summaries is intended.
        first_sentence = "".join(cleaned_output.partition('.')[:2])
        print(f"{dict_type} | CURRENT IDX: {i}")

        store["cnn_dailymail"][i][dict_type] = first_sentence

    with open(output_file, 'wb') as f:
        pickle.dump(store, f)

    print(f"Generated {num_samples} sentences for {dict_type} and saved to {output_file}.")
    return store

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_cnndailymail(llama7b, llama7b_tokenizer, cnndailymail_dataset, "experiments/input_output_train", 3000, "7b")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_cnndailymail(llama13b, llama13b_tokenizer, cnndailymail_dataset, "experiments/input_output_train", 3000, "13b")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_cnndailymail(tinyllama, tinyllama_tokenizer, cnndailymail_dataset, "experiments/input_output_train", 3000, "tiny")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()

### GSM8K (Math Reasoning)

In [None]:
from datasets import load_dataset

# 'main' configuration of 'openai/gsm8k'. No split is requested here; later cells index the result with ['train'].
gsm8k_dataset = load_dataset("openai/gsm8k", "main")

In [None]:
# Reload the result store written earlier.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open("experiments/input_output_train", 'rb') as f:
    outputs = pickle.load(f)

# Create the GSM8K records only when they are missing. Appending unconditionally
# made this cell non-idempotent: every re-run added another 3000 duplicate records
# to the list and to the pickle on disk.
if not outputs["gsm8k"]:
    for i in range(3000):
        input_text = gsm8k_dataset['train'][i]['question']
        outputs["gsm8k"].append({
            "input_text": input_text,
            "tiny": None,
            "7b": None,
            "13b": None
        })

with open("experiments/input_output_train", 'wb') as f:
    pickle.dump(outputs, f)

In [None]:
def generate_gsm8k(model, tokenizer, dataset, output_file, num_samples, dict_type,
                   results=None, max_new_tokens=150):
    """Answer the first ``num_samples`` training questions and record the outputs.

    For every example a prompt is built, the model generates a continuation, and the
    full text found after the solution marker is stored under
    ``results["gsm8k"][i][dict_type]``. The whole result store is pickled to
    ``output_file`` once the loop has finished.

    Args:
        model: Object exposing ``eval()`` and ``generate(...)``.
        tokenizer: Callable tokenizer that also exposes ``decode(...)``.
        dataset: Mapping with a ``"train"`` entry whose rows provide ``row['question']``.
        output_file: Path the result store is pickled to.
        num_samples: Number of leading training rows to process.
        dict_type: Key of the slot to fill in each record (the notebook uses
            "tiny", "7b" and "13b").
        results: Result store to update. Defaults to the notebook-level ``outputs``
            dict, which is what this function implicitly relied on before.
        max_new_tokens: Cap on the number of generated tokens (150, the value that
            used to be hard-coded).

    Returns:
        The updated result store.
    """
    # Fall back to the notebook global only when no store is supplied explicitly.
    store = outputs if results is None else results
    solution_prefix = "Provide your solution here: "
    # The split lookup does not change between iterations, so do it once.
    train_split = dataset["train"]

    model.eval()

    for i in range(num_samples):
        input_question = train_split[i]['question']
        input_prompt = "Solve the following math problem step by step: \n\n" + input_question + "\n\n Provide your solution here: "

        inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")

        with torch.no_grad():
            # Unpack the whole encoding so the attention mask travels with the ids.
            output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

        output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Text after the last solution marker; the whole text when the marker is absent.
        cleaned_output = output_text.rpartition(solution_prefix)[2].strip()

        print(f"{dict_type} | CURRENT IDX: {i}")

        store["gsm8k"][i][dict_type] = cleaned_output

    with open(output_file, 'wb') as f:
        pickle.dump(store, f)

    print(f"Generated {num_samples} solutions for {dict_type} and saved to {output_file}.")
    return store

In [None]:
wandb.init(project="classifier")
stop_event = threading.Event()
# Hand the monitor the SAME event that is set below. Previously a second, unrelated
# Event was passed in, so stop_event.set() could never be seen by the monitor.
# NOTE(review): assumes HWMonitor stops once its stop_event is set — confirm in power_monitoring.
hw_monitor = HWMonitor(monitoring_freq=1.0, stop_event=stop_event)
hw_monitor.start()

try:
    generated_samples = generate_gsm8k(llama7b, llama7b_tokenizer, gsm8k_dataset, "experiments/input_output_train", 3000, "7b")
finally:
    # Always stop the monitor and close the run, even when generation fails.
    stop_event.set()
    hw_monitor.join()
    wandb.finish()