In [None]:
!pip install codecarbon matplotlib torch transformers fasttext

In [None]:
import torch
import time
import fasttext
import matplotlib.pyplot as plt
from codecarbon import EmissionsTracker
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

In [None]:
from datasets import load_dataset

# Evaluation data: WMT14 German-English (validation split), CNN/DailyMail config
# '2.0.0' (validation split) and GSM8K config 'main' (test split).
# Later cells only read rows 0..999 of each dataset.
wmt14_dataset = load_dataset('wmt14', 'de-en', split='validation')
cnn_dailymail_dataset = load_dataset('cnn_dailymail', '2.0.0', split='validation')
gsm8k_dataset = load_dataset('openai/gsm8k', 'main', split='test')

In [None]:
# Notebook-wide CodeCarbon tracker. Later cells call tracker.start() / tracker.stop()
# around each measured loop.
# NOTE(review): the same instance is restarted many times below - confirm that the
# installed CodeCarbon version supports restarting a stopped tracker.
tracker = EmissionsTracker()

In [None]:
# Llama-2 7B chat: tokenizer plus model weights loaded as float16 and moved to "cuda".
# `model_name` is a scratch variable that the following load cells overwrite.
model_name = 'meta-llama/Llama-2-7b-chat-hf'

llama7b_tokenizer = AutoTokenizer.from_pretrained(model_name)
llama7b = AutoModelForCausalLM.from_pretrained(model_name,
                                               torch_dtype=torch.float16).to("cuda")

In [None]:
# TinyLlama 1.1B chat: tokenizer plus model weights loaded as float16 and moved to "cuda".
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'

tinyllama_tokenizer = AutoTokenizer.from_pretrained(model_name)
tinyllama = AutoModelForCausalLM.from_pretrained(model_name,
                                                 torch_dtype=torch.float16).to("cuda")

In [None]:
# Quantization settings for the 13B model: load weights in 4-bit and run the
# computation in float16.
llama13_config = BitsAndBytesConfig(load_in_4bit=True,
                                    bnb_4bit_compute_dtype=torch.float16)

In [None]:
# Llama-2 13B chat, loaded with the 4-bit `llama13_config`. Unlike the two smaller
# models there is no explicit .to("cuda"); placement is delegated to device_map='auto'.
model_name = 'meta-llama/Llama-2-13b-chat-hf'

llama13b_tokenizer = AutoTokenizer.from_pretrained(model_name)
llama13b = AutoModelForCausalLM.from_pretrained(model_name,
                                                device_map='auto',
                                                quantization_config=llama13_config)

## Carbon Emissions of Classifier

In [None]:
# Load the trained fastText routing classifier from the working directory.
# The cells below measure the emissions of calling predict() on 1000 inputs per dataset.
fasttext_classifier = fasttext.load_model("fasttext_classifier.bin")

In [None]:
# Emissions of classifying the first 1000 WMT14 German sentences.
# A fresh tracker is created for this measurement: restarting one shared instance
# after stop() is not a documented CodeCarbon pattern and, depending on the
# installed version, may report cumulative figures or ignore the restart.
tracker = EmissionsTracker()
tracker.start()
for idx in range(1000):
    input_text_wmt14 = wmt14_dataset[idx]['translation']['de']
    predicted_label, confidence_score = fasttext_classifier.predict(input_text_wmt14)
# Last expression of the cell: the returned emissions value (kg CO2eq) is displayed.
tracker.stop()

In [None]:
# Emissions of classifying the first 1000 CNN/DailyMail articles.
# A fresh tracker is created for this measurement: restarting one shared instance
# after stop() is not a documented CodeCarbon pattern and, depending on the
# installed version, may report cumulative figures or ignore the restart.
tracker = EmissionsTracker()
tracker.start()
for idx in range(1000):
    input_text_cnn_dailymail = cnn_dailymail_dataset[idx]['article']
    predicted_label, confidence_score = fasttext_classifier.predict(input_text_cnn_dailymail)
# Last expression of the cell: the returned emissions value (kg CO2eq) is displayed.
tracker.stop()

In [None]:
# Emissions of classifying the first 1000 GSM8K questions.
# A fresh tracker is created for this measurement: restarting one shared instance
# after stop() is not a documented CodeCarbon pattern and, depending on the
# installed version, may report cumulative figures or ignore the restart.
tracker = EmissionsTracker()
tracker.start()
for idx in range(1000):
    input_text_gsm8k = gsm8k_dataset[idx]['question']
    predicted_label, confidence_score = fasttext_classifier.predict(input_text_gsm8k)
# Last expression of the cell: the returned emissions value (kg CO2eq) is displayed.
tracker.stop()

## WMT14

In [None]:
def generate_output(model, tokenizer, dataset, current_idx):
    """Translate one WMT14 German sentence into English with ``model``.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer paired with ``model``; the encoded batch is moved to "cuda".
        dataset: Unused. The sentence is always read from the notebook-level
            ``wmt14_dataset``; the parameter is kept so the existing
            four-argument calls keep working.
        current_idx: Row index into ``wmt14_dataset``.

    Returns:
        The decoded generation with the echoed prompt removed, i.e. the text
        after the last "Write the translation here: " marker, or the whole
        stripped output when the marker is absent.
    """
    input_text = wmt14_dataset[current_idx]['translation']['de']
    input_prompt = "Translate the sentence from German to English: \n\n" + input_text + "\n\n Write the translation here: "

    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")
    output = model.generate(inputs['input_ids'])
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)

    answer_prefix = "Write the translation here: "
    if answer_prefix in output_text:
        cleaned_output = output_text.split(answer_prefix)[-1].strip()
    else:
        cleaned_output = output_text.strip()

    # Previously the cleaned text was computed and then silently discarded.
    return cleaned_output

In [None]:
# Baseline: emissions of translating WMT14 rows 0..999 with Llama-2-7b.
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # Llama7b
    for current_idx in range(0, 1000):
        input_text = wmt14_dataset[current_idx]['translation']['de']
        generate_output(llama7b, llama7b_tokenizer, input_text, current_idx)

        print(f"Llama-7b | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Baseline: emissions of translating WMT14 rows 0..999 with TinyLlama.
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # TinyLlama
    for current_idx in range(0, 1000):
        input_text = wmt14_dataset[current_idx]['translation']['de']
        generate_output(tinyllama, tinyllama_tokenizer, input_text, current_idx)

        print(f"TinyLlama | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Baseline: emissions of translating WMT14 rows 0..999 with Llama-2-13b (4-bit).
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # Llama13b
    for current_idx in range(0, 1000):
        input_text = wmt14_dataset[current_idx]['translation']['de']
        generate_output(llama13b, llama13b_tokenizer, input_text, current_idx)

        print(f"Llama13b | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Stand-alone stop() outside any measured loop.
# NOTE(review): presumably a manual cleanup for when a measurement cell above was
# interrupted before reaching its own stop(); redundant after a clean run - confirm.
tracker.stop()

#### FastText Classifier Model Selection

In [None]:
def generate_output(model, tokenizer, input_text):
    """Translate ``input_text`` (German) to English and keep the first sentence.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer paired with ``model``; the encoded batch is moved to "cuda".
        input_text: German source sentence inserted into the prompt.

    Returns:
        The generated text after the last "Write the translation here: "
        marker, cut after its first '.' (the period is kept). When there is
        no period the whole stripped text is returned.
    """
    marker = "Write the translation here: "
    prompt = "Translate the sentence from German to English: \n\n" + input_text + "\n\n Write the translation here: "

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to("cuda")
    generated = model.generate(encoded['input_ids'])
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # rpartition yields the text after the last marker, or the whole string
    # when the marker does not occur at all.
    translation = decoded.rpartition(marker)[2].strip()

    # Keep everything up to and including the first period, if there is one.
    head, period, _ = translation.partition('.')
    return head + period

In [None]:
# Load the fastText routing classifier again (same file as the earlier load cell),
# so this section can be run without re-running the classifier-emissions section.
fasttext_classifier = fasttext.load_model("fasttext_classifier.bin")

In [None]:
# Maps each input text to the model tag the classifier assigns to it
# ("7b", "Tiny" or "13b"); filled by the next cell.
test_classifier_dict = {}

In [None]:
# Route the first 1000 WMT14 sentences: the top fastText label is matched against
# the substrings "7b", "tiny", "13b" (in that order) and the first hit decides the tag.
# Inputs whose label matches none of them are left out of the dict.
for idx in range(1000):
    input_text_wmt14 = wmt14_dataset[idx]['translation']['de']
    predicted_label, confidence_score = fasttext_classifier.predict(input_text_wmt14)

    top_label = predicted_label[0]
    for needle, tag in (("7b", "7b"), ("tiny", "Tiny"), ("13b", "13b")):
        if needle in top_label:
            test_classifier_dict[input_text_wmt14] = tag
            break

# Split the routed inputs into one list per model tag.
inputs_7b, inputs_tiny, inputs_13b = (
    [text for text, assigned in test_classifier_dict.items() if assigned == wanted]
    for wanted in ("7b", "Tiny", "13b")
)

print(
    f"Number of inputs for 7b: {len(inputs_7b)}",
    f"Number of inputs for Tiny: {len(inputs_tiny)}",
    f"Number of inputs for 13b: {len(inputs_13b)}",
    sep="\n",
)

In [None]:
# One list of input texts per model tag, in dict insertion order.
inputs_7b, inputs_tiny, inputs_13b = (
    [text for text, assigned in test_classifier_dict.items() if assigned == wanted]
    for wanted in ("7b", "Tiny", "13b")
)

In [None]:
# Emissions of running Llama-2-7b only on the WMT14 inputs routed to "7b".
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

# Progress counter for this run. It was previously read before any cell had
# defined it, which raises NameError on a fresh kernel.
counter = 0

try:
    for x, y in list(test_classifier_dict.items()):
        if (y == "7b"):
            output = generate_output(llama7b, llama7b_tokenizer, x)
            print(f"{counter}/{len(inputs_7b)}: Inferenced a {y} sample.")
            counter += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Emissions of running TinyLlama only on the WMT14 inputs routed to "Tiny".
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

# Restart the progress counter; otherwise it carries over from whichever cell ran
# last and the "n/total" progress line is wrong.
counter = 0

try:
    for x, y in list(test_classifier_dict.items()):
        if (y == "Tiny"):
            output = generate_output(tinyllama, tinyllama_tokenizer, x)
            print(f"{counter}/{len(inputs_tiny)}: Inferenced a {y} sample.")
            counter += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# State for the resumable 13b loop in the next cell:
#   counter - number of "13b" samples processed so far (progress display only)
#   start   - index into list(test_classifier_dict.items()) at which the loop begins;
#             the loop advances it, so re-running that cell continues where it stopped.
counter = 0
start = 0

In [None]:
# Emissions of running Llama-2-13b only on the WMT14 inputs routed to "13b".
# The loop is resumable: `start` (set in the previous cell) is advanced per item,
# so re-running this cell after a failure continues from the first unprocessed item.
# Use a fresh tracker so this figure only covers the items processed by this run
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    for x, y in list(test_classifier_dict.items())[start:]:
        if (y == "13b"):
            output = generate_output(llama13b, llama13b_tokenizer, x)
            print(f"{counter}/{len(inputs_13b)}: Inferenced a {y} sample.")
            counter += 1
            # Release cached GPU memory between generations.
            torch.cuda.empty_cache()
        start += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

## CNN_Dailymail

In [None]:
def generate_output(model, tokenizer, dataset, current_idx):
    """Summarize one CNN/DailyMail article with ``model``.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer paired with ``model``; the encoded batch is moved to "cuda".
        dataset: Unused. The article is always read from the notebook-level
            ``cnn_dailymail_dataset``; the parameter is kept so the existing
            four-argument calls keep working.
        current_idx: Row index into ``cnn_dailymail_dataset``.

    Returns:
        The decoded generation (at most 100 new tokens) with the echoed prompt
        removed, i.e. the text after the last "Write the summary here: "
        marker, or the whole stripped output when the marker is absent.
    """
    input_text = cnn_dailymail_dataset[current_idx]['article']
    input_prompt = "Summarize the following text in under 50 words: \n\n" + input_text + "\n\n Write the summary here: "

    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")
    output = model.generate(inputs['input_ids'], max_new_tokens=100)
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # The marker must match the end of the prompt above. It used to be the GSM8K
    # marker ("Lets think step by step: "), so the prompt was never stripped.
    answer_prefix = "Write the summary here: "
    if answer_prefix in output_text:
        cleaned_output = output_text.split(answer_prefix)[-1].strip()
    else:
        cleaned_output = output_text.strip()

    # Previously the cleaned text was computed and then silently discarded.
    return cleaned_output

In [None]:
# Baseline: emissions of summarizing CNN/DailyMail rows 0..999 with Llama-2-7b.
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # Llama7b
    for current_idx in range(0, 1000):
        input_text = cnn_dailymail_dataset[current_idx]['article']
        generate_output(llama7b, llama7b_tokenizer, input_text, current_idx)

        print(f"Llama-7b | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Baseline: emissions of summarizing CNN/DailyMail rows 0..999 with TinyLlama.
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # TinyLlama
    for current_idx in range(0, 1000):
        input_text = cnn_dailymail_dataset[current_idx]['article']
        generate_output(tinyllama, tinyllama_tokenizer, input_text, current_idx)

        print(f"TinyLlama | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Baseline: emissions of summarizing CNN/DailyMail rows 0..999 with Llama-2-13b (4-bit).
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # Llama13b
    for current_idx in range(0, 1000):
        input_text = cnn_dailymail_dataset[current_idx]['article']
        generate_output(llama13b, llama13b_tokenizer, input_text, current_idx)

        print(f"Llama-13b | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Stand-alone stop() outside any measured loop.
# NOTE(review): presumably a manual cleanup for when a measurement cell above was
# interrupted before reaching its own stop(); redundant after a clean run - confirm.
tracker.stop()

In [None]:
def generate_output(model, tokenizer, input_text):
    """Summarize ``input_text`` with ``model`` and return only the summary.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer paired with ``model``; the encoded batch is moved to "cuda".
        input_text: Article text inserted into the summarization prompt.

    Returns:
        The decoded generation (at most 100 new tokens) after the last
        "Write the summary here: " marker, stripped; the whole stripped
        output when the marker is absent.
    """
    marker = "Write the summary here: "
    prompt = "Summarize the following text in under 50 words: \n\n" + input_text + "\n\n Write the summary here: "

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to("cuda")
    generated = model.generate(encoded['input_ids'], max_new_tokens=100)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # rpartition yields the text after the last marker, or the whole string
    # when the marker does not occur at all.
    return decoded.rpartition(marker)[2].strip()

In [None]:
# Load the fastText routing classifier again (same file as the earlier load cells),
# so this section can be run on its own.
fasttext_classifier = fasttext.load_model("fasttext_classifier.bin")

In [None]:
# Reset the input-text -> model-tag mapping before routing the CNN/DailyMail inputs
# (discards the entries produced for the previous dataset).
test_classifier_dict = {}

In [None]:
# Route the first 1000 CNN/DailyMail articles: the top fastText label is matched
# against the substrings "7b", "tiny", "13b" (in that order) and the first hit
# decides the tag. Articles whose label matches none of them are left out.
for idx in range(1000):
    input_text_cnn_dailymail = cnn_dailymail_dataset[idx]['article']
    predicted_label, confidence_score = fasttext_classifier.predict(input_text_cnn_dailymail)

    top_label = predicted_label[0]
    for needle, tag in (("7b", "7b"), ("tiny", "Tiny"), ("13b", "13b")):
        if needle in top_label:
            test_classifier_dict[input_text_cnn_dailymail] = tag
            break

In [None]:
# One list of input texts per model tag, in dict insertion order.
inputs_7b, inputs_tiny, inputs_13b = (
    [text for text, assigned in test_classifier_dict.items() if assigned == wanted]
    for wanted in ("7b", "Tiny", "13b")
)

In [None]:
# Report how many inputs the classifier routed to each model.
print(
    f"Number of inputs for 7b: {len(inputs_7b)}",
    f"Number of inputs for Tiny: {len(inputs_tiny)}",
    f"Number of inputs for 13b: {len(inputs_13b)}",
    sep="\n",
)

In [None]:
# Emissions of running Llama-2-7b only on the CNN/DailyMail inputs routed to "7b".
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

# Restart the progress counter; otherwise it carries over from the previous
# dataset's runs and the "n/total" progress line is wrong.
counter = 0

try:
    for x, y in list(test_classifier_dict.items()):
        if (y == "7b"):
            output = generate_output(llama7b, llama7b_tokenizer, x)
            print(f"{counter}/{len(inputs_7b)}: Inferenced a {y} sample.")
            counter += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Emissions of running TinyLlama only on the CNN/DailyMail inputs routed to "Tiny".
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

# Restart the progress counter; otherwise it carries over from whichever cell ran
# last and the "n/total" progress line is wrong.
counter = 0

try:
    for x, y in list(test_classifier_dict.items()):
        if (y == "Tiny"):
            output = generate_output(tinyllama, tinyllama_tokenizer, x)
            print(f"{counter}/{len(inputs_tiny)}: Inferenced a {y} sample.")
            counter += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# State for the resumable 13b loop in the next cell:
#   counter - number of "13b" samples processed so far (progress display only)
#   start   - index into list(test_classifier_dict.items()) at which the loop begins;
#             the loop advances it, so re-running that cell continues where it stopped.
counter = 0
start = 0

In [None]:
# Emissions of running Llama-2-13b only on the CNN/DailyMail inputs routed to "13b".
# The loop is resumable: `start` (set in the previous cell) is advanced per item,
# so re-running this cell after a failure continues from the first unprocessed item.
# Use a fresh tracker so this figure only covers the items processed by this run
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    for x, y in list(test_classifier_dict.items())[start:]:
        if (y == "13b"):
            output = generate_output(llama13b, llama13b_tokenizer, x)
            print(f"{counter}/{len(inputs_13b)}: Inferenced a {y} sample.")
            counter += 1
            # Release cached GPU memory between generations.
            torch.cuda.empty_cache()
        start += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

## GSM8K

In [None]:
def generate_output(model, tokenizer, dataset, current_idx):
    """Answer one GSM8K math question with ``model``.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer paired with ``model``; the encoded batch is moved to "cuda".
        dataset: Unused. The question is always read from the notebook-level
            ``gsm8k_dataset``; the parameter is kept so the existing
            four-argument calls keep working.
        current_idx: Row index into ``gsm8k_dataset``.

    Returns:
        The decoded generation with the echoed prompt removed, i.e. the text
        after the last "Lets think step by step: " marker, or the whole
        stripped output when the marker is absent.
    """
    input_text = gsm8k_dataset[current_idx]['question']
    input_prompt = "Answer the following math question: \n\n" + input_text + "\n\n Lets think step by step: "

    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")
    output = model.generate(inputs['input_ids'])
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)

    answer_prefix = "Lets think step by step: "
    if answer_prefix in output_text:
        cleaned_output = output_text.split(answer_prefix)[-1].strip()
    else:
        cleaned_output = output_text.strip()

    # Previously the cleaned text was computed and then silently discarded.
    return cleaned_output

In [None]:
# Baseline: emissions of answering GSM8K rows 0..999 with Llama-2-7b.
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # Llama7b
    for current_idx in range(0, 1000):
        input_text = gsm8k_dataset[current_idx]['question']
        generate_output(llama7b, llama7b_tokenizer, input_text, current_idx)

        print(f"Llama-7b | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Baseline: emissions of answering GSM8K rows 0..999 with TinyLlama.
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # TinyLlama
    for current_idx in range(0, 1000):
        input_text = gsm8k_dataset[current_idx]['question']
        generate_output(tinyllama, tinyllama_tokenizer, input_text, current_idx)

        print(f"TinyLlama | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Baseline: emissions of answering GSM8K rows 0..999 with Llama-2-13b (4-bit).
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    # Llama13b
    for current_idx in range(0, 1000):
        input_text = gsm8k_dataset[current_idx]['question']
        # Named after the model that actually produced it (was `output_7b`).
        output_13b = generate_output(llama13b, llama13b_tokenizer, input_text, current_idx)

        print(f"Llama-13b | CURRENT IDX: {current_idx}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Stand-alone stop() outside any measured loop.
# NOTE(review): presumably a manual cleanup for when a measurement cell above was
# interrupted before reaching its own stop(); redundant after a clean run - confirm.
tracker.stop()

In [None]:
def generate_output(model, tokenizer, input_text):
    """Answer the math question ``input_text`` with ``model``.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer paired with ``model``; the encoded batch is moved to "cuda".
        input_text: Question text inserted into the step-by-step prompt.

    Returns:
        The decoded generation after the last "Lets think step by step: "
        marker, stripped; the whole stripped output when the marker is absent.
    """
    marker = "Lets think step by step: "
    prompt = "Answer the following math question: \n\n" + input_text + "\n\n Lets think step by step: "

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to("cuda")
    generated = model.generate(encoded['input_ids'])
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # rpartition yields the text after the last marker, or the whole string
    # when the marker does not occur at all.
    return decoded.rpartition(marker)[2].strip()

In [None]:
# Load the fastText routing classifier again (same file as the earlier load cells),
# so this section can be run on its own.
fasttext_classifier = fasttext.load_model("fasttext_classifier.bin")

In [None]:
# Reset the input-text -> model-tag mapping before routing the GSM8K inputs
# (discards the entries produced for the previous dataset).
test_classifier_dict = {}

In [None]:
# Route the first 1000 GSM8K questions: the top fastText label is matched against
# the substrings "7b", "tiny", "13b" (in that order) and the first hit decides the tag.
# Questions whose label matches none of them are left out of the dict.
for idx in range(1000):
    input_text_gsm8k = gsm8k_dataset[idx]['question']
    predicted_label, confidence_score = fasttext_classifier.predict(input_text_gsm8k)

    top_label = predicted_label[0]
    for needle, tag in (("7b", "7b"), ("tiny", "Tiny"), ("13b", "13b")):
        if needle in top_label:
            test_classifier_dict[input_text_gsm8k] = tag
            break

In [None]:
# One list of input texts per model tag, in dict insertion order.
inputs_7b, inputs_tiny, inputs_13b = (
    [text for text, assigned in test_classifier_dict.items() if assigned == wanted]
    for wanted in ("7b", "Tiny", "13b")
)

In [None]:
# Report how many inputs the classifier routed to each model.
print(
    f"Number of inputs for 7b: {len(inputs_7b)}",
    f"Number of inputs for Tiny: {len(inputs_tiny)}",
    f"Number of inputs for 13b: {len(inputs_13b)}",
    sep="\n",
)

In [None]:
# Emissions of running Llama-2-7b only on the GSM8K inputs routed to "7b".
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

# Restart the progress counter; otherwise it carries over from the previous
# dataset's runs and the "n/total" progress line is wrong.
counter = 0

try:
    for x, y in list(test_classifier_dict.items()):
        if (y == "7b"):
            output = generate_output(llama7b, llama7b_tokenizer, x)
            print(f"{counter}/{len(inputs_7b)}: Inferenced a {y} sample.")
            counter += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# Emissions of running TinyLlama only on the GSM8K inputs routed to "Tiny".
# Use a fresh tracker so this figure is independent of earlier measurements
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

# Restart the progress counter; otherwise it carries over from whichever cell ran
# last and the "n/total" progress line is wrong.
counter = 0

try:
    for x, y in list(test_classifier_dict.items()):
        if (y == "Tiny"):
            output = generate_output(tinyllama, tinyllama_tokenizer, x)
            print(f"{counter}/{len(inputs_tiny)}: Inferenced a {y} sample.")
            counter += 1
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")

In [None]:
# State for the resumable 13b loop below:
#   counter - number of "13b" samples processed so far (progress display only)
#   start   - index into list(test_classifier_dict.items()) at which the loop begins.
# A full run must begin at 0. The previous hard-coded 706 was a leftover resume
# offset that made a fresh run skip the first 706 inputs. To resume after a
# failure, re-run only the loop cell (it keeps `start` up to date) rather than
# editing this value.
counter = 0
start = 0

In [None]:
# Show the offset at which the resumable 13b loop below will begin.
print(start)

In [None]:
# Emissions of running Llama-2-13b only on the GSM8K inputs routed to "13b".
# The loop is resumable: `start` (set in an earlier cell) is advanced per item,
# so re-running this cell after a failure continues from the first unprocessed item.
# Use a fresh tracker so this figure only covers the items processed by this run
# (restarting a stopped CodeCarbon tracker is not a documented pattern).
tracker = EmissionsTracker()
tracker.start()

try:
    for x, y in list(test_classifier_dict.items())[start:]:
        if (y == "13b"):
            output = generate_output(llama13b, llama13b_tokenizer, x)
            print(f"{counter}/{len(inputs_13b)}: Inferenced a {y} sample.")
            counter += 1
            # Release cached GPU memory between generations.
            torch.cuda.empty_cache()
        start += 1
        # Printed for every item so the resume offset is visible if the run dies.
        print(f"INDEX: {start}")
finally:
    # Always stop the tracker, even if generation fails or is interrupted.
    emissions: float = tracker.stop()

# CodeCarbon reports emissions in kg of CO2-equivalent, not metric tons.
print(f"Estimated emissions: {emissions:.10f} kg of CO2 equivalent.")