In [1]:
!pip install transformers datasets torch ipywidgets sentence_transformers matplotlib nltk bitsandbytes accelerate


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
print(torch.cuda.is_available())

True


In [2]:
import pickle

In [3]:
!nvidia-smi

Sun Jun 23 21:10:33 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  | 00000000:17:00.0 Off |                  N/A |
| 30%   48C    P8              22W / 370W |   3647MiB / 24576MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 3070        On  | 00000000:65:00.0 Off |  

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

In [4]:
from huggingface_hub import login

login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
import time
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"

#### Models

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

Llama7b

In [7]:
model_name = 'meta-llama/Llama-2-7b-chat-hf'

llama7b_tokenizer = AutoTokenizer.from_pretrained(model_name)
llama7b = AutoModelForCausalLM.from_pretrained(model_name,
                                               torch_dtype=torch.float16).to("cuda")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

TinyLlama

In [8]:
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'

tinyllama_tokenizer = AutoTokenizer.from_pretrained(model_name)
tinyllama = AutoModelForCausalLM.from_pretrained(model_name,
                                                 torch_dtype=torch.float16).to("cuda")

Llama13b

In [7]:
llama13_config = BitsAndBytesConfig(load_in_4bit=True,
                                    bnb_4bit_compute_dtype=torch.float16)

In [8]:
model_name = 'meta-llama/Llama-2-13b-chat-hf'

llama13b_tokenizer = AutoTokenizer.from_pretrained(model_name)
llama13b = AutoModelForCausalLM.from_pretrained(model_name,
                                                device_map='auto',
                                                quantization_config=llama13_config)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### Basic Model Loading + Inference

#### Llama 7b

In [11]:
start_time = time.time()

input_text = "Once upon a time in a land far, far away"
inputs = llama7b_tokenizer(input_text, return_tensors="pt").to("cuda")

output = llama7b.generate(inputs['input_ids'], max_length=100)

output_text = llama7b_tokenizer.decode(output[0], skip_special_tokens=True)
print(output_text)

execution_time = time.time() - start_time
print()
print(f"Execution time: {execution_time} seconds")

Once upon a time in a land far, far away, there was a magical kingdom called "The Land of the Free." In this kingdom, everyone was free to do as they pleased, and no one was ever punished for their actions.

One day, a young prince named "Liberty" decided to take a walk in the forest. As he wandered deeper into the woods, he came across a beautiful fairy named "Justice."

"Who

Execution time: 2.9925544261932373 seconds


#### TinyLlama

In [15]:
start_time = time.time()

input_text = "Once upon a time in a land far, far away"
inputs = tinyllama_tokenizer(input_text, return_tensors="pt").to("cuda")

output = tinyllama.generate(inputs['input_ids'], max_length=100)

output_text = tinyllama_tokenizer.decode(output[0], skip_special_tokens=True)
print(output_text)

execution_time = time.time() - start_time
print()
print(f"Execution time: {execution_time} seconds")

Once upon a time in a land far, far away, there lived a young girl named Lily. Lily was a kind and gentle girl, always looking for ways to help others. One day, while walking through the woods, Lily stumbled upon a group of animals who were being hunted by a pack of wolves. The wolves were fierce and dangerous, and they were determined to take the animals for their own. Lily knew that she had to do something to

Execution time: 1.5956108570098877 seconds


#### Llama 13b

In [11]:
start_time = time.time()

input_text = "Once upon a time in a land far, far away"
inputs = llama13b_tokenizer(input_text, return_tensors="pt").to("cuda")

output = llama13b.generate(inputs['input_ids'], max_length=100)

output_text = llama13b_tokenizer.decode(output[0], skip_special_tokens=True)
print(output_text)

execution_time = time.time() - start_time
print()
print(f"Execution time: {execution_time} seconds")



Once upon a time in a land far, far away, there was a magical kingdom called "Happily Ever Laughter." The kingdom was ruled by a wise and witty king named "King Punsley" who was loved by all his subjects for his ability to make them laugh with his clever jokes and puns.

One day, a brave and clever princess named "Princess Punsalot" decided to go on a quest to

Execution time: 5.729012727737427 seconds


In [12]:
torch.cuda.empty_cache()

### **WMT 2014 (Machine Translation)** 
is a collection of datasets used in shared tasks of the Ninth Workshop on Statistical Machine Translation

In [9]:
from datasets import load_dataset

wmt14_dataset = load_dataset('wmt14', 'de-en', split='test')

#### Example inference

In [15]:
input_text = wmt14_dataset[0]['translation']['en']  
input_prompt = f"Translate to English: {input_text}"

inputs = llama7b_tokenizer(input_prompt, return_tensors="pt")

In [16]:
output = llama7b.generate(inputs['input_ids'], max_length=50)

output_text = llama7b_tokenizer.decode(output[0], skip_special_tokens=True)
print(output_text)

Translate to English: Gutach: Increased safety for pedestrians and cyclists in the city

Gutach: Increased safety for pedestrians and cyclists in the city


#### Llama 7b

In [17]:
num_examples = 5 

state_time = time.time()
for i in range(num_examples):
    input_text = wmt14_dataset[i]['translation']['de']
    input_prompt = f"Translate to English: {input_text}"
    
    inputs = llama7b_tokenizer(input_prompt, return_tensors="pt", truncation=True, max_length=50)
    output = llama7b.generate(inputs['input_ids'], max_new_tokens=50)
    output_text = llama7b_tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Input: {input_prompt}")
    print(f"Output: {output_text}")
    print()

execution_time = time.time() - start_time
print()
print(f"Execution time: {execution_time} seconds")

Input: Translate to English: Gutach: Noch mehr Sicherheit für Fußgänger
Output: Translate to English: Gutach: Noch mehr Sicherheit für Fußgänger und Radfahrer

Original text:
Gutach: Noch mehr Sicherheit für Fußgänger und Radfahrer

Translation:
Gutach: More safety for pedestrians and cyclists



Input: Translate to English: Sie stehen keine 100 Meter voneinander entfernt: Am Dienstag ist in Gutach die neue B 33-Fußgängerampel am Dorfparkplatz in Betrieb genommen worden - in Sichtweite der älteren Rathausampel.
Output: Translate to English: Sie stehen keine 100 Meter voneinander entfernt: Am Dienstag ist in Gutach die neue B 33-Fußgängerampel am Dorfparkplatz in Betrieb genommen worden.

I hope this helps! Let me know if you have any questions.

Input: Translate to English: Zwei Anlagen so nah beieinander: Absicht oder Schildbürgerstreich?
Output: Translate to English: Zwei Anlagen so nah beieinander: Absicht oder Schildbürgerstreich?

Context: A lawyer is discussing two nearby industr

In [None]:
torch.cuda.empty_cache()

#### Tiny Llama

In [12]:
num_examples = 5 

state_time = time.time()
for i in range(num_examples):
    input_text = wmt14_dataset[i]['translation']['de']
    input_prompt = f"Translate to English: {input_text}"
    
    inputs = tinyllama_tokenizer(input_prompt, return_tensors="pt", truncation=True, max_length=50)
    output = tinyllama.generate(inputs['input_ids'], max_new_tokens=50)
    output_text = tinyllama_tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Input: {input_prompt}")
    print(f"Output: {output_text}")
    print()

execution_time = time.time() - start_time
print()
print(f"Execution time: {execution_time} seconds")

KeyboardInterrupt: 

In [None]:
torch.cuda.empty_cache()

#### Llama 13b

In [21]:
num_examples = 5 

state_time = time.time()
for i in range(num_examples):
    input_text = wmt14_dataset[i]['translation']['de']
    input_prompt = f"Translate to English: {input_text}"
    
    inputs = llama13b_tokenizer(input_prompt, return_tensors="pt", truncation=True, max_length=50)
    output = llama13b.generate(inputs['input_ids'], max_new_tokens=50)
    output_text = llama13b_tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Input: {input_prompt}")
    print(f"Output: {output_text}")
    print()

execution_time = time.time() - start_time
print()
print(f"Execution time: {execution_time} seconds")

Input: Translate to English: Gutach: Noch mehr Sicherheit für Fußgänger
Output: Translate to English: Gutach: Noch mehr Sicherheit für Fußgänger und Radfahrer

Please provide the translation to English.

"Gutach: More safety for pedestrians and cyclists"

Would you like me to translate the entire text or just the headline? Additionally

Input: Translate to English: Sie stehen keine 100 Meter voneinander entfernt: Am Dienstag ist in Gutach die neue B 33-Fußgängerampel am Dorfparkplatz in Betrieb genommen worden - in Sichtweite der älteren Rathausampel.
Output: Translate to English: Sie stehen keine 100 Meter voneinander entfernt: Am Dienstag ist in Gutach die neue B 33-Fußgängerampel am Dorfparkplatz in Betrieb genommen worden.

Translation to English: You are not 100 meters apart: On Tuesday, the new pedestrian traffic light at the village park parking lot on the B 33 was put into operation in Gutach

Input: Translate to English: Zwei Anlagen so nah beieinander: Absicht oder Schildbürg

In [7]:
torch.cuda.empty_cache()

NameError: name 'torch' is not defined

#### Functionalize Inference

In [15]:
token_ranges = {
    '0-50': 0,
    '51-100': 0,
    '101-150': 0
}

max_tokens = -1

for idx, data in enumerate(wmt14_dataset):
    input_text = data['translation']['de']
    tokens = llama13b_tokenizer(input_text, return_tensors="pt")
    num_tokens = len(tokens['input_ids'][0])
    
    if num_tokens > max_tokens:
        max_tokens = num_tokens
        max_tokens_idx = idx
    
    if num_tokens <= 50:
        token_ranges['0-50'] += 1
    elif num_tokens <= 100:
        token_ranges['51-100'] += 1
    elif num_tokens <= 150:
        token_ranges['101-150'] += 1

print("Number of data points in different token ranges:")
for key, value in token_ranges.items():
    print(f"{key}: {value}")

print(f"\nData point with the most tokens is at index: {max_tokens_idx}")
print(f"Number of tokens: {max_tokens}")
print(f"Input text: {wmt14_dataset[max_tokens_idx]['translation']['de']}")

Number of data points in different token ranges:
0-50: 2290
51-100: 692
101-150: 21

Data point with the most tokens is at index: 393
Number of tokens: 131
Input text: Die früher supergeheime NSA, deren Spitzname einst No Such Agency (Keine solche Behörde) lautete, findet sich inzwischen im hellen Licht der Öffentlichkeit und sieht sich nach den in den letzten Monaten bekannt gewordenen Enthüllungen über ihr ausgedehntes Überwachungsprogramm im In- und Ausland scharfer Kritik ausgesetzt – ein Resultat der geheimen NSA-Daten, die vom desillusionierten ehemaligen NSA-Mitarbeiter Edward Snowden gestohlen und veröffentlicht wurden.


In [10]:
def generate_output(model, tokenizer, dataset, current_idx):
    outputs = []
    
    input_text = wmt14_dataset[current_idx]['translation']['de']
    input_prompt = "Translate the sentence from German to English: \n\n" + input_text + "\n\n Write the translation here: "

    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")
    output = model.generate(inputs['input_ids'])
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    answer_prefix = "Write the translation here: "
    if answer_prefix in output_text:
        cleaned_output = output_text.split(answer_prefix)[-1].strip()
    else:
        cleaned_output = output_text.strip()

    first_sentence = cleaned_output.split('.')[0] + '.' if '.' in cleaned_output else cleaned_output
    outputs.append(first_sentence)
    
    return outputs

## Inference 

In [12]:
input_texts = []
outputs_7b = []
outputs_tiny = []
outputs_13b = []

In [13]:
for current_idx in range(396, 1000):
    input_text = wmt14_dataset[current_idx]['translation']['de']
    output_7b = generate_output(llama7b, llama7b_tokenizer, input_text, current_idx)

    outputs_7b.append(output_7b)
    
    print(f"Llama-7b | CURRENT IDX: {current_idx} | Length: {len(outputs_7b)}")
    # with open('input_output_pairs_wmt14_7b', 'wb') as f:
    #     pickle.dump(outputs_7b, f)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Llama-7b | CURRENT IDX: 396 | Length: 397
Llama-7b | CURRENT IDX: 397 | Length: 398
Llama-7b | CURRENT IDX: 398 | Length: 399
Llama-7b | CURRENT IDX: 399 | Length: 400
Llama-7b | CURRENT IDX: 400 | Length: 401
Llama-7b | CURRENT IDX: 401 | Length: 402
Llama-7b | CURRENT IDX: 402 | Length: 403
Llama-7b | CURRENT IDX: 403 | Length: 404
Llama-7b | CURRENT IDX: 404 | Length: 405
Llama-7b | CURRENT IDX: 405 | Length: 406
Llama-7b | CURRENT IDX: 406 | Length: 407
Llama-7b | CURRENT IDX: 407 | Length: 408
Llama-7b | CURRENT IDX: 408 | Length: 409
Llama-7b | CURRENT IDX: 409 | Length: 410
Llama-7b | CURRENT IDX: 410 | Length: 411
Llama-7b | CURRENT IDX: 411 | Length: 412
Llama-7b | CURRENT IDX: 412 | Length: 413
Llama-7b | CURRENT IDX: 413 | Length: 414
Llama-7b | CURRENT IDX: 414 | Length: 415
Llama-7b | CURRENT IDX: 415 | Length: 416
Llama-7b | CURRENT IDX: 416 | Length: 417
Llama-7b | CURRENT IDX: 417 | Length: 418
Llama-7b | CURRENT IDX: 418 | Length: 419
Llama-7b | CURRENT IDX: 419 | Leng

In [16]:
with open('input_output_pairs_wmt14_7b', 'rb') as f:
    outputs_7b = pickle.load(f)

print(len(outputs_7b))
print(outputs_7b[998:1000])

1000
[['After years of negotiations, we have now reached an agreement with the International Atomic Energy Agency to clear up the differences of the past few years, wrote Foreign Minister Mohammed Jawad Sarif on his Facebook page.'], ['It may still be a long way off, but the atomic energy negotiator is satisfied with the negotiation process and even more optimistic that both sides will come to a solution in the end.']]


In [21]:
for current_idx in range(0, 1000):
    input_text = wmt14_dataset[current_idx]['translation']['de']
    output_tiny = generate_output(tinyllama, tinyllama_tokenizer, input_text, current_idx)

    outputs_tiny.append(output_tiny)
    
    print(f"TinyLlama | CURRENT IDX: {current_idx} | Length: {len(outputs_tiny)}")
    # with open('input_output_pairs_wmt14_tiny', 'wb') as f:
    #     pickle.dump(outputs_tiny, f)

TinyLlama | CURRENT IDX: 0 | Length: 1
TinyLlama | CURRENT IDX: 1 | Length: 2
TinyLlama | CURRENT IDX: 2 | Length: 3
TinyLlama | CURRENT IDX: 3 | Length: 4
TinyLlama | CURRENT IDX: 4 | Length: 5
TinyLlama | CURRENT IDX: 5 | Length: 6
TinyLlama | CURRENT IDX: 6 | Length: 7
TinyLlama | CURRENT IDX: 7 | Length: 8
TinyLlama | CURRENT IDX: 8 | Length: 9
TinyLlama | CURRENT IDX: 9 | Length: 10
TinyLlama | CURRENT IDX: 10 | Length: 11
TinyLlama | CURRENT IDX: 11 | Length: 12
TinyLlama | CURRENT IDX: 12 | Length: 13
TinyLlama | CURRENT IDX: 13 | Length: 14
TinyLlama | CURRENT IDX: 14 | Length: 15
TinyLlama | CURRENT IDX: 15 | Length: 16
TinyLlama | CURRENT IDX: 16 | Length: 17
TinyLlama | CURRENT IDX: 17 | Length: 18
TinyLlama | CURRENT IDX: 18 | Length: 19
TinyLlama | CURRENT IDX: 19 | Length: 20
TinyLlama | CURRENT IDX: 20 | Length: 21
TinyLlama | CURRENT IDX: 21 | Length: 22
TinyLlama | CURRENT IDX: 22 | Length: 23
TinyLlama | CURRENT IDX: 23 | Length: 24
TinyLlama | CURRENT IDX: 24 | Lengt

In [17]:
with open('input_output_pairs_wmt14_tiny', 'rb') as f:
    outputs_tiny = pickle.load(f)

print(len(outputs_tiny))
print(outputs_tiny[:10])

1000
[['Translation: Still more safety for pedestrians'], ['You are not 100 meters away from each other: On Monday, the new B 33-foot pedestrian crossing was opened at the Gutach village park in the town center.'], ["Two stations so close together: A secret or citizen's strike?"], ['This question has been clearly answered by the mayor yesterday.'], ['The building insulation system was installed in the old town hall because it secures the school route, explained Eckert yesterday.'], ['The Kluser-Ampel secures both drivers and bus passengers, as well as the residents of the Bergle district.'], ['The official opening of the recently implemented facility is crucial for the intersection of Sulzbachweg and Kirchstraße.'], ['We have the museum, two churches, Kurpark, the bus stop, an emergency physician, a bank, and the traffic flow from the residential area ›Grub‹.'], ['In the high traffic and pedestrian flow, a new traffic light was installed to ensure the safety of pedestrians.'], ['This c

In [12]:
for current_idx in range(980, 1000):
    input_text = wmt14_dataset[current_idx]['translation']['de']
    output_13b = generate_output(llama13b, llama13b_tokenizer, input_text, current_idx)

    outputs_13b.append(output_13b)
    
    print(f"Llama13b | CURRENT IDX: {current_idx} | Length: {len(outputs_13b)}")
    # with open('input_output_pairs_wmt14_13b', 'wb') as f:
    #     pickle.dump(outputs_13b, f)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Llama13b | CURRENT IDX: 980 | Length: 981
Llama13b | CURRENT IDX: 981 | Length: 982
Llama13b | CURRENT IDX: 982 | Length: 983
Llama13b | CURRENT IDX: 983 | Length: 984
Llama13b | CURRENT IDX: 984 | Length: 985
Llama13b | CURRENT IDX: 985 | Length: 986
Llama13b | CURRENT IDX: 986 | Length: 987
Llama13b | CURRENT IDX: 987 | Length: 988
Llama13b | CURRENT IDX: 988 | Length: 989
Llama13b | CURRENT IDX: 989 | Length: 990
Llama13b | CURRENT IDX: 990 | Length: 991
Llama13b | CURRENT IDX: 991 | Length: 992
Llama13b | CURRENT IDX: 992 | Length: 993
Llama13b | CURRENT IDX: 993 | Length: 994
Llama13b | CURRENT IDX: 994 | Length: 995
Llama13b | CURRENT IDX: 995 | Length: 996
Llama13b | CURRENT IDX: 996 | Length: 997
Llama13b | CURRENT IDX: 997 | Length: 998
Llama13b | CURRENT IDX: 998 | Length: 999
Llama13b | CURRENT IDX: 999 | Length: 1000


In [14]:
with open('input_output_pairs_wmt14_13b', 'rb') as f:
    outputs_13b = pickle.load(f)

print(len(outputs_13b))
print(outputs_13b[999])

1000
['It would be a long way still, but the atomic wholesaler would be satisfied with the negotiations and optimistic that both sides would reach a solution at the end.']


In [32]:
input_output_pairs = []

In [33]:
wmt14_dataset[500]['translation']['de']

'Anwälte müssen die höchsten Standards an Integrität aufrechterhalten und in der Öffentlichkeit für Vertrauen und Zuversicht sorgen.'

In [34]:
for idx in range(len(outputs_7b)):
    outputs = {
        'input': wmt14_dataset[idx]['translation']['de'],
        'output_7b': outputs_7b[idx],
        'output_tiny': outputs_tiny[idx],
        'output_13b': outputs_13b[idx]
    }
    
    input_output_pairs.append(outputs)

# with open('input_output_pairs_wmt14.pkl', 'wb') as f:
#     pickle.dump(input_output_pairs, f)

In [35]:
with open('input_output_pairs_wmt14.pkl', 'rb') as f:
    input_output_pairs = pickle.load(f)

len(input_output_pairs)

1000

In [36]:
print(input_output_pairs[:100])

[{'input': 'Gutach: Noch mehr Sicherheit für Fußgänger', 'output_7b': ['Good luck!'], 'output_tiny': ['Translation: Still more safety for pedestrians'], 'output_13b': ['(Your answer)\n\nCorrect answer:\n\n"Expert opinion: Even more safety for pedestrians"\n\nTranslation:\n\nGutach means "expert opinion" in German, so the sentence "Gutach: Noch mehr Sicherheit für Fußgänger" can be translated to English as "Expert opinion: Even more safety for pedestrians".']}, {'input': 'Sie stehen keine 100 Meter voneinander entfernt: Am Dienstag ist in Gutach die neue B 33-Fußgängerampel am Dorfparkplatz in Betrieb genommen worden - in Sichtweite der älteren Rathausampel.', 'output_7b': ['They are not 100 meters apart: On Tuesday, the new B 33 pedestrian traffic light at the village square in Gutach was put into operation - in sight of the older town hall traffic light.'], 'output_tiny': ['You are not 100 meters away from each other: On Monday, the new B 33-foot pedestrian crossing was opened at the 

### **CNN_Dailymail (Summarization)**
is an English-language dataset containing just over 300k unique news articles as written by journalists at CNN and the Daily Mail

In [9]:
from datasets import load_dataset

cnn_dailymail_dataset = load_dataset('abisee/cnn_dailymail', '2.0.0', split='test')

In [52]:
input_text = cnn_dailymail_dataset[100]['article'] 
input_prompt = "Summarize the following text in under 50 words: \n\n" + input_text + "\n\n Write the summary here: "

inputs = tinyllama_tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")

In [53]:
output = tinyllama.generate(inputs['input_ids'], max_new_tokens=100)

output_text = tinyllama_tokenizer.decode(output[0], skip_special_tokens=True)

print(f"Input: {input_prompt}")
print(f"Output: {output_text}")

Input: Summarize the following text in under 50 words: 

(CNN)Anthony Ray Hinton is thankful to be free after nearly 30 years on Alabama's death row for murders he says he didn't commit. And incredulous that it took so long. Hinton, 58, looked up, took in the sunshine and thanked God and his lawyers Friday morning outside the county jail in Birmingham, minutes after taking his first steps as a free man since 1985. He spoke of unjustly losing three decades of his life, under fear of execution, for something he didn't do. "All they had to do was to test the gun, but when you think you're high and mighty and you're above the law, you don't have to answer to nobody," Hinton told reporters. "But I've got news for you -- everybody that played a part in sending me to death row, you will answer to God." Jefferson County Circuit Court Judge Laura Petro had ordered Hinton released after granting the state's motion to dismiss charges against him. Hinton was convicted of murder in the 1985 deaths 

In [54]:
summary_prefix = "Write the summary here: "
if summary_prefix in output_text:
    cleaned_output = output_text.split(summary_prefix)[-1].strip()
else:
    cleaned_output = output_text.strip()

print(cleaned_output)

Anthony Ray Hinton, a man who spent nearly 30 years on Alabama's death row for murders he says he didn't commit, is thankful to be free after nearly 30 years on the list. He thanked God and his lawyers Friday morning outside the county jail in Birmingham, minutes after taking his first steps as a free man since 1985. Hinton, 58, looked up, took in


In [55]:
token_ranges = {
    '0-100': 0,
    '101-200': 0,
    '201-300': 0,
    '301-400': 0,
    '401-500': 0,
    '501-600': 0,
    '601-700': 0,
    '701-800': 0,
    '801-900': 0,
    '901-1000': 0,
    '1001+': 0
}

max_tokens = -1
max_tokens_idx = -1

for idx, data in enumerate(cnn_dailymail_dataset):
    input_text = data['article']
    tokens = llama7b_tokenizer(input_text, return_tensors="pt")
    num_tokens = len(tokens['input_ids'][0])
    
    if num_tokens > max_tokens:
        max_tokens = num_tokens
        max_tokens_idx = idx
    
    if num_tokens <= 100:
        token_ranges['0-100'] += 1
    elif num_tokens <= 200:
        token_ranges['101-200'] += 1
    elif num_tokens <= 300:
        token_ranges['201-300'] += 1
    elif num_tokens <= 400:
        token_ranges['301-400'] += 1
    elif num_tokens <= 500:
        token_ranges['401-500'] += 1
    elif num_tokens <= 600:
        token_ranges['501-600'] += 1
    elif num_tokens <= 700:
        token_ranges['601-700'] += 1
    elif num_tokens <= 800:
        token_ranges['701-800'] += 1
    elif num_tokens <= 900:
        token_ranges['801-900'] += 1
    elif num_tokens <= 1000:
        token_ranges['901-1000'] += 1
    else:
        token_ranges['1001+'] += 1

print("Number of data points in different token ranges:")
for key, value in token_ranges.items():
    print(f"{key}: {value}")

print(f"\nData point with the most tokens is at index: {max_tokens_idx}")
print(f"Number of tokens: {max_tokens}")
print(f"Input text: {cnn_dailymail_dataset[max_tokens_idx]['article']}")

Number of data points in different token ranges:
0-100: 1
101-200: 41
201-300: 261
301-400: 575
401-500: 903
501-600: 1011
601-700: 1030
701-800: 1039
801-900: 963
901-1000: 898
1001+: 4768

Data point with the most tokens is at index: 603
Number of tokens: 3534
Input text: (CNN)The nominations for the 69th Annual Tony Awards were announced Tuesday morning. Past Tony winner and three-time nominee Mary-Louise Parker unveiled the nominees with Bruce Willis, who is set to make his Broadway debut in the upcoming play "Misery." The awards are set to be handed out June 7 in a ceremony airing live at 8 p.m. on CBS (tape-delayed on the West Coast) from Radio City Music Hall. Kristin Chenoweth, a nominee for "On the 20th Century," and recent "Cabaret" star Alan Cumming are set to host the ceremony. The full list of nominees is below. Best Play . "The Curious Incident of the Dog in the Night-Time" Author: Simon Stephens . Producers: Stuart Thompson, Tim Levy for NT America, Warner Bros. Theatre 

In [10]:
def generate_output(model, tokenizer, dataset, current_idx):
    outputs = []
    
    input_text = cnn_dailymail_dataset[current_idx]['article'] 
    input_prompt = "Summarize the following text in under 50 words: \n\n" + input_text + "\n\n Write the summary here: "
    
    #inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True, max_length=2048).to("cuda")
    #output = model.generate(inputs['input_ids'], max_new_tokens=2048)
    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")
    output = model.generate(inputs['input_ids'], max_new_tokens=100)
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    summary_prefix = "Write the summary here: "
    if summary_prefix in output_text:
        cleaned_output = output_text.split(summary_prefix)[-1].strip()
    else:
        cleaned_output = output_text.strip()

    outputs.append(cleaned_output)
    
    return outputs

In [14]:
input_texts = []
outputs_7b = []
outputs_tiny = []
outputs_13b = []

In [12]:
# for current_idx in range(0, 1000):
#     input_text = cnn_dailymail_dataset[current_idx]['article']
#     output_7b = generate_output(llama7b, llama7b_tokenizer, input_text, current_idx)

#     outputs_7b.append(output_7b)
    
#     print(f"Llama-7b | CURRENT IDX: {current_idx} | Length: {len(outputs_7b)}")
#     with open('input_output_pairs_cnn_dailymail_7b', 'wb') as f:
#         pickle.dump(outputs_7b, f)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Llama-7b | CURRENT IDX: 987 | Length: 988
Llama-7b | CURRENT IDX: 988 | Length: 989
Llama-7b | CURRENT IDX: 989 | Length: 990
Llama-7b | CURRENT IDX: 990 | Length: 991
Llama-7b | CURRENT IDX: 991 | Length: 992
Llama-7b | CURRENT IDX: 992 | Length: 993
Llama-7b | CURRENT IDX: 993 | Length: 994
Llama-7b | CURRENT IDX: 994 | Length: 995
Llama-7b | CURRENT IDX: 995 | Length: 996
Llama-7b | CURRENT IDX: 996 | Length: 997
Llama-7b | CURRENT IDX: 997 | Length: 998
Llama-7b | CURRENT IDX: 998 | Length: 999
Llama-7b | CURRENT IDX: 999 | Length: 1000


In [18]:
with open('input_output_pairs_cnn_dailymail_7b', 'rb') as f:
    outputs_7b = pickle.load(f)

print(len(outputs_7b))
# print(outputs_7b)

1000


In [15]:
# for current_idx in range(0, 1000):
#     input_text = cnn_dailymail_dataset[current_idx]['article']
#     output_tiny = generate_output(tinyllama, tinyllama_tokenizer, input_text, current_idx)
    
#     outputs_tiny.append(output_tiny)
    
#     print(f"TinyLlama | CURRENT IDX: {current_idx} | Length: {len(outputs_tiny)}")
#     with open('input_output_pairs_cnn_dailymail_tinyllama', 'wb') as f:
#         pickle.dump(outputs_tiny, f)

TinyLlama | CURRENT IDX: 0 | Length: 1
TinyLlama | CURRENT IDX: 1 | Length: 2
TinyLlama | CURRENT IDX: 2 | Length: 3
TinyLlama | CURRENT IDX: 3 | Length: 4
TinyLlama | CURRENT IDX: 4 | Length: 5
TinyLlama | CURRENT IDX: 5 | Length: 6
TinyLlama | CURRENT IDX: 6 | Length: 7
TinyLlama | CURRENT IDX: 7 | Length: 8
TinyLlama | CURRENT IDX: 8 | Length: 9
TinyLlama | CURRENT IDX: 9 | Length: 10
TinyLlama | CURRENT IDX: 10 | Length: 11
TinyLlama | CURRENT IDX: 11 | Length: 12
TinyLlama | CURRENT IDX: 12 | Length: 13
TinyLlama | CURRENT IDX: 13 | Length: 14
TinyLlama | CURRENT IDX: 14 | Length: 15
TinyLlama | CURRENT IDX: 15 | Length: 16
TinyLlama | CURRENT IDX: 16 | Length: 17
TinyLlama | CURRENT IDX: 17 | Length: 18
TinyLlama | CURRENT IDX: 18 | Length: 19
TinyLlama | CURRENT IDX: 19 | Length: 20
TinyLlama | CURRENT IDX: 20 | Length: 21
TinyLlama | CURRENT IDX: 21 | Length: 22
TinyLlama | CURRENT IDX: 22 | Length: 23
TinyLlama | CURRENT IDX: 23 | Length: 24
TinyLlama | CURRENT IDX: 24 | Lengt

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


TinyLlama | CURRENT IDX: 30 | Length: 31
TinyLlama | CURRENT IDX: 31 | Length: 32
TinyLlama | CURRENT IDX: 32 | Length: 33
TinyLlama | CURRENT IDX: 33 | Length: 34
TinyLlama | CURRENT IDX: 34 | Length: 35
TinyLlama | CURRENT IDX: 35 | Length: 36
TinyLlama | CURRENT IDX: 36 | Length: 37
TinyLlama | CURRENT IDX: 37 | Length: 38
TinyLlama | CURRENT IDX: 38 | Length: 39
TinyLlama | CURRENT IDX: 39 | Length: 40
TinyLlama | CURRENT IDX: 40 | Length: 41
TinyLlama | CURRENT IDX: 41 | Length: 42
TinyLlama | CURRENT IDX: 42 | Length: 43
TinyLlama | CURRENT IDX: 43 | Length: 44
TinyLlama | CURRENT IDX: 44 | Length: 45
TinyLlama | CURRENT IDX: 45 | Length: 46
TinyLlama | CURRENT IDX: 46 | Length: 47
TinyLlama | CURRENT IDX: 47 | Length: 48
TinyLlama | CURRENT IDX: 48 | Length: 49
TinyLlama | CURRENT IDX: 49 | Length: 50
TinyLlama | CURRENT IDX: 50 | Length: 51
TinyLlama | CURRENT IDX: 51 | Length: 52
TinyLlama | CURRENT IDX: 52 | Length: 53
TinyLlama | CURRENT IDX: 53 | Length: 54
TinyLlama | CURR

In [17]:
with open('input_output_pairs_cnn_dailymail_tinyllama', 'rb') as f:
    outputs_tiny = pickle.load(f)

print(len(outputs_tiny))
# print(outputs_tiny)

1000


In [12]:
# for current_idx in range(604, 1000):
#     input_text = cnn_dailymail_dataset[current_idx]['article']
#     output_13b = generate_output(llama13b, llama13b_tokenizer, input_text, current_idx)
    
#     outputs_13b.append(output_13b)
    
#     print(f"Llama-13b | CURRENT IDX: {current_idx} | Length: {len(outputs_13b)}")
#     with open('input_output_pairs_cnn_dailymail_13b.pkl', 'wb') as f:
#         pickle.dump(outputs_13b, f)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Llama-13b | CURRENT IDX: 604 | Length: 605
Llama-13b | CURRENT IDX: 605 | Length: 606
Llama-13b | CURRENT IDX: 606 | Length: 607
Llama-13b | CURRENT IDX: 607 | Length: 608
Llama-13b | CURRENT IDX: 608 | Length: 609
Llama-13b | CURRENT IDX: 609 | Length: 610
Llama-13b | CURRENT IDX: 610 | Length: 611
Llama-13b | CURRENT IDX: 611 | Length: 612
Llama-13b | CURRENT IDX: 612 | Length: 613
Llama-13b | CURRENT IDX: 613 | Length: 614
Llama-13b | CURRENT IDX: 614 | Length: 615
Llama-13b | CURRENT IDX: 615 | Length: 616
Llama-13b | CURRENT IDX: 616 | Length: 617
Llama-13b | CURRENT IDX: 617 | Length: 618
Llama-13b | CURRENT IDX: 618 | Length: 619
Llama-13b | CURRENT IDX: 619 | Length: 620
Llama-13b | CURRENT IDX: 620 | Length: 621
Llama-13b | CURRENT IDX: 621 | Length: 622
Llama-13b | CURRENT IDX: 622 | Length: 623
Llama-13b | CURRENT IDX: 623 | Length: 624
Llama-13b | CURRENT IDX: 624 | Length: 625
Llama-13b | CURRENT IDX: 625 | Length: 626
Llama-13b | CURRENT IDX: 626 | Length: 627
Llama-13b |

In [13]:
with open('input_output_pairs_cnn_dailymail_13b.pkl', 'rb') as f:
    outputs_13b = pickle.load(f)

print(len(outputs_13b))
# print(outputs_13b)

1000


In [14]:
!nvidia-smi

Sun Jun 23 21:44:43 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  | 00000000:17:00.0 Off |                  N/A |
| 39%   57C    P8              23W / 370W |  11258MiB / 24576MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 3070        On  | 00000000:65:00.0 Off |  

In [15]:
input_output_pairs = []

In [21]:
# for idx in range(len(cnn_dailymail_dataset)):
#     outputs = {
#         'input': cnn_dailymail_dataset[idx]["article"],
#         'output_7b': outputs_7b[idx],
#         'output_tiny': outputs_tiny[idx],
#         'output_13b': outputs_13b[idx]
#     }
    
#     input_output_pairs.append(outputs)
    
#     print("---------------------------------------------------------------------------")
#     print(f"CURRENT IDX: {idx}")
#     print(f"Length: {len(input_output_pairs)}")
#     # print(f"Current Dataset: {input_output_pairs[-1]}")
#     with open('input_output_pairs_cnn_dailymail.pkl', 'wb') as f:
#         pickle.dump(input_output_pairs, f)
#     print("---------------------------------------------------------------------------")

### **GSM8K (Math)**
is a dataset of 8.5K high quality linguistically diverse grade school math word problems. The dataset was created to support the task of question answering on basic mathematical problems that require multi-step reasoning.

In [10]:
from datasets import load_dataset

gsm8k_dataset = load_dataset('openai/gsm8k', 'main', split='train')

In [19]:
input_text = gsm8k_dataset[0]['question'] 
input_prompt = "Answer the following math question: \n\n" + input_text + "\n\n Lets think step by step: "

inputs = tinyllama_tokenizer(input_prompt, return_tensors="pt").to("cuda")

In [20]:
output = tinyllama.generate(inputs['input_ids'])

output_text = tinyllama_tokenizer.decode(output[0], skip_special_tokens=True)

print(f"Input: {input_prompt}")
print(f"Output: {output_text}")

Input: Answer the following math question: 

Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?

 Lets think step by step: 
Output: Answer the following math question: 

Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?

 Lets think step by step: 

1. Natalia sold clips to 48 of her friends in April.
2. She sold half as many clips in May.
3. She sold 48 clips in April and 24 clips in May.
4. She sold 72 clips in total.

So, the answer is: 72 clips.


In [22]:
answer_prefix = "Lets think step by step: "
if answer_prefix in output_text:
    cleaned_output = output_text.split(answer_prefix)[-1].strip()
else:
    cleaned_output = output_text.strip()

print(cleaned_output)

1. Natalia sold clips to 48 of her friends in April.
2. She sold half as many clips in May.
3. She sold 48 clips in April and 24 clips in May.
4. She sold 72 clips in total.

So, the answer is: 72 clips.


In [22]:
token_ranges = {
    '0-50': 0,
    '51-100': 0,
    '101-150': 0,
    '151-200': 0,
    '201-250': 0
}

max_tokens = -1

for idx, data in enumerate(gsm8k_dataset):
    input_text = data['question']
    tokens = llama7b_tokenizer(input_text, return_tensors="pt")
    num_tokens = len(tokens['input_ids'][0])
    
    if num_tokens > max_tokens:
        max_tokens = num_tokens
        max_tokens_idx = idx
    
    if num_tokens <= 50:
        token_ranges['0-50'] += 1
    elif num_tokens <= 100:
        token_ranges['51-100'] += 1
    elif num_tokens <= 150:
        token_ranges['101-150'] += 1
    elif num_tokens <= 200:
        token_ranges['151-200'] += 1
    elif num_tokens <= 250:
        token_ranges['201-250'] += 1

print("Number of data points in different token ranges:")
for key, value in token_ranges.items():
    print(f"{key}: {value}")

print(f"\nData point with the most tokens is at index: {max_tokens_idx}")
print(f"Number of tokens: {max_tokens}")
print(f"Input text: {gsm8k_dataset[max_tokens_idx]['question']}")

Number of data points in different token ranges:
0-50: 2041
51-100: 4718
101-150: 667
151-200: 44
201-250: 3

Data point with the most tokens is at index: 3331
Number of tokens: 240
Input text: Hasan is packing up his apartment because he’s moving across the country for a new job. He needs to ship several boxes to his new home. The movers have asked that Hasan avoid putting more than a certain weight in pounds in any cardboard box. The moving company has helpfully provided Hasan with a digital scale that will alert him if a package is too heavy. Hasan is in the kitchen, and he fills a cardboard box with 38 dinner plates. When he checks the box, the scale reports his box is too heavy. Hasan knows each of his plates weighs 10 ounces. He removes a single plate from the box and checks the movers’ scale again. The scale reports his box is still too heavy. Hasan repeats the process again and again. When he has removed enough plates, the movers’ scale shows the box is now an acceptable weight

In [11]:
def generate_output(model, tokenizer, dataset, current_idx):
    outputs = []
    
    input_text = gsm8k_dataset[current_idx]['question'] 
    input_prompt = "Answer the following math question: \n\n" + input_text + "\n\n Lets think step by step: "

    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True).to("cuda")
    output = model.generate(inputs['input_ids'])
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    answer_prefix = "Lets think step by step: "
    if answer_prefix in output_text:
        cleaned_output = output_text.split(answer_prefix)[-1].strip()
    else:
        cleaned_output = output_text.strip()

    outputs.append(cleaned_output)
    
    return outputs

In [35]:
input_texts = []
# outputs_7b = []
# outputs_tiny = []
# outputs_13b = []

In [13]:
for current_idx in range(948, 1000):
    input_text = gsm8k_dataset[current_idx]['question']
    output_7b = generate_output(llama7b, llama7b_tokenizer, input_text, current_idx)

    outputs_7b.append(output_7b)
    
    print(f"Llama-7b | CURRENT IDX: {current_idx} | Length: {len(outputs_7b)}")
    # with open('input_output_pairs_gsm8k_7b', 'wb') as f:
    #     pickle.dump(outputs_7b, f)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Llama-7b | CURRENT IDX: 948 | Length: 949
Llama-7b | CURRENT IDX: 949 | Length: 950
Llama-7b | CURRENT IDX: 950 | Length: 951
Llama-7b | CURRENT IDX: 951 | Length: 952
Llama-7b | CURRENT IDX: 952 | Length: 953
Llama-7b | CURRENT IDX: 953 | Length: 954
Llama-7b | CURRENT IDX: 954 | Length: 955
Llama-7b | CURRENT IDX: 955 | Length: 956
Llama-7b | CURRENT IDX: 956 | Length: 957
Llama-7b | CURRENT IDX: 957 | Length: 958
Llama-7b | CURRENT IDX: 958 | Length: 959
Llama-7b | CURRENT IDX: 959 | Length: 960
Llama-7b | CURRENT IDX: 960 | Length: 961
Llama-7b | CURRENT IDX: 961 | Length: 962
Llama-7b | CURRENT IDX: 962 | Length: 963
Llama-7b | CURRENT IDX: 963 | Length: 964
Llama-7b | CURRENT IDX: 964 | Length: 965
Llama-7b | CURRENT IDX: 965 | Length: 966
Llama-7b | CURRENT IDX: 966 | Length: 967
Llama-7b | CURRENT IDX: 967 | Length: 968
Llama-7b | CURRENT IDX: 968 | Length: 969
Llama-7b | CURRENT IDX: 969 | Length: 970
Llama-7b | CURRENT IDX: 970 | Length: 971
Llama-7b | CURRENT IDX: 971 | Leng

In [12]:
for current_idx in range(957, 1000):
    input_text = gsm8k_dataset[current_idx]['question']
    output_tiny = generate_output(tinyllama, tinyllama_tokenizer, input_text, current_idx)

    outputs_tiny.append(output_tiny)
    
    print(f"TinyLlama | CURRENT IDX: {current_idx} | Length: {len(outputs_tiny)}")
    # with open('input_output_pairs_gsm8k_tiny', 'wb') as f:
    #     pickle.dump(outputs_tiny, f)

TinyLlama | CURRENT IDX: 957 | Length: 958
TinyLlama | CURRENT IDX: 958 | Length: 959
TinyLlama | CURRENT IDX: 959 | Length: 960
TinyLlama | CURRENT IDX: 960 | Length: 961
TinyLlama | CURRENT IDX: 961 | Length: 962
TinyLlama | CURRENT IDX: 962 | Length: 963
TinyLlama | CURRENT IDX: 963 | Length: 964
TinyLlama | CURRENT IDX: 964 | Length: 965
TinyLlama | CURRENT IDX: 965 | Length: 966
TinyLlama | CURRENT IDX: 966 | Length: 967
TinyLlama | CURRENT IDX: 967 | Length: 968
TinyLlama | CURRENT IDX: 968 | Length: 969
TinyLlama | CURRENT IDX: 969 | Length: 970
TinyLlama | CURRENT IDX: 970 | Length: 971
TinyLlama | CURRENT IDX: 971 | Length: 972
TinyLlama | CURRENT IDX: 972 | Length: 973
TinyLlama | CURRENT IDX: 973 | Length: 974
TinyLlama | CURRENT IDX: 974 | Length: 975
TinyLlama | CURRENT IDX: 975 | Length: 976
TinyLlama | CURRENT IDX: 976 | Length: 977
TinyLlama | CURRENT IDX: 977 | Length: 978
TinyLlama | CURRENT IDX: 978 | Length: 979
TinyLlama | CURRENT IDX: 979 | Length: 980
TinyLlama |

In [11]:
with open('input_output_pairs_gsm8k_tiny', 'rb') as f:
    outputs_tiny = pickle.load(f)

print(len(outputs_tiny))
print(outputs_tiny[999])

1000
["1. Miss Grayson's class raised $50 for their field trip.  2. Each student contributed $5 each.  3. The cost of the trip is $7 for each student.  4. After all the field trip costs were paid, how much is left in Miss Grayson's class fund?   The answer is: $43.50.   Remember, the class fund is the amount of money that remains after all the field trip costs are paid."]


In [13]:
for current_idx in range(899, 1000):
    input_text = gsm8k_dataset[current_idx]['question']
    output_13b = generate_output(llama13b, llama13b_tokenizer, input_text, current_idx)

    outputs_13b.append(output_13b)
    
    print(f"Llama13b | CURRENT IDX: {current_idx} | Length: {len(outputs_13b)}")
    # with open('input_output_pairs_gsm8k_13b', 'wb') as f:
    #     pickle.dump(outputs_13b, f)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Llama13b | CURRENT IDX: 899 | Length: 900
Llama13b | CURRENT IDX: 900 | Length: 901
Llama13b | CURRENT IDX: 901 | Length: 902
Llama13b | CURRENT IDX: 902 | Length: 903
Llama13b | CURRENT IDX: 903 | Length: 904
Llama13b | CURRENT IDX: 904 | Length: 905
Llama13b | CURRENT IDX: 905 | Length: 906
Llama13b | CURRENT IDX: 906 | Length: 907
Llama13b | CURRENT IDX: 907 | Length: 908
Llama13b | CURRENT IDX: 908 | Length: 909
Llama13b | CURRENT IDX: 909 | Length: 910
Llama13b | CURRENT IDX: 910 | Length: 911
Llama13b | CURRENT IDX: 911 | Length: 912
Llama13b | CURRENT IDX: 912 | Length: 913
Llama13b | CURRENT IDX: 913 | Length: 914
Llama13b | CURRENT IDX: 914 | Length: 915
Llama13b | CURRENT IDX: 915 | Length: 916
Llama13b | CURRENT IDX: 916 | Length: 917
Llama13b | CURRENT IDX: 917 | Length: 918
Llama13b | CURRENT IDX: 918 | Length: 919
Llama13b | CURRENT IDX: 919 | Length: 920
Llama13b | CURRENT IDX: 920 | Length: 921
Llama13b | CURRENT IDX: 921 | Length: 922
Llama13b | CURRENT IDX: 922 | Leng

In [7]:
with open('input_output_pairs_gsm8k_13b', 'rb') as f:
    outputs_13b = pickle.load(f)

print(len(outputs_13b))
print(outputs_13b[831])

1000
["Step 1: Let's find out how many popsicle sticks Sam has.  Step 2: Let's find out how many popsicle sticks Sid has.  Step 3: Let's find out how many popsicle sticks Steve has.  Step 4: Let's add up all the popsicle sticks to find out the total number of popsicle sticks they have.  Step 5: Let's find out how many popsicle sticks they can use for their Art class activity.  So, let's start!  Step 1: Sam has thrice as many popsicle sticks as Sid. That means Sam has 3x as many popsicle sticks as Sid.  Step 2: Sid has twice as many popsicle sticks as Steve. That means Sid has 2x as many popsicle sticks as Steve.  Step 3: Steve has 12 popsicle sticks.  Now, let's add up all the popsicle sticks:  Sam has 3x as many popsicle sticks as Sid, so Sam has 3x 12 = 36 popsicle sticks.  Sid has 2x as many popsicle sticks as Steve, so Sid has 2x 12 = 24 popsicle sticks.  Total number of popsicle sticks = 36 + 24 = 60 popsicle sticks.  Step 5: They can use 60 popsicle sticks for their Art class act

In [32]:
print(len(outputs_7b))
print(len(outputs_tiny))
print(len(outputs_13b))

1000
1000
1000


Remove the \n from the 3 output arrays

In [33]:
outputs_7b = [[output.replace('\n', ' ') for output in output_list] for output_list in outputs_7b]
outputs_tiny = [[output.replace('\n', ' ') for output in output_list] for output_list in outputs_tiny]
outputs_13b = [[output.replace('\n', ' ') for output in output_list] for output_list in outputs_13b]

# with open('input_output_pairs_gsm8k_7b', 'wb') as f:
#     pickle.dump(outputs_7b, f)
# with open('input_output_pairs_gsm8k_tiny', 'wb') as f:
#     pickle.dump(outputs_tiny, f)
# with open('input_output_pairs_gsm8k_13b', 'wb') as f:
#     pickle.dump(outputs_13b, f)

In [40]:
input_output_pairs = []

In [41]:
for idx in range(len(outputs_7b)):
    outputs = {
        'input': gsm8k_dataset[idx]["question"],
        'output_7b': outputs_7b[idx],
        'output_tiny': outputs_tiny[idx],
        'output_13b': outputs_13b[idx]
    }
    
    input_output_pairs.append(outputs)

# with open('input_output_pairs_gsm8k.pkl', 'wb') as f:
#     pickle.dump(input_output_pairs, f)

In [42]:
len(input_output_pairs)

1000