In [1]:
# HellaSwag

In [None]:
!pip install datasets
!pip install openpyxl
!pip install -q -U google-genai
# !pip install transformers
# !pip install accelerate
# !pip install peft
# !pip install bitsandbytes

In [1]:
# Imports
%load_ext autoreload
%autoreload 2

import pandas as pd
import sys

# Call models
from src.call_models import bedrock_connect, call_claude_bedrock
from src.call_models import google_connect, call_gemini, all_string_gemini_config, all_int_gemini_config
from src.translate_func import claude_translation, gemini_translation

# Datasets

from src.benchmarks_code import hellaswag
from prompts import hallaswag_prompts

# Access keys
from my_access_keys import google_access_key, aws_access_key, aws_secret_key

# .csv utils
from src.save_utils import add_dataset_to_csv

# Remove annoying warning
from IPython.core.display_functions import display

In [3]:
## Get dataset

In [2]:
# Create the two provider clients that the translation cells below pass to
# claude_translation (bedrock_client) and gemini_translation (google_client).
# Credentials come from the local `my_access_keys` module imported above.
# NOTE(review): presumably these calls contact or configure remote services — confirm in src.call_models.
bedrock_client = bedrock_connect(aws_access_key, aws_secret_key)
google_client = google_connect(google_access_key)

In [3]:
# Load the HellaSwag data through the project helper.
hellaswag_dataset = hellaswag.get_hellaswag_datasets()

# Shrink the train split to a fixed 10-row sample: drop the first 5 rows, then
# pick the rows at these positions of what remains.
train_split = hellaswag_dataset['hellaswag_train']
sample_positions = [1, 10, 20, 30, 40, 50, 60, 66, 70, 80]
hellaswag_dataset['hellaswag_train'] = train_split.skip(5).select(sample_positions)

# CSV that the later cells append one column set per experiment to.
hellaswag_file_name = 'compare_csv/hellaswag_train_top_50.csv'
print(hellaswag_file_name)

hellaswag_dataset

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/24.4M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/6.11M [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/6.32M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/39905 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10003 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10042 [00:00<?, ? examples/s]

compare_csv/hellaswag_train_top_50.csv


{'hellaswag_train': Dataset({
     features: ['ind', 'activity_label', 'ctx_a', 'ctx_b', 'ctx', 'endings', 'source_id', 'split', 'split_type', 'label'],
     num_rows: 10
 })}

In [None]:
# Record the untranslated samples under the 'original' experiment name in both
# the main comparison CSV and its '-text' sibling, then preview each result.
original_train = hellaswag_dataset['hellaswag_train']
to_dict = hellaswag.hellaswag_sample_to_dict

df = add_dataset_to_csv(hellaswag_file_name, 'original', original_train, to_dict)

# Swap the trailing 4-character extension ('.csv') for '-text.csv'.
text_file_name = hellaswag_file_name[:-4] + '-text.csv'
text_df = add_dataset_to_csv(text_file_name, 'original', original_train, to_dict)

display(df.head(2), text_df.head(2))

In [8]:
!pip install IProgress




In [None]:
# Experiment label; the next cell uses it when writing results to the CSV.
exp_name = 'claude_v1_refine'

# Positional arguments grouped by role; they are unpacked in the same order
# the call expects: instruction prompt, few-shot examples, output format,
# then the sample -> dict and dict -> sample converters.
claude_prompt_parts = (
    hallaswag_prompts.HELLASWAG_INSTRUCT_V1_CLAUDE_REFINE,
    hallaswag_prompts.HELLASWAG_FEW_SHOTS,
    hallaswag_prompts.HELLASWAG_FORMAT,
)
sample_converters = (
    hellaswag.hellaswag_sample_to_dict,
    hellaswag.hellaswag_dict_to_sample,
)

hebrew_datasets, text_output = claude_translation(
    bedrock_client, hellaswag_dataset, *claude_prompt_parts, *sample_converters
)

In [None]:
# Add the translated train split to the comparison CSV under the current
# experiment name, then preview the first two rows of the result.
translated_train = hebrew_datasets['hellaswag_train']
df = add_dataset_to_csv(
    hellaswag_file_name,
    exp_name,
    translated_train,
    hellaswag.hellaswag_sample_to_dict,
)
df.head(2)

In [4]:
# Experiment label; the next cell uses it when writing results to the CSV.
exp_name = 'fewshots'

# Positional arguments grouped by role; they are unpacked in the same order
# the call expects: instruction prompt, few-shot examples, then the
# sample -> dict and dict -> sample converters.
gemini_prompt_parts = (
    hallaswag_prompts.HELLASWAG_INSTRUCT_V1_GEMINI_MULTI,
    hallaswag_prompts.HELLASWAG_FEW_SHOTS,
)
sample_converters = (
    hellaswag.hellaswag_sample_to_dict,
    hellaswag.hellaswag_dict_to_sample,
)
# NOTE(review): presumably `if_pro` selects the Pro model and `think_bud` is the
# thinking-token budget — confirm in src.translate_func.
gemini_options = {'if_pro': True, 'think_bud': 20_000}

hebrew_datasets, text_output = gemini_translation(
    google_client, hellaswag_dataset, *gemini_prompt_parts, *sample_converters, **gemini_options
)

Translating hellaswag_train...


  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# The translated datasets are indexed two different ways in this notebook
# (`hebrew_datasets[0]['hellaswag_train']` in this cell originally,
# `hebrew_datasets['hellaswag_train']` in the other cells). Accept both shapes so
# the cell works either way: a list/tuple whose first item is the split mapping,
# or the split mapping itself.
gemini_splits = hebrew_datasets[0] if isinstance(hebrew_datasets, (list, tuple)) else hebrew_datasets

# Add the Gemini translation to the comparison CSV under the current experiment name.
df = add_dataset_to_csv(hellaswag_file_name, exp_name, gemini_splits['hellaswag_train'],  hellaswag.hellaswag_sample_to_dict)
df

In [6]:
from src.translate_func import dict_to_prompt

In [9]:
# Print each English sample next to its Hebrew translation, wrapped in
# <example> tags, one pair per block.
english_train = hellaswag_dataset['hellaswag_train']
hebrew_train = hebrew_datasets['hellaswag_train']

# Derive the number of pairs from the data instead of hard-coding 10, so the
# cell keeps working (no IndexError) if the sample size chosen earlier changes.
pair_count = min(len(english_train), len(hebrew_train))

for i in range(pair_count):
    print('<example>\nEnglish:')
    print(dict_to_prompt(hellaswag.hellaswag_sample_to_dict(english_train[i])))
    print('Hebrew:')
    print(dict_to_prompt(hellaswag.hellaswag_sample_to_dict(hebrew_train[i])))
    print('</example>')
    print()

<example>
English:
<activity_label>Getting a haircut</activity_label>
<ctx_a>The man in the blue shirt sits on the chair next to the sink. The other man begins washing his hair. He scrubs in the shampoo and then washes it off.</ctx_a>
<ctx_b>he</ctx_b>
<ctx>The man in the blue shirt sits on the chair next to the sink. The other man begins washing his hair. He scrubs in the shampoo and then washes it off. he</ctx>
<ending 1>then combs it and blow dries his hair after styling it with gel.</ending 1>
<ending 2>shows the razor that he has for shaving his hair.</ending 2>
<ending 3>hair is now dry, he is on his way to the barber.</ending 3>
<ending 4>moves the bucket to the other side of the sink and continues washing his hair.</ending 4>
Hebrew:
<activity_label>להסתפר</activity_label>
<ctx_a>האיש בחולצה הכחולה מתיישב על הכיסא ליד כיור החפיפה. האיש השני מתחיל לחפוף את שיערו. הוא מעסה את השמפו לתוך השיער ואז שוטף אותו.</ctx_a>
<ctx_b>הוא</ctx_b>
<ctx>האיש בחולצה הכחולה מתיישב על הכיסא ליד כי