# Data load

## SciTail

In [1]:
from datasets import load_dataset

# Pull SciTail (TSV configuration) through the `datasets` loader
scitail_dataset = load_dataset("scitail", "tsv_format")

# Keep handles on the two splits inspected below
train_set, validation_set = scitail_dataset['train'], scitail_dataset['validation']

# Peek at the first record of each split, one per line
print(train_set[0], validation_set[0], sep="\n")

  from .autonotebook import tqdm as notebook_tqdm


{'premise': 'Pluto rotates once on its axis every 6.39 Earth days;', 'hypothesis': 'Earth rotates on its axis once times in one day.', 'label': 'neutral'}
{'premise': 'An introduction to atoms and elements, compounds, atomic structure and bonding, the molecule and chemical reactions.', 'hypothesis': 'Replace another in a molecule happens to atoms during a substitution reaction.', 'label': 'neutral'}


## ARC

In [2]:
from datasets import load_dataset

# Fetch the "ARC-Challenge" configuration of the ai2_arc dataset
arc_dataset = load_dataset("ai2_arc", "ARC-Challenge")

# Bind every split to a notebook-level name.
# Note: train_set / validation_set are rebound here and no longer refer to
# the SciTail splits assigned in the earlier cell.
train_set, validation_set, test_set = (
    arc_dataset[split_name] for split_name in ('train', 'validation', 'test')
)

# Report how many questions the train and test splits contain
print(len(train_set), len(test_set), sep="\n")

# Show the first question of each split, one per line
print(train_set[0], validation_set[0], test_set[0], sep="\n")


1119
1172
{'id': 'Mercury_SC_415702', 'question': 'George wants to warm his hands quickly by rubbing them. Which skin surface will produce the most heat?', 'choices': {'text': ['dry palms', 'wet palms', 'palms covered with oil', 'palms covered with lotion'], 'label': ['A', 'B', 'C', 'D']}, 'answerKey': 'A'}
{'id': 'Mercury_SC_407695', 'question': 'Juan and LaKeisha roll a few objects down a ramp. They want to see which object rolls the farthest. What should they do so they can repeat their investigation?', 'choices': {'text': ['Put the objects in groups.', 'Change the height of the ramp.', 'Choose different objects to roll.', 'Record the details of the investigation.'], 'label': ['A', 'B', 'C', 'D']}, 'answerKey': 'D'}
{'id': 'Mercury_7175875', 'question': 'An astronomer observes that a planet rotates faster after a meteorite impact. Which is the most likely effect of this increase in rotation?', 'choices': {'text': ['Planetary density will decrease.', 'Planetary years will become long

### Convert to ChatGPT style

In [3]:
def format_arc_data(item):
    """Render one ARC item as a plain-text multiple-choice prompt.

    Args:
        item: Mapping with a 'question' string and a 'choices' mapping that
            holds a 'text' list and, optionally, a parallel 'label' list.

    Returns:
        A string of the form "Q: <question>\\n<label>) <choice>\\n..." with one
        line per choice and a trailing newline.
    """
    choices = item['choices']

    # Use the labels shipped with the item so the rendered options line up
    # with item['answerKey']; regenerating A, B, C... from the position would
    # silently disagree with the key whenever the dataset labels are not A-D
    # (NOTE(review): ARC is reported to contain numerically labelled items -
    # confirm against the dataset card).
    labels = choices.get('label') or []

    lines = [f"Q: {item['question']}"]
    for idx, text in enumerate(choices['text']):
        # Fall back to positional letters only when no label is available.
        label = labels[idx] if idx < len(labels) else chr(65 + idx)
        lines.append(f"{label}) {text}")

    # Every line, including the last one, is newline-terminated.
    return "\n".join(lines) + "\n"

# Example usage: render the first ARC training item as a prompt.
# formatted_example is reused by later cells as the message sent to the chat models.
formatted_example = format_arc_data(arc_dataset['train'][0])
print(formatted_example)

Q: George wants to warm his hands quickly by rubbing them. Which skin surface will produce the most heat?
A) dry palms
B) wet palms
C) palms covered with oil
D) palms covered with lotion



In [14]:
import json

# Load settings (including the API keys read by later cells) from a JSON file
# in the parent directory. The encoding is given explicitly so decoding does
# not depend on the platform's locale default.
with open("../finetune_config.json", encoding="utf-8") as file:
    conf = json.load(file)

# Do not display `conf` or its key entries in a cell: the values are secrets
# and would be stored in the notebook's saved output.

In [5]:
!pip install openai

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting openai
  Downloading openai-1.25.2-py3-none-any.whl.metadata (21 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Downloading openai-1.25.2-py3-none-any.whl (312 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.9/312.9 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m283.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m282.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: httpcore, httpx, openai
Successfully installed httpcore-1.0

In [6]:
from openai import OpenAI

# Build the OpenAI client; the API key is taken explicitly from the JSON
# config loaded into `conf`.
client = OpenAI(api_key=conf["OPENAI_API_KEY"])

In [10]:
def chat(prompt, model="gpt-3.5-turbo", api_client=None):
    """Send a single user message to a chat-completions endpoint.

    Args:
        prompt: Text sent as the content of the one "user" message.
        model: Model identifier passed to the endpoint. Defaults to
            "gpt-3.5-turbo", the value that was previously hard-coded.
        api_client: Object exposing `chat.completions.create(...)`. When
            omitted, the notebook-level `client` is used, as before.

    Returns:
        Whatever `chat.completions.create` returns (the full response object,
        not just the reply text).
    """
    # The notebook rebinds the global `client` in a later cell; accepting an
    # explicit client lets callers choose the endpoint instead of depending on
    # which cell happened to run last.
    active_client = client if api_client is None else api_client
    return active_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )

# Send the formatted ARC question through chat() and keep the full response object.
chat_completion = chat(formatted_example)
# The reply text is read from the first choice of the response.
response_message = chat_completion.choices[0].message.content
# Bare final expression so the notebook displays the reply.
response_message

'B) wet palms\n\nWet skin conducts heat better than dry skin, so rubbing wet palms together will produce the most heat. The water acts as a conductor and helps transfer the heat more efficiently from friction.'

In [11]:
## Llama3

In [18]:
# from openai import OpenAI

# Re-create the client against the NVIDIA endpoint, using the NVIDIA key from `conf`.
# NOTE: this rebinds the notebook-level `client`, so code that reads the global
# `client` after this cell runs (such as chat()) talks to this endpoint too.
client = OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key=conf["NVIDIA_API_KEY"])

# Send the same formatted ARC question to the Llama 3 70B instruct model.
# Streaming is left off; to enable it, pass stream=True and print each chunk's
# choices[0].delta.content as it arrives.
chat_completion = client.chat.completions.create(
    messages=[{"role": "user", "content": formatted_example}],
    model="meta/llama3-70b-instruct",
    max_tokens=1024,
    temperature=0.5,
    top_p=1,
)

# Bare final expression so the notebook displays the reply text.
chat_completion.choices[0].message.content

'The correct answer is A) dry palms.\n\nWhen George rubs his hands together, the friction between his palms generates heat. The amount of heat generated depends on the coefficient of friction between the two surfaces in contact. In this case, the coefficient of friction is highest when the palms are dry, which means that the surfaces are in close contact and can generate the most heat.\n\nWhen the palms are wet (option B), the water reduces the coefficient of friction, making it more difficult to generate heat. Similarly, when the palms are covered with oil or lotion (options C and D), these substances reduce the friction between the palms, making it harder to generate heat.\n\nTherefore, rubbing dry palms together will produce the most heat.'