## Creating Few-Shot Datasets

In [53]:
from datasets import load_dataset
import pandas as pd
import random
import json

In [54]:
# Load the three base task datasets from their JSON files, in task order.
task1_dataset, task2_dataset, task3_dataset = map(
    pd.read_json,
    ('Task1-dataset.json', 'Task2-dataset.json', 'Task3-dataset.json'),
)

In [55]:
# Utilities:
def make_few_shot_inputs(dataset, num_random_examples: int) -> list:
    """Build one few-shot prompt per row of ``dataset``.

    Each prompt consists of a fixed instruction line, then the randomly drawn
    demonstrations (an "Input" text, the marker "A: " and the matching
    "Target"), and finally the row's own "Input" followed by an open "A: ".

    Demonstrations are drawn without replacement from the *other* rows, so a
    prompt never shows its own question together with its answer and never
    repeats a demonstration.

    Args:
        dataset: Object whose "Input" and "Target" entries are equally long
            iterables of strings, e.g. a pandas DataFrame or a dict of lists.
        num_random_examples: Number of demonstrations wanted per prompt. When
            fewer other rows are available, all of them are used instead.

    Returns:
        A list of prompt strings, one per row, in row order.
    """
    # Materialise both columns so rows are addressed by position; label-based
    # lookups on a pandas Series fail when the index is not 0..n-1.
    inputs = list(dataset["Input"])
    targets = list(dataset["Target"])

    header = "Please consider the following logic problems carefully and answer the corresponding questions!"
    # Every row except the query itself is a candidate demonstration.
    num_candidates = max(len(inputs) - 1, 0)
    shots = min(max(num_random_examples, 0), num_candidates)

    few_shot_inputs = []
    for query_index, query in enumerate(inputs):
        parts = [header]
        # Sample positions from a range one shorter than the data, then shift
        # the ones at or past the query by one. This skips the query row
        # without building a separate candidate list for every row.
        for position in random.sample(range(num_candidates), shots):
            example_index = position + 1 if position >= query_index else position
            parts.append("\n" + inputs[example_index] + "A: " + targets[example_index])
        parts.append("\n" + query + "A: ")
        few_shot_inputs.append("".join(parts))
    return few_shot_inputs

def convert_pd_to_jsonl(dataframe, filename):
    """Write ``dataframe`` to ``filename`` as JSON Lines.

    The frame is converted to a list of per-row dicts and each dict is
    serialised with ``json.dumps`` onto its own newline-terminated line.
    The target file is opened in write mode, replacing any existing content.
    """
    records = dataframe.to_dict(orient="records")
    with open(filename, "w") as handle:
        handle.writelines(json.dumps(record) + "\n" for record in records)

In [56]:
# Task 1: build the prompt lists with 1, 2 and 4 random examples per question.
(
    few_shot_inputs_1_example,
    few_shot_inputs_2_examples,
    few_shot_inputs_4_examples,
) = (
    make_few_shot_inputs(task1_dataset, num_random_examples=shot_count)
    for shot_count in (1, 2, 4)
)


In [57]:
# Attach each prompt list to its own copy of the Task 1 data as a new column.
task1_fs1_dataset = task1_dataset.assign(**{'FewShot-1': few_shot_inputs_1_example})
task1_fs2_dataset = task1_dataset.assign(**{'FewShot-2': few_shot_inputs_2_examples})
task1_fs4_dataset = task1_dataset.assign(**{'FewShot-4': few_shot_inputs_4_examples})

# Save every variant as a JSON file.
task1_fs1_dataset.to_json('Task1-fs1-dataset.json')
task1_fs2_dataset.to_json('Task1-fs2-dataset.json')
task1_fs4_dataset.to_json('Task1-fs4-dataset.json')

In [58]:
# Also export the three Task 1 variants as JSON Lines files.
convert_pd_to_jsonl(dataframe=task1_fs1_dataset, filename="Task1-fs1-dataset.jsonl")
convert_pd_to_jsonl(dataframe=task1_fs2_dataset, filename="Task1-fs2-dataset.jsonl")
convert_pd_to_jsonl(dataframe=task1_fs4_dataset, filename="Task1-fs4-dataset.jsonl")

In [59]:
# Task 2: build the prompt lists with 1, 2 and 4 random examples per question.
(
    few_shot_inputs_1_example,
    few_shot_inputs_2_examples,
    few_shot_inputs_4_examples,
) = (
    make_few_shot_inputs(task2_dataset, num_random_examples=shot_count)
    for shot_count in (1, 2, 4)
)


In [60]:
# Attach each prompt list to its own copy of the Task 2 data as a new column.
task2_fs1_dataset = task2_dataset.assign(**{'FewShot-1': few_shot_inputs_1_example})
task2_fs2_dataset = task2_dataset.assign(**{'FewShot-2': few_shot_inputs_2_examples})
task2_fs4_dataset = task2_dataset.assign(**{'FewShot-4': few_shot_inputs_4_examples})

# Save every variant as a JSON file.
task2_fs1_dataset.to_json('Task2-fs1-dataset.json')
task2_fs2_dataset.to_json('Task2-fs2-dataset.json')
task2_fs4_dataset.to_json('Task2-fs4-dataset.json')

In [61]:
# Also export the three Task 2 variants as JSON Lines files.
convert_pd_to_jsonl(dataframe=task2_fs1_dataset, filename="Task2-fs1-dataset.jsonl")
convert_pd_to_jsonl(dataframe=task2_fs2_dataset, filename="Task2-fs2-dataset.jsonl")
convert_pd_to_jsonl(dataframe=task2_fs4_dataset, filename="Task2-fs4-dataset.jsonl")

In [62]:
# Task 3: build the prompt lists with 1, 2 and 4 random examples per question.
(
    few_shot_inputs_1_example,
    few_shot_inputs_2_examples,
    few_shot_inputs_4_examples,
) = (
    make_few_shot_inputs(task3_dataset, num_random_examples=shot_count)
    for shot_count in (1, 2, 4)
)

In [63]:
# Attach each prompt list to its own copy of the Task 3 data as a new column.
task3_fs1_dataset = task3_dataset.assign(**{'FewShot-1': few_shot_inputs_1_example})
task3_fs2_dataset = task3_dataset.assign(**{'FewShot-2': few_shot_inputs_2_examples})
task3_fs4_dataset = task3_dataset.assign(**{'FewShot-4': few_shot_inputs_4_examples})

# Save every variant as a JSON file.
task3_fs1_dataset.to_json('Task3-fs1-dataset.json')
task3_fs2_dataset.to_json('Task3-fs2-dataset.json')
task3_fs4_dataset.to_json('Task3-fs4-dataset.json')

In [64]:
# Also export the three Task 3 variants as JSON Lines files.
convert_pd_to_jsonl(dataframe=task3_fs1_dataset, filename="Task3-fs1-dataset.jsonl")
convert_pd_to_jsonl(dataframe=task3_fs2_dataset, filename="Task3-fs2-dataset.jsonl")
convert_pd_to_jsonl(dataframe=task3_fs4_dataset, filename="Task3-fs4-dataset.jsonl")