# This notebook is used to manipulate test data

In [6]:
import requests
import json

# URL of the newline-delimited JSON file (one JSON object per line)
url = "https://gist.githubusercontent.com/tobrun/c6fb087a9c3deb01ad4d41d55aa6be6b/raw/5d9bfb9166424c10cf26addc8925f33ec69f7a8b/gistfile1.txt"

# Send an HTTP GET request to the URL. The timeout keeps the cell from hanging
# forever on a stalled connection, and raise_for_status() turns an HTTP error
# response into an explicit exception here instead of leaving `data` empty and
# failing later with an unrelated IndexError on `data[0]`.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse every non-blank line of the body as one JSON object. Lines that hold
# only whitespace (e.g. a stray "\r") are skipped as well as empty ones.
data = [
    json.loads(line)
    for line in response.text.split("\n")
    if line.strip()
]

data[0]

{'id': 'nav_task_0',
 'name': 'estimated_arrival_time',
 'instruction': "What's my ETA?",
 'instances': [{'input': '',
   'output': 'Your estimated time of arrival is 15 minutes.'}],
 'is_classification': False}

Now that we have a list of JSON objects, let's convert it into a single JSON array:

In [7]:
# Serialise the list of parsed objects into one JSON array string, then parse
# that string back. The parsed result is a separate object tree from `data`,
# so the in-place edits applied to `json_array` do not modify `data`.
json_string = json.dumps(data)
json_array = json.loads(json_string)
# Display the first entry to check the structure
json_array[0]

{'id': 'nav_task_0',
 'name': 'estimated_arrival_time',
 'instruction': "What's my ETA?",
 'instances': [{'input': '',
   'output': 'Your estimated time of arrival is 15 minutes.'}],
 'is_classification': False}

For each entry, let's add an indication that it's an expected result:

```
"is_correct": true
```

In [8]:
# Flag the first instance of every entry as an expected (correct) result.
for entry in json_array:
    first_instance = entry['instances'][0]
    first_instance['is_correct'] = True

json_array[0]

{'id': 'nav_task_0',
 'name': 'estimated_arrival_time',
 'instruction': "What's my ETA?",
 'instances': [{'input': '',
   'output': 'Your estimated time of arrival is 15 minutes.',
   'is_correct': True}],
 'is_classification': False}

For each entry whose first instance has an empty `input`, let's move the instruction into that instance's `input`:

In [9]:
# When the first instance has no input text, promote the entry's top-level
# instruction into that slot and remove the top-level key.
for entry in json_array:
    first_instance = entry['instances'][0]
    if first_instance['input']:
        # Input already populated: leave this entry untouched.
        continue
    first_instance['input'] = entry.pop('instruction')

json_array[1]

{'id': 'nav_task_1',
 'name': 'faster_route',
 'instances': [{'input': 'Can you find a faster route?',
   'output': "I'm calculating alternative routes. The fastest route saves 3 minutes but has heavy traffic. Would you like to switch to this route?",
   'is_correct': True}],
 'is_classification': False}

For entries that still have an instruction, let's move it to `input`, move the old `input` to `output`, and move the old `output` to `context`:

In [10]:
# Entries that still carry a top-level instruction get their fields rotated:
# instruction -> input, old input -> output, old output -> context.
for json_object in json_array:
    if 'instruction' not in json_object:
        continue
    instance = json_object['instances'][0]
    # Read both old values before overwriting anything. The temporaries are
    # named so they do not shadow the built-in `input()` in the kernel
    # namespace for the rest of the session.
    previous_input = instance['input']
    previous_output = instance['output']

    instance['input'] = json_object.pop('instruction')
    instance['context'] = previous_output
    instance['output'] = previous_input

Let's dump the result to a file:

In [11]:
# Persist the transformed entries as pretty-printed JSON (4-space indent).
output_path = '../../build/prior_art.json'
with open(output_path, mode='w') as out_file:
    json.dump(json_array, out_file, indent=4)
