In [None]:
import json
import random

In [None]:
# Mount Google Drive at /content/drive; the input JSON files and the generated
# JSONL outputs used by this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Season label -> path of the structured herbicide JSON file for that season.
# The label doubles as the {season} text in prompts and as the output-file prefix.
json_files = dict(
    spring="/content/drive/MyDrive/Data/Structured_Spring.json",
    fall="/content/drive/MyDrive/Data/Structured_Fall.json",
)

In [None]:
# Prompt templates filled in with str.format(); the available placeholders are
# {herbicide}, {weed}, {season} and {rating}.
templates = [
    # Questions about how well a named herbicide works on a weed.
    "How effective is {herbicide} against {weed} in {season}?",
    "What is the effectiveness of {herbicide} on {weed} during {season}?",
    "Can you provide the control percentage of {herbicide} for {weed} in {season}?",
    "How well does {herbicide} control {weed} in {season}?",
] + [
    # Questions asking which herbicides to use (no {herbicide} placeholder).
    "Which herbicides do you recommend for {season} burndown of {weed} with a weed control rating of {rating}?",
    "List all the herbicides for {weed} which are 90%-100% effective in {season}?",
]

In [None]:
def convert_json_to_jsonl(json_file_path, season, output_file_path,
                          prompt_templates=None, rng=None):
    """Convert a structured herbicide JSON file into prompt/completion JSONL.

    The input file must hold a JSON array of entries, each with a
    ``'herbicide'`` name and an ``'effectiveness'`` mapping of weed name to an
    effectiveness rating. Herbicides are grouped by (weed, rating) and one
    JSONL record ``{"prompt": ..., "completion": ...}`` is written per group.

    * Groups rated ``'90-100% weed control'`` get a "which herbicides?" style
      prompt (a template without ``{herbicide}``) and the comma-separated
      herbicide list as completion.
    * Every other group gets a "how effective is ...?" style prompt (a
      template containing ``{herbicide}``) naming the herbicides of that
      group, with the rating as completion.

    If no template of the required kind is available, any template may be
    chosen.

    Args:
        json_file_path: Path of the structured JSON input file.
        season: Season label substituted for ``{season}`` in the prompts.
        output_file_path: Path of the JSONL file to (over)write.
        prompt_templates: Optional sequence of ``str.format`` templates using
            ``{herbicide}``, ``{weed}``, ``{season}`` and ``{rating}``.
            Defaults to the notebook-level ``templates`` list.
        rng: Optional object exposing ``choice(seq)`` (e.g. a seeded
            ``random.Random``) for reproducible output. Defaults to the
            ``random`` module.

    Raises:
        KeyError: If an entry lacks ``'herbicide'`` or ``'effectiveness'``.
        IndexError: If there is at least one group but no templates at all.
    """
    if prompt_templates is None:
        prompt_templates = templates
    if rng is None:
        rng = random

    # Split templates by the kind of answer they call for, so a prompt is never
    # paired with a completion that does not answer it.
    rating_templates = [t for t in prompt_templates if '{herbicide}' in t]
    listing_templates = [t for t in prompt_templates if '{herbicide}' not in t]

    # JSON text is UTF-8; be explicit rather than rely on the platform default.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # weed -> rating -> [herbicides], in first-seen order.
    weed_herbicides = {}
    for entry in data:
        herbicide = entry['herbicide']
        for weed, effectiveness in entry['effectiveness'].items():
            by_rating = weed_herbicides.setdefault(weed, {})
            by_rating.setdefault(effectiveness, []).append(herbicide)

    with open(output_file_path, 'w', encoding='utf-8') as f_out:
        for weed, effectiveness_data in weed_herbicides.items():
            for effectiveness, herbicides in effectiveness_data.items():
                herbicide_list = ', '.join(herbicides)
                if effectiveness == '90-100% weed control':
                    candidates = listing_templates or prompt_templates
                    completion = herbicide_list
                else:
                    candidates = rating_templates or prompt_templates
                    completion = effectiveness
                template = rng.choice(candidates)
                json_obj = {
                    # Always format with this group's own herbicides; the loop
                    # variable left over from the grouping pass above would
                    # name the last herbicide in the file instead.
                    "prompt": template.format(
                        herbicide=herbicide_list,
                        weed=weed,
                        season=season,
                        rating=effectiveness,
                    ),
                    "completion": completion,
                }
                f_out.write(json.dumps(json_obj) + '\n')

In [None]:
# Produce one "<season>_output.jsonl" file per configured season, written next
# to the input data on the mounted Drive.
for season in json_files:
    json_file_path = json_files[season]
    output_file_path = f"/content/drive/MyDrive/Data/{season}_output.jsonl"
    convert_json_to_jsonl(json_file_path, season, output_file_path)

In [None]:
from google.colab import files

# Download the generated JSONL files through Colab's `files` helper.
# NOTE: these paths are hard-coded; they must match the
# "/content/drive/MyDrive/Data/{season}_output.jsonl" names written by the
# conversion loop for the "spring" and "fall" seasons.
files.download('/content/drive/MyDrive/Data/spring_output.jsonl')
files.download('/content/drive/MyDrive/Data/fall_output.jsonl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>