In [2]:
import requests
import json

# -----------------------
# CONFIG
# -----------------------
GITHUB_URL = "https://raw.githubusercontent.com/nl4opt/nl4opt-competition/main/generation_data/dev.jsonl"
OUTPUT_FILE = "nl4opt_sample.json"
N = 20  # Number of instances to extract
REQUEST_TIMEOUT = 30  # Seconds to wait on the server before giving up

# -----------------------
# STEP 1: Download the file
# -----------------------
print("Downloading dev.jsonl from GitHub...")
# Without a timeout a stalled connection would block this cell indefinitely.
response = requests.get(GITHUB_URL, timeout=REQUEST_TIMEOUT)
if response.status_code != 200:
    raise RuntimeError(f"Failed to download file, status code: {response.status_code}")

# One JSON object per line; blank lines are dropped so they cannot crash
# json.loads in the next step.
lines = [line for line in response.text.split("\n") if line.strip()]
print(f"Total lines in file: {len(lines)}")

# -----------------------
# STEP 2: Extract first N lines
# -----------------------
data_sample = {}

for line in lines[:N]:
    instance = json.loads(line)  # Each line is a JSON object
    # The code treats the first top-level key as the instance ID and its
    # value as the instance payload.
    instance_id = next(iter(instance))
    instance_data = instance[instance_id]

    # Keep only relevant fields; missing fields fall back to empty defaults.
    clean_data = {
        "document": instance_data.get("document", ""),
        "vars": instance_data.get("vars", []),
        "obj_declaration": instance_data.get("obj_declaration", {}),
        "const_declarations": instance_data.get("const_declarations", [])
    }

    data_sample[instance_id] = clean_data

# -----------------------
# STEP 3: Save to JSON
# -----------------------
# json.dump escapes non-ASCII by default, so the explicit encoding only makes
# the write independent of the platform's default codec.
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    json.dump(data_sample, f, indent=4)

print(f"Saved {len(data_sample)} instances to {OUTPUT_FILE}")

Downloading dev.jsonl from GitHub...
Total lines in file: 99
Saved 20 instances to nl4opt_sample.json


In [2]:
import requests
import json
import random
import os

def download_and_extract_mamo_easy_lp(
    url="https://raw.githubusercontent.com/FreedomIntelligence/Mamo/main/data/optimization/Easy_LP/mamo_easy_lp.jsonl",
    output_file="mamo_easy_lp_subset.json",
    n=20,
    random_seed=42,
    timeout=30
):
    """
    Downloads a JSON Lines file, randomly samples n instances from it,
    and writes them out as a JSON list to output_file.

    Args:
        url (str): Direct URL to the .jsonl file.
        output_file (str): Path to write the output JSON file. Missing parent
            directories are created.
        n (int | None): Number of instances to extract. If None, or not
            smaller than the number of parsed instances, everything is kept
            in file order.
        random_seed (int): Seed for reproducibility of the sample.
        timeout (float): Seconds to wait for the HTTP request.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    print(f"📥 Downloading data from {url}")
    # A timeout keeps a stalled connection from hanging the caller forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    # Split on "\n" only: str.splitlines() also breaks on U+2028/U+2029/U+0085,
    # which may appear unescaped inside JSON strings and would cut a record in
    # half. Blank lines are skipped so they cannot crash json.loads.
    lines = [line for line in resp.text.split("\n") if line.strip()]
    print(f"✅ Downloaded {len(lines)} lines from the file")

    # Parse each line as JSON
    data = [json.loads(line) for line in lines]
    print(f"Parsed {len(data)} instances")

    # If n is less than total, sample randomly
    if n is not None and n < len(data):
        # A private generator yields the same sample as seeding the module-level
        # one, without disturbing the global random state of the caller.
        data = random.Random(random_seed).sample(data, n)
        print(f"🔀 Randomly sampled {n} instances")

    # Save to JSON
    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"💾 Saved output to {output_file}")

    # Print the first instance as preview (there may be none)
    print("\nExample instance:")
    if data:
        print(json.dumps(data[0], indent=2, ensure_ascii=False))
    else:
        print("(no instances to preview)")

# Entry point: build the 20-instance Easy_LP subset when run as the main module.
if __name__ == "__main__":
    download_and_extract_mamo_easy_lp(
        output_file="mamo_easy_lp_subset.json",
        n=20,
    )

📥 Downloading data from https://raw.githubusercontent.com/FreedomIntelligence/Mamo/main/data/optimization/Easy_LP/mamo_easy_lp.jsonl
✅ Downloaded 652 lines from the file
Parsed 652 instances
🔀 Randomly sampled 20 instances
💾 Saved output to mamo_easy_lp_subset.json

Example instance:
{
  "id": 115,
  "Question": "A tourism manager is planning to organize two types of tours: $X$ and $Y$. The total number of tours that can be arranged is limited to 500 due to resource constraints. To achieve a certain diversity objective, the combined frequency of 3 times tour X and 2 times tour Y must be at least 1000. Furthermore, the difference between the number of type X and type Y tours should not exceed 200 for balancing purposes. Each tour X requires an investment of $\\$50$, while each tour Y needs an investment of $\\$30$. The manager aims to minimize the total cost while adhering to these constraints and ensuring that the number of both types of tours are integers. Calculate the min

In [2]:
import requests
import json
import os

def download_and_extract_mamo_complex_lp(
    url="https://raw.githubusercontent.com/FreedomIntelligence/Mamo/main/data/optimization/Complex_LP/mamo_complex_lp.jsonl",
    output_file="mamo_complex_lp_subset.json",
    n=40,
    timeout=30
):
    """
    Downloads a JSON Lines file, extracts the first n instances,
    and writes them out as a JSON list to output_file.

    Args:
        url (str): Direct URL to the .jsonl file.
        output_file (str): Path to write the output JSON file. Missing parent
            directories are created.
        n (int | None): Number of leading instances to keep (applied as the
            slice data[:n], so None keeps everything).
        timeout (float): Seconds to wait for the HTTP request.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    print(f"📥 Downloading data from {url}")
    # A timeout keeps a stalled connection from hanging the caller forever.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    # Split on "\n" only: str.splitlines() also breaks on U+2028/U+2029/U+0085,
    # which may appear unescaped inside JSON strings and would cut a record in
    # half. Blank lines are skipped so they cannot crash json.loads.
    lines = [line for line in resp.text.split("\n") if line.strip()]
    print(f"✅ Downloaded {len(lines)} lines from the file")

    # Parse each line as JSON
    data = [json.loads(line) for line in lines]
    print(f"Parsed {len(data)} instances")

    # Select the first n instances
    data_subset = data[:n]
    print(f"📊 Extracted the first {len(data_subset)} instances")

    # Save to JSON
    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data_subset, f, indent=2, ensure_ascii=False)
    print(f"💾 Saved output to {output_file}")

    # Print the first instance as a preview (the subset may be empty)
    print("\nExample instance:")
    if data_subset:
        print(json.dumps(data_subset[0], indent=2, ensure_ascii=False))
    else:
        print("(no instances to preview)")


# Entry point: build the 40-instance Complex_LP subset when run as the main module.
if __name__ == "__main__":
    download_and_extract_mamo_complex_lp(
        output_file="mamo_complex_lp_subset.json",
        n=40,
    )

📥 Downloading data from https://raw.githubusercontent.com/FreedomIntelligence/Mamo/main/data/optimization/Complex_LP/mamo_complex_lp.jsonl
✅ Downloaded 211 lines from the file
Parsed 211 instances
📊 Extracted the first 40 instances
💾 Saved output to mamo_complex_lp_subset.json

Example instance:
{
  "id": 1,
  "Question": "Imagine you are a dietitian and you have been tasked with creating a meal plan for a bodybuilder. You have six food items to choose from: Steak, Tofu, Chicken, Broccoli, Rice, and Spinach. Each food provides certain amounts of protein, carbohydrates, and calories, and each has its own cost.\n\nHere's the nutritional value and cost of each food:\n\n- Steak: It gives you 14 grams of protein, 23 grams of carbohydrates, and 63 calories for $4.\n- Tofu: It offers 2 grams of protein, 13 grams of carbohydrates, and 162 calories for $6.\n- Chicken: It packs a punch with 17 grams of protein, 13 grams of carbohydrates, and gives you 260 calories for $6.\n- Broccoli: