# Imports and Functions

In [5]:
import requests
import pandas as pd
import csv

In [6]:
def save_to_csv(data, filename):
    """Write an iterable of dict records to ``filename`` as a UTF-8 CSV file.

    The header is the union of every key found in the records, listed in the
    order each key is first seen, so repeated runs over the same data produce
    the same column layout. A record that lacks a key gets an empty cell in
    that column.

    Args:
        data: Iterable of dict-like records. It is materialized into a list
            up front, so one-shot iterators and generators are accepted.
        filename: Path of the CSV file to create or overwrite.
    """
    # Two passes are needed (collect the header, then write the rows), so
    # materialize once; a generator would be exhausted by the first pass.
    records = list(data)

    # A dict keeps insertion order. A set of strings does not have a stable
    # iteration order between interpreter runs, which would shuffle columns.
    fieldnames = {}
    for item in records:
        for key in item:
            fieldnames.setdefault(key)
    columns = list(fieldnames)

    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=columns)
        writer.writeheader()
        # Missing fields are written as None, which csv renders as an empty cell.
        writer.writerows(
            {field: item.get(field) for field in columns} for item in records
        )

def fetch_motor_data(max_results=1500, timeout=60):
    """Fetch motor records from the ThrustCurve search endpoint.

    Args:
        max_results: Value sent as the request's ``maxResults`` field.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The value stored under the ``'results'`` key of the decoded JSON
        response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the decoded response has no ``'results'`` key.
    """
    url = "https://www.thrustcurve.org/api/v1/search.json"
    headers = {
        "Content-Type": "application/json",
    }
    payload = {
        # Sent as a string, matching the request format this script has
        # always used (default: "1500").
        "maxResults": str(max_results),
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()

    return response.json()['results']

def fetch_thrust_samples(motor_ids, max_results=100, timeout=60):
    """Request thrust-curve sample data for a batch of motors.

    This is a best-effort call: a non-200 response or a timeout is reported
    on stdout and signalled by returning ``None`` so a caller looping over
    batches can skip the failed batch and continue.

    Args:
        motor_ids: List of motor ids sent as the request's ``motorIds`` field.
        max_results: Value sent as the request's ``maxResults`` field.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None``.
    """
    # Use https like the other endpoints in this file. With plain http a
    # redirect to https may make requests re-issue the POST as a GET and
    # drop the JSON body.
    url = "https://www.thrustcurve.org/api/v1/download.json"
    headers = {"Content-Type": "application/json"}
    data = {
        "motorIds": motor_ids,
        "format": "RASP",
        "data": "samples",
        "maxResults": max_results
    }
    try:
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
    except requests.exceptions.Timeout:
        print(f"Error fetching data for MotorIds {motor_ids}: request timed out")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error fetching data for MotorIds {motor_ids}: {response.status_code}")
    return None

## Metadata

Start by downloading the metadata (certification organizations, diameters, manufacturers, etc.) and saving each non-empty list as its own CSV file.

In [7]:
api_url = 'https://www.thrustcurve.org/api/v1/metadata.json'

# Send a GET request to the API. The timeout keeps the cell from hanging
# forever, and raise_for_status() stops on an HTTP error before decoding.
response = requests.get(api_url, timeout=60)
response.raise_for_status()
data = response.json()

# Write every non-empty list in the response to its own CSV file, named after
# its key. The ./data_raw/ directory must already exist; to_csv does not
# create it.
for key, values in data.items():
    if isinstance(values, list) and values:
        filename = f"./data_raw/{key}.csv"
        df = pd.DataFrame(values)
        df.to_csv(filename, index=False)

In [8]:
# Fetch motor data from API and persist it as CSV
data = fetch_motor_data()
save_to_csv(data, './data_raw/rocket_motors.csv')

n = 100  # Number of motorIds to fetch per batch
df_motors = pd.read_csv('./data_raw/rocket_motors.csv')
motor_ids = df_motors['motorId'].unique().tolist()
batches = [motor_ids[i:i + n] for i in range(0, len(motor_ids), n)]
all_records = []

# NOTE(review): fetch_thrust_samples caps each request with its own
# maxResults value. If the API counts data files rather than motors, a batch
# of n motors with several files each could be truncated. Confirm against
# the API documentation.
for batch in batches:
    response = fetch_thrust_samples(batch)
    if not response:
        # Failed batch (already reported by fetch_thrust_samples); skip it.
        continue

    # Keep one result per motor: the first one seen, unless a later result
    # comes from the 'cert' source, which takes priority.
    grouped_results = {}
    for item in response.get('results', []):
        motor_id = item.get('motorId')
        if motor_id not in grouped_results or item.get('source') == 'cert':
            grouped_results[motor_id] = item

    # Flatten each kept result into one record per (time, thrust) sample.
    for motor_id, item in grouped_results.items():
        all_records.extend(
            {'motorId': motor_id, 'time': sample.get('time'), 'thrust': sample.get('thrust')}
            # "or []" also covers a result whose 'samples' value is null.
            for sample in (item.get('samples') or [])
        )

# Convert all_records to a DataFrame and save. Explicit columns ensure the
# CSV still gets its header row when no samples were collected.
df_results = pd.DataFrame(all_records, columns=['motorId', 'time', 'thrust'])
df_results.to_csv('./data_raw/thrust_samples.csv', index=False)