In [3]:
# Excel to JSON Converter for Exercise Database
# Run this in a Jupyter notebook cell

import pandas as pd
import json
import numpy as np

# Load the first worksheet of the exercise workbook and, in the same step,
# discard any row whose 'exercise_name' cell is empty.
df = pd.read_excel(
    'OA_Knee_Exercise_Database.xlsx',
    sheet_name=0,
).dropna(subset=['exercise_name'])

# Report how many exercises remain to be converted.
print(f"Processing {len(df)} exercises...")

# Difficulty category mapping (spreadsheet code -> label written to the output)
difficulty_mapping = {
    'B': 'beginner',
    'BI': 'beginner_to_intermediate',
    'I': 'intermediate',
    'IA': 'intermediate_to_advance',
    'A': 'advance',
}

# Position columns mapping (spreadsheet column -> position name in the output)
position_columns = {
    'position_SL_stand': 'SL_stand',
    'position_split_stand': 'split_stand',
    'position_DL_stand': 'DL_stand',
    'position_quadruped': 'quadruped',
    'position_supine_lying': 'supine_lying',
    'position_side_lying': 'side_lying',
}

# Muscle columns mapping (spreadsheet column -> muscle name in the output).
# Each column is paired with a '<column>_value' column holding its score.
muscle_columns = {
    'muscle_quad': 'quad',
    'muscle_hamstring': 'hamstring',
    'muscle_glute_max': 'glute_max',
    'muscle_hip_flexors': 'hip_flexors',
    'muscle_glute_med_min': 'glute_med_min',
    'muscle_adductors': 'adductors',
}

# Muscle role code found in a muscle column -> key of the output "muscles" dict.
_MUSCLE_ROLE_KEYS = {
    'P': 'primary_movers',
    'N': 'secondary_movers',
    'S': 'stabiliser',
}


def _split_list_cell(cell):
    """Split a comma-separated cell into stripped, non-empty items.

    A missing cell (None/NaN) yields an empty list.
    """
    if pd.isna(cell):
        return []
    return [item.strip() for item in str(cell).split(',') if item.strip()]


def _is_marked(cell):
    """Return True only when the cell holds the 'O' marker.

    The result is wrapped in ``bool`` so it is always a builtin bool and
    therefore always JSON-serialisable.
    """
    return bool(pd.notna(cell) and cell == 'O')


# Function to convert each row to the desired JSON format
def convert_row_to_json(row):
    """Convert one spreadsheet row into the structured exercise dict.

    Args:
        row: A pandas Series (one DataFrame row). The position columns, the
            muscle columns with their '<column>_value' partners,
            'exercise_name', 'difficulty_level' and 'difficulty_category' are
            read with ``row[...]`` and must exist. 'exercise_name_ch',
            'Safety flags', 'Activities', 'Regression', 'Progression',
            'core_ipsi', 'core_contra' and 'Toe_touch' are optional.

    Returns:
        A dict with the keys exercise_name, exercise_name_ch, positions,
        muscles, difficulty, safety_constraints, sport_similarity,
        progression_from, progression_to, core_ipsi, core_contra, toe_touch
        and clinical_summary.

    Raises:
        KeyError: If one of the required columns is absent from ``row``.
    """
    # Basic info. A blank Chinese-name cell arrives as NaN; emit '' instead so
    # the JSON output never contains the invalid bare token NaN.
    name_ch = row.get('exercise_name_ch', '')
    if pd.isna(name_ch):
        name_ch = ''
    exercise_data = {
        "exercise_name": row['exercise_name'],
        "exercise_name_ch": name_ch,
    }

    # Positions: every position column marked with 'O'.
    exercise_data["positions"] = [
        pos_name
        for col, pos_name in position_columns.items()
        if _is_marked(row[col])
    ]

    # Muscles, grouped by role code. A muscle is recorded only when it has a
    # role code and a positive score; unknown role codes are ignored.
    muscles = {role_key: [] for role_key in _MUSCLE_ROLE_KEYS.values()}
    for col, muscle_name in muscle_columns.items():
        muscle_type = row[col]
        raw_value = row[col + '_value']
        muscle_value = raw_value if pd.notna(raw_value) else 0

        if pd.notna(muscle_type) and muscle_value > 0:
            role_key = _MUSCLE_ROLE_KEYS.get(muscle_type)
            if role_key is not None:
                muscles[role_key].append(
                    {"muscle": muscle_name, "value": int(muscle_value)}
                )
    exercise_data["muscles"] = muscles

    # Difficulty: a missing level defaults to 1, a missing or unknown
    # category falls back to 'beginner'.
    raw_level = row['difficulty_level']
    raw_category = row['difficulty_category']
    difficulty_cat = raw_category if pd.notna(raw_category) else 'B'
    exercise_data["difficulty"] = {
        "level": int(raw_level) if pd.notna(raw_level) else 1,
        "category": difficulty_mapping.get(difficulty_cat, 'beginner'),
    }

    # Comma-separated list cells (absent column or blank cell -> empty list).
    exercise_data["safety_constraints"] = _split_list_cell(row.get('Safety flags', ''))
    exercise_data["sport_similarity"] = _split_list_cell(row.get('Activities', ''))
    exercise_data["progression_from"] = _split_list_cell(row.get('Regression', ''))
    exercise_data["progression_to"] = _split_list_cell(row.get('Progression', ''))

    # Core and toe touch flags
    exercise_data["core_ipsi"] = _is_marked(row.get('core_ipsi'))
    exercise_data["core_contra"] = _is_marked(row.get('core_contra'))
    exercise_data["toe_touch"] = _is_marked(row.get('Toe_touch'))

    # Clinical summary (empty for now)
    exercise_data["clinical_summary"] = ""

    return exercise_data

def _flatten_exercise(exercise):
    """Flatten one structured exercise dict into a single-level CSV row.

    List fields are joined with commas; muscle entries are rendered as
    'muscle:value' pairs.
    """
    def join_muscles(role_key):
        return ','.join(
            f"{m['muscle']}:{m['value']}" for m in exercise['muscles'][role_key]
        )

    return {
        'exercise_name': exercise['exercise_name'],
        'exercise_name_ch': exercise['exercise_name_ch'],
        'positions': ','.join(exercise['positions']),
        'primary_movers': join_muscles('primary_movers'),
        'secondary_movers': join_muscles('secondary_movers'),
        'stabiliser': join_muscles('stabiliser'),
        'difficulty_level': exercise['difficulty']['level'],
        'difficulty_category': exercise['difficulty']['category'],
        'safety_constraints': ','.join(exercise['safety_constraints']),
        'sport_similarity': ','.join(exercise['sport_similarity']),
        'progression_from': ','.join(exercise['progression_from']),
        'progression_to': ','.join(exercise['progression_to']),
        'core_ipsi': exercise['core_ipsi'],
        'core_contra': exercise['core_contra'],
        'toe_touch': exercise['toe_touch'],
        'clinical_summary': exercise['clinical_summary'],
    }


# Convert all rows. A row that fails is reported and skipped so that one bad
# row does not abort the whole export (deliberate best-effort).
exercises_json = []
for idx, row in df.iterrows():
    try:
        exercise_json = convert_row_to_json(row)
    except Exception as e:
        print(f"✗ Error processing row {idx}: {e}")
    else:
        exercises_json.append(exercise_json)
        print(f"✓ Processed: {exercise_json['exercise_name']}")

print(f"\nSuccessfully converted {len(exercises_json)} exercises")

# Save to JSON file (ensure_ascii=False keeps non-ASCII names readable)
with open('exercises_formatted.json', 'w', encoding='utf-8') as f:
    json.dump(exercises_json, f, indent=2, ensure_ascii=False)

# Also convert to CSV format (flattened): one flat row per exercise
csv_data = [_flatten_exercise(exercise) for exercise in exercises_json]

# Save to CSV ('utf-8-sig' writes a BOM at the start of the file)
csv_df = pd.DataFrame(csv_data)
csv_df.to_csv('exercises_formatted.csv', index=False, encoding='utf-8-sig')

print("\nFiles saved:")
print("- exercises_formatted.json (structured JSON)")
print("- exercises_formatted.csv (flattened CSV)")

# Display first exercise as example. Guarded so that an empty result (empty
# sheet or every row failing) does not end the run with an IndexError.
if exercises_json:
    print("\nExample output (first exercise):")
    print(json.dumps(exercises_json[0], indent=2, ensure_ascii=False))
else:
    print("\nNo exercises were converted; nothing to preview.")

Processing 32 exercises...
✓ Processed: Straight leg raise
✓ Processed: Double leg glute bridging
✓ Processed: Double leg hamstrings bridging
✓ Processed: Single leg glute bridging
✓ Processed: Single leg hamstrings bridging
✓ Processed: Scissors
✓ Processed: 3-months supine hip lowering
✓ Processed: Deadbug
✓ Processed: Side lying Clamshell
✓ Processed: Side plank on knees hip dip
✓ Processed: Side plank on knees hold
✓ Processed: Side plank Clamshell
✓ Processed: Side plank hip abduction
✓ Processed: Copenhagen adductor lv1
✓ Processed: Copenhagen adductor lv2
✓ Processed: Quadruped single limb movement
✓ Processed: Quadruped donkey kick
✓ Processed: Quadruped leg extension
✓ Processed: Quadruped hip abduction
✓ Processed: Birddog
✓ Processed: DL squat
✓ Processed: DL squat with band
✓ Processed: DL squat with adductor squeeze
✓ Processed: Hip hinge
✓ Processed: Split leg squat
✓ Processed: Backward Lunge
✓ Processed: Step up
✓ Processed: Side squat
✓ Processed: Hip hikes
✓ Processed