In [1]:
import requests
import pandas as pd

# Endpoint requested by get_ravelry_attributes() to obtain the pattern attribute groups.
URL = "https://api.ravelry.com/pattern_attributes/groups.json"

def get_ravelry_attributes():
    """Fetch the list of pattern attribute groups from the Ravelry API.

    Returns:
        - the value stored under ``'attribute_groups'`` when the response body
          is a JSON object (``None`` if that key is missing);
        - the payload itself when the response body is already a JSON list;
        - ``None`` when the request fails, the body is not valid JSON, or the
          payload is neither an object nor a list. A message is printed in
          each of these failure cases.
    """
    try:
        # A timeout keeps the call from hanging forever on a stalled connection.
        response = requests.get(URL, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"An error occurred: {e}")
        return None

    if isinstance(data, dict):
        # Usual shape: the groups are wrapped in a single 'attribute_groups' key.
        return data.get('attribute_groups')
    if isinstance(data, list):
        # The payload is already the bare list of groups; pass it through
        # instead of failing on a missing .get().
        return data

    print(f"An error occurred: unexpected payload type {type(data).__name__}")
    return None

def parse_attributes_recursively(group, parent_category, data_list):
    """
    Flatten one attribute group, and every group nested beneath it, into rows.

    Args:
        group: Mapping with a 'name' entry and, optionally, a
            'pattern_attributes' list and a 'children' list of nested groups.
        parent_category: Category path of the enclosing group; a falsy value
            (e.g. None) marks ``group`` as top-level.
        data_list: List that receives one dict per attribute; it is mutated
            in place and nothing is returned.
    """
    # Build the breadcrumb path, e.g. "Age / Size / Fit -> Age or Size".
    group_name = group['name']
    if parent_category:
        category_path = f"{parent_category} -> {group_name}"
    else:
        category_path = group_name

    # Attributes attached directly to this group. A missing or empty
    # 'pattern_attributes' entry simply yields no rows.
    for attr in group.get('pattern_attributes') or []:
        # Entries named 'placeholder' are skipped; every other one becomes a row.
        if attr.get('name') != 'placeholder':
            row = {
                'category': category_path,
                'attribute_id': attr.get('id'),
                'attribute_name': attr.get('name'),
                'description': attr.get('description', ''),
            }
            data_list.append(row)

    # Descend into nested groups, passing the current path down as their parent.
    for subgroup in group.get('children') or []:
        parse_attributes_recursively(subgroup, category_path, data_list)


if __name__ == "__main__":
    # Fetch the top-level attribute groups; the helper returns None when the
    # request fails, and an empty result is treated the same way below.
    top_level_groups = get_ravelry_attributes()

    if not top_level_groups:
        print("Failed to retrieve attribute groups from the API.")
    else:
        # Flatten every top-level group (and its nested children) into one
        # shared list of row dicts, then build the frame in a single step.
        all_attributes = []
        for group in top_level_groups:
            parse_attributes_recursively(group, None, all_attributes)

        df = pd.DataFrame(all_attributes)

        if df.empty:
            # Nothing was extracted: dump the raw payload to help diagnose why.
            print("⚠️ The DataFrame is empty. Check the raw API response.")
            print("Raw Data:", top_level_groups)
        else:
            print("✅ Success! The data has been processed correctly.\n")
            print("## DataFrame Head ##")
            print(df.head())

            print("\n## DataFrame Info ##")
            df.info()

✅ Success! The data has been processed correctly.

## DataFrame Head ##
        category  attribute_id          attribute_name  \
0  Accessibility           322         adaptive design   
1  Accessibility           323   medical device access   
2  Accessibility           324  medical device support   
3  Accessibility           325    mobility aid support   
4  Accessibility           327                   other   

                                         description  
0  Design has features to support disability or m...  
1  Design provides for access to/from a medical d...  
2        Design is an accessory for a medical device  
3          Design is an accessory for a mobility aid  
4                                                     

## DataFrame Info ##
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251 entries, 0 to 250
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   category        251 non-null  

In [2]:
# Remove categories that are not useful for the machine learning model
categories_to_remove = [
    'Accessibility',
    'Mature Content',
    'Sock Techniques',
    'Sock Techniques -> Heel',
    'Sock Techniques -> Toe',
    'Crochet Techniques',
    'Construction',
    'Pattern Instructions',
]

# isin() flags rows whose category is in the list; ~ inverts the mask so only
# the rows NOT in the list are kept.
df = df.loc[~df['category'].isin(categories_to_remove)]

In [3]:
# Write the current DataFrame to a CSV file; index=False omits the row index column.
df.to_csv('ravelry_pattern_attributes.csv', index=False)

In [4]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
df

Unnamed: 0,category,attribute_id,attribute_name,description
6,Age / Size / Fit -> Age or Size,10,adult,18 years or over
7,Age / Size / Fit -> Age or Size,4,baby,birth to 12 months
8,Age / Size / Fit -> Age or Size,8,child (4-12),4 to 12 years
9,Age / Size / Fit -> Age or Size,20,doll,scaled for a toy figure
10,Age / Size / Fit -> Age or Size,5,newborn,to fit a newborn baby
...,...,...,...,...
230,Shapes,242,rectangle,four-sided with opposite sides of equal length...
231,Shapes,248,sphere,three dimensional figure all points of which a...
232,Shapes,243,square,"four-sided with all sides of equal length, adj..."
233,Shapes,251,star,shaped like a star
