In [None]:
import os
import sys
import pandas as pd
import requests
from dotenv import load_dotenv
import time

# Read the local .env file so its entries become visible as environment variables
load_dotenv()

BASE_URL = "https://api.ravelry.com"

# Ravelry credentials, looked up from the environment; each one is None
# when the corresponding variable has not been set
RAVELRY_ACCESS_KEY = os.environ.get('RAVELRY_ACCESS_KEY')
RAVELRY_PERSONAL_KEY = os.environ.get('RAVELRY_PERSONAL_KEY')

In [None]:
# Sanity check: confirm that both credentials were loaded WITHOUT echoing the
# secret values, because cell output is saved (and shared) with the notebook.
print(f"Access Key Loaded: {bool(RAVELRY_ACCESS_KEY)}")
print(f"Personal Key Loaded: {bool(RAVELRY_PERSONAL_KEY)}")

Access Key Loaded: [REDACTED]
Personal Key Loaded: [REDACTED]


In [None]:
def extract_categories(category_data):
    """
    Collect the names of every category and all of its ancestors.

    Args:
        category_data: Either a single category dict or a list of category
            dicts. Each dict may carry a 'name' and a nested 'parent' dict,
            which is followed upwards until no parent dict remains.

    Returns:
        A list of unique category names in first-seen order (each chain is
        walked from the category itself up to its root). Returns an empty
        list when the input is neither a dict nor a list.
    """
    if isinstance(category_data, dict):
        chains = [category_data]
    elif isinstance(category_data, list):
        # Walk every entry, not just the first one, so patterns filed under
        # several categories keep all of them.
        chains = category_data
    else:
        return []

    # A dict is used as an ordered set: keys keep insertion order, which
    # makes the result reproducible from run to run (unlike list(set(...))).
    names = {}
    for chain_start in chains:
        node = chain_start
        # Non-dict entries fail this check immediately and are skipped.
        while isinstance(node, dict):
            if 'name' in node:
                names.setdefault(node['name'], None)
            node = node.get('parent')

    return list(names)

In [None]:
def extract_permalinks(attribute_list):
    """
    Extract the 'permalink' value from each dictionary in a list.

    Args:
        attribute_list: A list of attribute dicts. Any other input type
            yields an empty result.

    Returns:
        A list with the 'permalink' value of every dict that has one, in
        input order. Entries that are not dicts (e.g. None, strings,
        numbers) or that lack the key are skipped.
    """
    if not isinstance(attribute_list, list):
        return []

    return [
        item['permalink']
        for item in attribute_list
        # The dict check prevents a TypeError on None/numeric entries and
        # stops a plain string containing "permalink" from being indexed.
        if isinstance(item, dict) and 'permalink' in item
    ]

In [None]:
def _empty_pattern_details():
    """Return the placeholder record used when a pattern cannot be fetched."""
    return {
        'Craft': None, 'Categories': [], 'Attributes': [], 'Gauge': None,
        'Difficulty Average': None, 'Downloadable': False, 'Gauge Divisor': None,
        'Gauge Pattern': None, 'Pattern Type': None, 'Yarn Weight': None,
        'Projects Count': 0, 'Rating Average': None, 'Sizes Available': ""
    }


def _nested_field(record, key, field):
    """Return record[key][field], or None if either level is missing or not a dict."""
    inner = record.get(key)
    return inner.get(field) if isinstance(inner, dict) else None


def get_pattern_details(pattern_id, delay=0.5, timeout=30):
    """
    Fetch the details of one pattern from the API and flatten them into a dict.

    Args:
        pattern_id: Identifier interpolated into the pattern details URL.
        delay: Seconds to sleep before the request, as simple rate limiting
            when this is called in a loop. Use 0 to disable.
        timeout: Seconds passed to requests.get as its timeout, so a stalled
            connection cannot hang the whole run.

    Returns:
        A dict with the keys 'Craft', 'Categories', 'Attributes', 'Gauge',
        'Difficulty Average', 'Downloadable', 'Gauge Divisor',
        'Gauge Pattern', 'Pattern Type', 'Yarn Weight', 'Projects Count',
        'Rating Average' and 'Sizes Available'. If the request fails or the
        body is not valid JSON, a message is printed and a record with the
        same keys and placeholder values is returned instead of raising.
    """
    if delay and delay > 0:
        time.sleep(delay)

    details_url = f"{BASE_URL}/patterns/{pattern_id}.json"

    try:
        response = requests.get(
            details_url,
            auth=(RAVELRY_ACCESS_KEY, RAVELRY_PERSONAL_KEY),
            timeout=timeout,
        )
        response.raise_for_status()
        details_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"Could not fetch data for pattern ID {pattern_id}: {e}")
        return _empty_pattern_details()

    # The payload may omit 'pattern' or set it to null; treat both as empty
    # so one odd record cannot abort a long batch run.
    pattern_data = details_data.get('pattern') if isinstance(details_data, dict) else None
    if not isinstance(pattern_data, dict):
        pattern_data = {}

    return {
        # Nested objects can be missing or null, hence the guarded lookups.
        'Craft': _nested_field(pattern_data, 'craft', 'name'),
        'Categories': extract_categories(pattern_data.get('pattern_categories', [])),
        'Attributes': extract_permalinks(pattern_data.get('pattern_attributes', [])),
        'Gauge': pattern_data.get('gauge', None),
        'Difficulty Average': pattern_data.get('difficulty_average', None),
        'Downloadable': pattern_data.get('downloadable', False),
        'Gauge Divisor': pattern_data.get('gauge_divisor', None),
        'Gauge Pattern': pattern_data.get('gauge_pattern', None),
        'Pattern Type': _nested_field(pattern_data, 'pattern_type', 'permalink'),
        'Yarn Weight': _nested_field(pattern_data, 'yarn_weight', 'name'),
        'Projects Count': pattern_data.get('projects_count', 0),
        'Rating Average': pattern_data.get('rating_average', None),
        'Sizes Available': pattern_data.get('sizes_available', "")
    }

def add_details_to_df(df):
    """
    Look up the details of every pattern in df['ID'] and append them as columns.

    get_pattern_details is called once per row, in row order; each key of
    the dict it returns becomes one new column.

    Args:
        df: DataFrame with an 'ID' column holding the pattern identifiers.

    Returns:
        A new DataFrame made of df's columns followed by the detail columns,
        aligned on df's index. df itself is not modified. An empty input is
        returned as a copy with no detail columns, because no lookup ran to
        supply the column names.
    """
    records = [get_pattern_details(pattern_id) for pattern_id in df['ID']]

    if not records:
        return df.copy()

    # Build the detail frame once from the list of dicts instead of creating
    # a pandas Series per row; reusing df's index keeps the join row-aligned.
    details = pd.DataFrame(records, index=df.index)

    return df.join(details)

In [None]:
# Read the pattern list from disk and enrich every row with its pattern details
sweaters_df = add_details_to_df(pd.read_csv("sweaters.csv"))

# Write the enriched table to a separate CSV, leaving the index column out
sweaters_df.to_csv("sweaters_with_details.csv", index=False)