In [1]:
import os
import sys
import pandas as pd
import requests
from dotenv import load_dotenv
import time

# Read the local .env file so its entries become visible through os.environ.
load_dotenv()

# Root URL that every Ravelry API endpoint in this notebook is built from.
BASE_URL = "https://api.ravelry.com"

# API credentials, looked up after load_dotenv() has run.
# Each value is None when the corresponding variable is not set.
RAVELRY_ACCESS_KEY = os.environ.get('RAVELRY_ACCESS_KEY')
RAVELRY_PERSONAL_KEY = os.environ.get('RAVELRY_PERSONAL_KEY')



In [2]:
# Sanity check that both credentials were picked up from the environment.
# Only report presence (True/False), never the values themselves: cell output
# is saved inside the notebook file, so printing a key would leak it to anyone
# the notebook is shared with.
print(f"Access Key Loaded: {bool(RAVELRY_ACCESS_KEY)}")
print(f"Personal Key Loaded: {bool(RAVELRY_PERSONAL_KEY)}")

Access Key Loaded: [REDACTED]
Personal Key Loaded: [REDACTED]


Writing a function to test grabbing different attributes that may be useful in analysis:

In [None]:

def search_patterns(craft, pc, max_pages=20, page=1):
    """
    Search Ravelry for patterns of a given craft and category, following
    pagination, and return the collected results as a flat table.

    Args:
        craft (str): The craft to search for (e.g., "knitting").
        pc (str): The pattern category slug (e.g., "sweater|cardigan").
        max_pages (int | None): Page number at which to stop. Fetching ends
            once the current page number reaches this value, so with the
            default ``page=1`` it is also the maximum number of pages
            fetched. Set to None to continue until the API reports the last
            page or returns an empty page.
        page (int): Page number to start fetching from.

    Returns:
        pd.DataFrame | None: One row per pattern with the columns Name,
        Designer, ID, URL, Free and Photo (the columns are present even when
        no pattern matched). Returns None if any request fails; pages
        collected before the failure are discarded.
    """
    endpoint = f"{BASE_URL}/patterns/search.json"
    all_patterns = []

    print(f"Starting pattern search for craft: '{craft}', category: '{pc}'")

    while True:
        params = {
            "craft": craft,
            "pc": pc,
            "page_size": 100,
            "page": page
        }

        try:
            print(f"Fetching page {page}...")
            # A timeout keeps a stalled connection from hanging the whole
            # crawl; requests raises a RequestException subclass when it
            # expires, which is handled below like any other request error.
            response = requests.get(
                endpoint,
                auth=(RAVELRY_ACCESS_KEY, RAVELRY_PERSONAL_KEY),
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"An error occurred: {e}")
            return None

        # `or` also covers keys that are present but explicitly null.
        patterns_on_page = data.get('patterns') or []

        if not patterns_on_page:
            print("No more patterns found. Ending search.")
            break

        all_patterns.extend(patterns_on_page)

        paginator = data.get('paginator') or {}
        if paginator.get('last_page') == page:
            print("Reached the last page of results.")
            break

        if max_pages is not None and page >= max_pages:
            print(f"Reached max_pages limit of {max_pages}.")
            break

        page += 1
        # Pause between requests to avoid hammering the API.
        time.sleep(1)

    patterns_data = []
    for pattern in all_patterns:
        # dict.get(key, {}) only falls back to {} when the key is missing;
        # a key that is present with a None value would make the chained
        # .get() raise AttributeError, so normalise None to {} first.
        designer = pattern.get('designer') or {}
        first_photo = pattern.get('first_photo') or {}
        patterns_data.append({
            'Name': pattern.get('name'),
            'Designer': designer.get('name'),
            'ID': pattern.get('id'),
            'URL': f"https://www.ravelry.com/patterns/library/{pattern.get('permalink')}",
            'Free': pattern.get('free'),
            'Photo': first_photo.get('medium2_url'),
        })

    print(f"Total patterns collected: {len(patterns_data)}")
    # Passing the columns explicitly keeps the schema stable when nothing matched.
    return pd.DataFrame(
        patterns_data,
        columns=['Name', 'Designer', 'ID', 'URL', 'Free', 'Photo'],
    )

In [4]:
# Pull each category separately, with max_pages=200 for both.
cardigans = search_patterns("knitting", "cardigan", 200)
pullovers = search_patterns("knitting", "pullover", 200)

# Combine both categories and drop rows that are exact duplicates.
# NOTE(review): search_patterns returns None when a request fails, and
# pd.concat drops None entries, so a failed category would be missing
# from the CSV without any error here — check the log output above.
sweaters = pd.concat([cardigans, pullovers]).drop_duplicates().reset_index(drop=True)

# to_csv returns None when given a path, so its result must not be assigned
# back to `sweaters`; doing so would discard the DataFrame.
sweaters.to_csv("sweaters_v3.csv", index=False)

Starting pattern search for craft: 'knitting', category: 'cardigan'
Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
F

In [5]:
# Older run to pull 2,000 samples of each category
# cardigans = search_patterns("knitting", "cardigan")
# pullovers = search_patterns("knitting", "pullover")

# sweaters = pd.concat([cardigans, pullovers]).drop_duplicates().reset_index(drop=True)
# sweaters = sweaters.to_csv("sweaters_v2.csv", index=False)


In [6]:
# Old run that only searched for "sweater" -- pulled in mostly pullover sweaters
#sweaters = search_patterns("sweater")
#sweaters.to_csv("sweaters.csv", index=False)