## Exploring AAC materials

In [8]:
import requests
import seaborn as sns
from collections import Counter, defaultdict
import numpy as np
from PIL import Image
from io import BytesIO
#from IPython.display import display, Image, HTML
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

In [2]:
base_url = "https://api.arasaac.org/api"
language = "en"

In [3]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [None]:
pd.reset_option('display.max_rows')

ARASAAC API provides access to a variety of resources on AAC and using pictograms. To explore these materials, we begin with a general overview

In [17]:
import requests
import pandas as pd
from collections import defaultdict

def get_materials_by_category(max_items=100, language="en"):
    """
    Retrieve materials and organize them by category in a DataFrame
    
    Parameters:
    max_items (int): Maximum number of materials to retrieve
    language (str): Language code for materials
    
    Returns:
    pd.DataFrame: DataFrame with materials organized by categories
    """
    # Get a substantial number of materials to analyze
    url = f"https://api.arasaac.org/v1/materials/new/{max_items}"
    print(f"Fetching data from: {url}")
    
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return pd.DataFrame()
    
    materials = response.json()
    print(f"Retrieved {len(materials)} materials")
    
    # Create dictionary to store materials by category
    category_materials = defaultdict(list)
    area_materials = defaultdict(list)
    
    # Process each material
    for material in materials:
        # Get basic material info
        material_info = {
            "ID": material.get("id"),
            "Title": material.get("title", "No title"),
            "Description": material.get("desc", "")[:100] + "..." if material.get("desc") and len(material.get("desc")) > 100 else material.get("desc", ""),
            "Language": material.get("language", ""),
            "Downloads": material.get("downloads", 0),
            "LastUpdate": material.get("lastUpdate", "")[:10] if material.get("lastUpdate") else "",
            "File Type": material.get("file", {}).get("type", "") if material.get("file") else ""
        }
        
        # Add to activity categories
        if "activity" in material and material["activity"]:
            for activity in material["activity"]:
                category_materials[f"Activity: {activity}"].append(material_info)
        else:
            category_materials["Activity: Uncategorized"].append(material_info)
            
        # Add to area categories
        if "area" in material and material["area"]:
            for area in material["area"]:
                area_materials[f"Area: {area}"].append(material_info)
        else:
            area_materials["Area: Uncategorized"].append(material_info)
    
    # Combine all categories
    all_categories = {**category_materials, **area_materials}
    
    # Create a DataFrame with category information
    categories_data = []
    for category, materials_list in all_categories.items():
        for material in materials_list:
            material_with_category = material.copy()
            material_with_category["Category"] = category
            categories_data.append(material_with_category)
    
    # Create DataFrame
    df = pd.DataFrame(categories_data)
    
    # Optional: Add category counts summary
    print("\nCategory Counts:")
    category_counts = df["Category"].value_counts()
    print(category_counts)
    
    return df

# Get the materials DataFrame
materials_df = get_materials_by_category()

# Display the DataFrame
if not materials_df.empty:
    # Reorder columns to put Category first
    cols = ["Category"] + [col for col in materials_df.columns if col != "Category"]
    materials_df = materials_df[cols]
    
    display(materials_df)
    
    # Save to CSV (optional)
    # materials_df.to_csv("arasaac_materials_by_category.csv", index=False)
    
    print(f"\nTotal materials: {len(materials_df)}")
    print(f"Unique categories: {materials_df['Category'].nunique()}")

Fetching data from: https://api.arasaac.org/v1/materials/new/100
Retrieved 100 materials

Category Counts:
Category
Activity: Uncategorized    100
Area: Uncategorized        100
Name: count, dtype: int64


Unnamed: 0,Category,ID,Title,Description,Language,Downloads,LastUpdate,File Type
0,Activity: Uncategorized,,No title,,,0,,
1,Activity: Uncategorized,,No title,,,0,,
2,Activity: Uncategorized,,No title,,,0,,
3,Activity: Uncategorized,,No title,,,0,,
4,Activity: Uncategorized,,No title,,,0,,
5,Activity: Uncategorized,,No title,,,0,,
6,Activity: Uncategorized,,No title,,,0,,
7,Activity: Uncategorized,,No title,,,0,,
8,Activity: Uncategorized,,No title,,,0,,
9,Activity: Uncategorized,,No title,,,0,,



Total materials: 200
Unique categories: 2
