## Collecting all classes present in the MUD dataset, to map them in our script that generates the synthetic sketches

In [None]:
import os
import json

# Directory holding the MUD dataset JSON files. Set the MUD_DATA_PATH
# environment variable to point somewhere else; the fallback is the original
# author's local checkout, so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    "MUD_DATA_PATH",
    "/home/matheus_mendes/projects/delineo/src/raw-data/mud",
)

# Sorted so the files are visited in the same order on every filesystem.
# (The former bare `len(all_json_files)` expression was dropped: an expression
# in the middle of a cell is evaluated but never displayed.)
all_json_files = sorted(f for f in os.listdir(DATA_PATH) if f.endswith('.json'))

# Unique short class names (text after the last dot) of every visible view.
classes = set()
for json_file in all_json_files:
    # Parse inside the `with`, process afterwards: the handle is released as
    # soon as the document is loaded.
    with open(os.path.join(DATA_PATH, json_file), 'r', encoding='utf-8') as f:
        data = json.load(f)

    for view in data.get('views', []):
        # Views without a truthy 'visible' flag are ignored.
        if not view.get('visible', False):
            continue
        # `or ''` covers both a missing key and an explicit JSON null, which
        # would otherwise make `.split` raise AttributeError.
        class_name = view.get('class') or ''
        # Keep only the last piece after the final dot.
        component = class_name.split('.')[-1]
        if component:
            classes.add(component)

print(len(classes))

147


### Relevant components
'TextView'
'KCheckBox'
'ListView'
'EditText'
'GridView'
'View'
'Image'
'SidebarLayout'
'Gallery'
'MaterialCardView'
'ViewFlipper'
'TabWidget'
'CardView'
'ImageButton'
'TableLayout'
'Button'
'MutedVideoView'
'CompoundButton'
'Switch'
'DrawerLayout'
'ImageView'
'MenuItem'
'Dialog'
'VideoView'
'ToggleButton'

--- 
### VINS Dataset

In [3]:
import os
import xml.etree.ElementTree as ET
from pathlib import Path
from collections import Counter
from tqdm import tqdm

# --- CONFIGURATION ---
# Root of the VINS dataset; it is scanned as <root>/<platform>/Annotations/*.xml.
# Set the VINS_ROOT environment variable to use another location; the fallback
# is the original author's local checkout, so existing runs are unchanged.
VINS_ROOT = Path(
    os.environ.get(
        "VINS_ROOT",
        "/home/matheus_mendes/projects/delineo/src/raw-data/vins",
    )
)

def _iter_object_names(xml_file):
    """Yield the stripped, non-empty <name> text of each <object> directly under the root of one annotation file."""
    root = ET.parse(xml_file).getroot()
    for obj in root.findall('object'):
        name_node = obj.find('name')
        if name_node is None or not name_node.text:
            continue
        # Strip so that stray whitespace around a label does not create a
        # separate bucket for what is the same component name.
        name = name_node.text.strip()
        if name:
            yield name


def get_vins_component_stats(vins_root=None, platforms=("Android", "iphone")):
    """Print how often each component name occurs in the VINS annotation files.

    For every platform, each ``*.xml`` file in ``<root>/<platform>/Annotations``
    is parsed and the ``<name>`` of every ``<object>`` element directly under
    the document root is counted. A table sorted by descending count is
    printed at the end.

    Args:
        vins_root: Dataset root directory. ``None`` (the default) uses the
            module-level ``VINS_ROOT``.
        platforms: Names of the platform sub-directories to scan.

    Returns:
        None. The report is written to stdout only, so calling this function
        as the last statement of a cell does not echo a value.

    Files that fail to parse are reported and skipped; platforms whose
    ``Annotations`` folder is missing are reported and skipped.
    """
    root_dir = VINS_ROOT if vins_root is None else Path(vins_root)
    if not root_dir.exists():
        print(f"❌ VINS_ROOT not found at: {root_dir}")
        return

    component_counter = Counter()
    total_files = 0

    print(f"--- SCANNING VINS DATASET AT {root_dir} ---")

    for platform in platforms:
        ann_dir = root_dir / platform / "Annotations"

        if not ann_dir.exists():
            print(f"⚠️ Warning: Annotations folder not found for {platform}")
            continue

        # Sorted: most_common() lists equal counts in first-seen order, so a
        # fixed file order keeps the printed table reproducible across machines.
        xml_files = sorted(ann_dir.glob("*.xml"))
        total_files += len(xml_files)

        print(f"Scanning {len(xml_files)} files in {platform}...")

        for xml_file in tqdm(xml_files, desc=f"Parsing {platform}"):
            try:
                component_counter.update(_iter_object_names(xml_file))
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole scan.
                print(f"Error parsing {xml_file.name}: {e}")

    print("\n" + "="*45)
    print(f"✅ COMPLETED. Scanned {total_files} files.")
    print(f"Found {len(component_counter)} unique component types.")
    print("="*45)
    print(f"{'COMPONENT NAME':<30} | {'COUNT':<10}")
    print("-" * 45)

    # Sort by count descending (most frequent first)
    for name, count in component_counter.most_common():
        print(f"{name:<30} | {count:<10}")


# Scan the dataset under VINS_ROOT and print the per-component counts.
get_vins_component_stats()

--- SCANNING VINS DATASET AT /home/matheus_mendes/projects/delineo/src/raw-data/vins ---
Scanning 740 files in Android...


Parsing Android: 100%|██████████| 740/740 [00:00<00:00, 4289.65it/s]


Scanning 1207 files in iphone...


Parsing iphone: 100%|██████████| 1207/1207 [00:00<00:00, 4485.47it/s]


✅ COMPLETED. Scanned 1947 files.
Found 16 unique component types.
COMPONENT NAME                 | COUNT     
---------------------------------------------
Text                           | 16186     
Icon                           | 7173      
Image                          | 4708      
TextButton                     | 2273      
UpperTaskBar                   | 1771      
EditText                       | 1290      
PageIndicator                  | 308       
CheckedTextView                | 296       
BackgroundImage                | 158       
Switch                         | 118       
Modal                          | 39        
Drawer                         | 22        
Spinner                        | 3         
Remember                       | 2         
Checkbox                       | 2         
CheckBox                       | 2         



