# Analyze prototype elements

### Count GUI elements

In [1]:
import os
import re
import csv
from bs4 import BeautifulSoup
import pandas as pd
from numpy.ma.extras import row_stack

# Directory that is joined with "<app id>/<method>.html" when looking up prototypes.
generated_guis_path = '../generated_guis'

# Identifiers of the apps to analyse (kept as strings, in the original order).
app_ids = [
    '12740', '14283', '18782', '20947', '22151', '27360', '27382', '27707',
    '30982', '31390', '32310', '3261', '33383', '34346', '34517', '34527',
    '35526', '3727', '37505', '38961', '40673', '43872', '43977', '44756',
    '47926', '49794', '53054', '53469', '54377', '54468', '56905', '58124',
    '59429', '59576', '61851', '63575', '64858', '65592', '67044', '68368',
    '69574', '69587', '70410', '8640',
]

# File stems of the prototype variants looked up for every app.
method_names = [
    'instruction',
    'pd_zs',
    'pd_fs',
    'ref_instruction',
]

# NOTE(review): presumably the target viewport as (width, height) in px; it is
# not referenced by the code visible in this notebook — confirm before relying on it.
dimension_target = (375, 667)

In [6]:
def get_html_files():
    """Collect the generated HTML prototypes that exist on disk.

    Every (app id, method) combination taken from the module-level
    ``app_ids`` and ``method_names`` is mapped to
    ``<generated_guis_path>/<app_id>/<method>.html``; combinations whose
    file is missing are skipped.

    Returns:
        list[dict]: one record per existing file with the keys
        ``'UI_Number'``, ``'Method'`` and ``'path'``, ordered by app id
        first and by method second.
    """
    candidates = (
        (ui_number, variant, os.path.join(generated_guis_path, str(ui_number), f'{variant}.html'))
        for ui_number in app_ids
        for variant in method_names
    )
    return [
        {'UI_Number': ui_number, 'Method': variant, 'path': location}
        for ui_number, variant, location in candidates
        if os.path.isfile(location)
    ]

# Values of the ``type`` attribute that turn an <input>/<button> into a push button.
_BUTTON_TYPES = ('button', 'submit', 'reset')


def _button_type(tag):
    """Return the stripped, lower-cased ``type`` attribute of *tag* ('' if absent)."""
    return str(tag.get('type') or '').strip().lower()


def _triggers_action(tag):
    """Return True when *tag* has an inline handler or is nested inside a <form>."""
    return (
        tag.has_attr('onclick')
        or tag.has_attr('onsubmit')
        or tag.find_parent('form') is not None
    )


def analyze_html(fileinfo):
    """Count buttons and input fields in one generated HTML prototype.

    Args:
        fileinfo (dict): record with the keys ``'UI_Number'``, ``'Method'``
            and ``'path'`` (path of the HTML file to parse).

    Returns:
        dict: ``UI_Number`` and ``Method`` copied from *fileinfo* plus
            ``All_inputs``        number of <input>, <textarea> and <select> elements,
            ``Diff_inputs``       number of distinct (id, name, type) triples among them,
            ``All_buttons``       number of <button> elements and button-like <input>s,
            ``Diff_buttons``      number of distinct (id, name, text, value) tuples,
            ``Clickable_buttons`` buttons with ``onclick`` or an explicit button type,
            ``Action_buttons``    buttons with ``onclick``/``onsubmit`` or inside a <form>,
            ``Dimension_375x667`` ``'YES'``/``'NO'`` depending on whether 375 and 667
                                  appear in the size/style of <body>, <html> or a <div>.

    Raises:
        OSError: if the file named by ``fileinfo['path']`` cannot be opened.
    """
    with open(fileinfo['path'], encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    # All buttons: <button> plus <input type=button|submit|reset>.
    # The pattern is anchored so that only these exact types match, and it is
    # case-insensitive because HTML attribute values such as "Submit" are valid.
    buttons = soup.find_all('button')
    input_buttons = soup.find_all(
        'input', {'type': re.compile(r'^\s*(?:button|submit|reset)\s*$', re.I)}
    )
    all_buttons = buttons + input_buttons

    diff_buttons = {
        (b.get('id'), b.get('name'), b.get_text(strip=True), b.get('value'))
        for b in all_buttons
    }

    # Clickable buttons: inline onclick handler or an explicit button type.
    # The type is normalised so this agrees with the case-insensitive filter above.
    clickable_buttons = [
        b for b in all_buttons
        if b.has_attr('onclick') or _button_type(b) in _BUTTON_TYPES
    ]

    # Action buttons: each element is collected at most once.  A set() of tags
    # must not be used for de-duplication here: Beautiful Soup tags compare and
    # hash by their markup, so two identical-looking buttons would be merged
    # into one and the count would come out too low.
    action_buttons = [b for b in all_buttons if _triggers_action(b)]

    # All input fields.  NOTE: button-like <input> elements are included here as
    # well as in all_buttons, so adding both totals counts them twice.
    input_fields = soup.find_all('input')
    textareas = soup.find_all('textarea')
    selects = soup.find_all('select')
    all_inputs = input_fields + textareas + selects
    diff_inputs = {(inp.get('id'), inp.get('name'), inp.get('type')) for inp in all_inputs}

    # Dimension: look for 375 and 667 in the width/height attributes (falling
    # back to the inline style) of <body> and <html>.  This is a plain substring
    # heuristic, e.g. style="width:375px; height:667px;".
    dimension_found = False
    for tag in (soup.body, soup.html):
        if tag is None:
            continue
        w = tag.get('width') or tag.get('style')
        h = tag.get('height') or tag.get('style')
        if w and h:
            if ('375' in str(w) and '667' in str(h)) or ('375' in str(h) and '667' in str(w)):
                dimension_found = True

    # The size is occasionally set on a wrapper <div> instead.
    if not dimension_found:
        for div in soup.find_all('div'):
            style = div.get('style')
            if style and '375px' in style and '667px' in style:
                dimension_found = True
                break

    return {
        'UI_Number': fileinfo['UI_Number'],
        'Method': fileinfo['Method'],
        'All_inputs': len(all_inputs),
        'Diff_inputs': len(diff_inputs),
        'All_buttons': len(all_buttons),
        'Diff_buttons': len(diff_buttons),
        'Clickable_buttons': len(clickable_buttons),
        'Action_buttons': len(action_buttons),
        'Dimension_375x667': 'YES' if dimension_found else 'NO'
    }

In [7]:
# Analyse every prototype found on disk; `results` holds one dict per file.
html_files = get_html_files()
results = [analyze_html(entry) for entry in html_files]

# Column order of the exported table.
fieldnames = [
    'UI_Number', 'Method',
    'All_inputs', 'Diff_inputs',
    'All_buttons', 'Diff_buttons',
    'Clickable_buttons', 'Action_buttons',
    'Dimension_375x667',
]

# Write a semicolon-separated CSV: header first, then one row per prototype.
with open('gui_prototype_analysis.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=';')
    writer.writeheader()
    writer.writerows(results)

print("Analysis complete. Results saved to gui_prototype_analysis.csv")

Analysis complete. Results saved to gui_prototype_analysis.csv


In [None]:
# Load the semicolon-separated visual evaluation table and show it.
visual_df = pd.read_csv('./design_evaluation/gui_visual_analysis.csv', sep=';')
visual_df

In [None]:
import pandas as pd

# Element counts extracted from the HTML prototypes (`results` is produced by
# the analysis cell) and the visual evaluation table.
all_df = pd.DataFrame(results)
visual_df = pd.read_csv('./design_evaluation/gui_visual_analysis.csv', delimiter=';')

# Encode the YES/NO flag numerically so that it can be summed and compared.
visual_df['Dimensions'] = visual_df['Dimensions'].map({'YES': 1, 'NO': 0})

# Per-UI totals over all of its rows.  numeric_only keeps text columns such as
# 'Method' out of the sums instead of concatenating their strings.
agg_df = visual_df.groupby('UI_Number', as_index=False).sum(numeric_only=True)

# Attach the element count of each prototype by key rather than by row
# position: the two tables are built independently, so their row order (and
# number of rows) is not guaranteed to agree.
# NOTE(review): assumes the CSV uses the same 'Method' labels as `method_names`
# and integer UI numbers — rows without a match get NaN; confirm.
# NOTE(review): button-like <input> elements are part of both All_inputs and
# All_buttons, so they are counted twice in this total.
element_counts = pd.DataFrame({
    'UI_Number': all_df['UI_Number'].astype(int),
    'Method': all_df['Method'],
    'all_elements': all_df['All_inputs'] + all_df['All_buttons'],
})
visual_df = visual_df.merge(element_counts, on=['UI_Number', 'Method'], how='left')

# Share of affected elements; prototypes without any element yield NaN
# instead of a division by zero (inf).
element_total = visual_df['all_elements'].where(visual_df['all_elements'] > 0)
visual_df['nonresponsive_pct'] = visual_df['Nonresponsive'] / element_total
visual_df['covered_pct'] = visual_df['Hidden'] / element_total
visual_df['out_of_bounds_pct'] = visual_df['Bounds'] / element_total

sorted_df = visual_df.sort_values(['nonresponsive_pct', 'covered_pct', 'out_of_bounds_pct'], ascending=False)

# UI numbers of the rows whose 'Dimensions' flag is set (YES was mapped to 1 above).
# Iterating over a DataFrame yields column names, so the rows are selected with a mask.
incorrect_dim_guis = visual_df.loc[visual_df['Dimensions'] == 1, 'UI_Number'].tolist()

# print("GUIs with incorrect dimensions (not 375x667):", incorrect_dim_guis)

# Only the last expression of a cell is rendered automatically, so the
# aggregated table is displayed explicitly.
display(agg_df)

visual_df

### Sortiranje GUI glede na neodzivnost, prekritost elementov in prikaz izven dimenzij

In [2]:
# Visual evaluation table and the metric columns summarised below.
df = pd.read_csv('./design_evaluation/gui_visual_analysis.csv', sep=';')
metrics = ["Bounds", "Nonresponsive", "Hidden", "Dimensions"]

# YES/NO -> 1/0 so the flag can be averaged like the other metrics.
df = df.assign(Dimensions=df['Dimensions'].map({'YES': 1, 'NO': 0}))

# Mean of every metric per UI number.
df_clean = df.groupby("UI_Number")[metrics].mean().reset_index()

# Descriptive statistics, one row per metric; only mean/std/min/max are
# rounded, the quartile columns keep their full precision.
summary = df_clean[metrics].describe().T
rounded_stats = ["mean", "std", "min", "max"]
summary[rounded_stats] = summary[rounded_stats].round(3)
display(summary)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Bounds,44.0,0.114,0.19,0.0,0.0,0.0,0.25,0.75
Nonresponsive,44.0,1.534,1.076,0.25,0.75,1.25,2.3125,4.25
Hidden,44.0,0.153,0.254,0.0,0.0,0.0,0.25,1.25
Dimensions,44.0,0.057,0.119,0.0,0.0,0.0,0.0,0.5


In [29]:
import numpy as np
import pandas as pd

# Re-read the visual evaluation table so this cell does not depend on earlier state.
df = pd.read_csv('./design_evaluation/gui_visual_analysis.csv', sep=';')
metrics = ["Bounds", "Nonresponsive", "Hidden", "Dimensions"]

# YES/NO -> 1/0, then average every metric per UI number.
df = df.assign(Dimensions=df['Dimensions'].map({'YES': 1, 'NO': 0}))
df_clean = df.groupby("UI_Number")[metrics].mean().reset_index()

def normalize_column(col):
    """Min-max scale a numeric Series to the range [0, 1].

    Args:
        col (pd.Series): values to scale; cast to float first.

    Returns:
        pd.Series: ``(col - min) / (max - min)`` on the index of *col*.
        A Series of zeros on the same index is returned when the column has
        no valid minimum/maximum (for example only NaN) or when all values
        are equal, which would otherwise divide by zero.
    """
    values = col.astype(float)
    lowest = values.min(skipna=True)
    highest = values.max(skipna=True)

    # Degenerate input: nothing to scale against.
    degenerate = pd.isna(lowest) or pd.isna(highest) or (highest - lowest) == 0
    if degenerate:
        return pd.Series(0, index=values.index)

    span = highest - lowest
    return (values - lowest) / span

def _quartile_bucket(values):
    """Label every entry of *values* with "Q1".."Q4" by its own quartiles.

    A value equal to a quartile boundary is placed in the higher bucket.
    Consequently, when several boundaries coincide (many tied values), the
    tied value skips the lower labels — e.g. with q1 == q2 == 0 and q3 > 0 a
    value of 0 is labelled "Q3".  Entries matching no condition (NaN) get None.
    """
    q1, q2, q3 = (values.quantile(p) for p in (0.25, 0.50, 0.75))
    membership = [
        values < q1,
        (values >= q1) & (values < q2),
        (values >= q2) & (values < q3),
        values >= q3,
    ]
    labels = ["Q1", "Q2", "Q3", "Q4"]
    return np.select(membership, labels, default=None).astype(object)


# Work on a copy so the per-UI averages stay untouched.
norm_df = df_clean.copy()

# First pass: add all min-max scaled "<metric>_norm" columns.
for metric in metrics:
    norm_df[f"{metric}_norm"] = normalize_column(norm_df[metric])

# Second pass: add the "<metric>_quantile" labels derived from the scaled values.
for metric in metrics:
    norm_df[f"{metric}_quantile"] = _quartile_bucket(norm_df[f"{metric}_norm"])

# Show one table per metric.
for metric in metrics:
    print(metric)
    display(norm_df[["UI_Number", f"{metric}_norm", f"{metric}_quantile"]])


Bounds


Unnamed: 0,UI_Number,Bounds_norm,Bounds_quantile
0,3261,0.666667,Q4
1,3727,0.0,Q3
2,8640,0.333333,Q4
3,12740,0.333333,Q4
4,14283,0.0,Q3
5,18782,0.0,Q3
6,20947,0.0,Q3
7,22151,0.0,Q3
8,27360,0.0,Q3
9,27382,0.0,Q3


Nonresponsive


Unnamed: 0,UI_Number,Nonresponsive_norm,Nonresponsive_quantile
0,3261,0.5,Q3
1,3727,0.25,Q3
2,8640,0.5,Q3
3,12740,0.0625,Q1
4,14283,0.625,Q4
5,18782,0.1875,Q2
6,20947,0.125,Q2
7,22151,0.0625,Q1
8,27360,0.0625,Q1
9,27382,0.125,Q2


Hidden


Unnamed: 0,UI_Number,Hidden_norm,Hidden_quantile
0,3261,0.0,Q3
1,3727,0.2,Q4
2,8640,0.2,Q4
3,12740,0.0,Q3
4,14283,0.0,Q3
5,18782,0.0,Q3
6,20947,0.0,Q3
7,22151,0.0,Q3
8,27360,0.6,Q4
9,27382,0.2,Q4


Dimensions


Unnamed: 0,UI_Number,Dimensions_norm,Dimensions_quantile
0,3261,0.0,Q4
1,3727,0.0,Q4
2,8640,0.0,Q4
3,12740,0.0,Q4
4,14283,0.0,Q4
5,18782,0.0,Q4
6,20947,0.5,Q4
7,22151,0.0,Q4
8,27360,0.5,Q4
9,27382,0.0,Q4


### Sortiranje glede na ujemanje z opisom

In [30]:
# Results of the description validation (semicolon-separated).
desc_df = pd.read_csv('./description_validation/rezultati.csv', sep=';')

# Mean of 'Correct' per (UI_Number, All) group.
# NOTE(review): presumably 'All' is the number of described items and 'Correct'
# the number realised in a prototype — confirm against the CSV.
agg_df = (
    desc_df
    .groupby(['UI_Number', 'All'], as_index=False)['Correct']
    .mean()
)
agg_df['match_pct'] = agg_df['Correct'].div(agg_df['All'])

# Lowest match ratio first.
sorted_df = agg_df.sort_values('match_pct')
sorted_df

Unnamed: 0,UI_Number,All,Correct,match_pct
30,54468,5,2.5,0.5
15,34346,8,4.25,0.53125
29,54377,10,5.5,0.55
38,65592,8,6.25,0.78125
31,56905,7,5.5,0.785714
32,58124,6,4.75,0.791667
0,3261,7,5.75,0.821429
26,49794,6,5.0,0.833333
11,30982,9,7.5,0.833333
43,70410,9,7.5,0.833333


In [33]:
# Descriptive statistics of the match ratio (a Series indexed by statistic name).
summary = sorted_df["match_pct"].describe()

# Round only mean/std/min/max; count and the quartiles keep their precision.
rounded_stats = ["mean", "std", "min", "max"]
summary.loc[rounded_stats] = summary.loc[rounded_stats].round(3)
summary

count    44.000000
mean      0.884000
std       0.117000
min       0.500000
25%       0.843750
50%       0.916667
75%       0.958333
max       1.000000
Name: match_pct, dtype: float64

In [None]:
# UI numbers of the selected apps.  The variable is deliberately not called
# `list`: that would shadow the builtin for the rest of the kernel session and
# break every later `list(...)` call.
selected_ui_numbers = [
    3261, 3727, 8640, 12740, 18782, 20947, 22151, 27707, 30982, 31390,
    32310, 33383, 34517, 34527, 38961, 43977, 44756, 47926, 49794, 58124,
    59429, 59576, 64858, 69574, 69587,
]

# NOTE(review): presumably one prototype per generation method (four method
# names are configured at the top of the notebook) — confirm.
PROTOTYPES_PER_UI = 4

len(selected_ui_numbers) * PROTOTYPES_PER_UI

### Dodatne metrike za ocenjevanje GUI

1. Alignment (Poravnava)

Meri, kako dobro so elementi na strani poravnani vzdolž navpične osi. Manjši standardni odklon položajev elementov pomeni boljšo poravnavo.

2. Spacing (Razmik med elementi)

Standardni odklon razmikov med zgornjimi robovi elementov (navpični razmik). Stabilen, enakomeren razmik daje občutek urejenosti ter olajša vizualno pregledovanje in navigacijo po strani.

3. Overlap Score (Prekrivanje elementov)

Kolikšen delež površine elementov se prekriva. Višje prekrivanje pomeni nižjo uporabniško kakovost (score = 1 − overlap_ratio).

4. Saliency Score (Pomembnost vizualnih elementov)

Koliko pozornosti privlačijo posamezni elementi na sliki, temelji na algoritmu vizualne izrazitosti.
Elementi, ki so bolj izraziti, bolj pritegnejo uporabnikovo pozornost.

5. Color Harmony (Barvna harmonija)

Kombinacija razdalj med barvami (Lab prostor), entropije in kontrasta svetlosti. Višja vrednost pomeni bolj skladno barvno paleto.

In [9]:
# ==========================================
# 📈 Material Design Metrics — Analysis & Visualization
# ==========================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# ---------- Settings ----------
CSV_PATH = Path("all_ui_metrics_md_sync.csv")  # <-- adjust the name if you use the async variant
OUT_DIR = Path("plots_md_analysis")
OUT_DIR.mkdir(exist_ok=True)

# ---------- Load data ----------
df = pd.read_csv(CSV_PATH)
print("✅ Prebranih vrstic:", len(df))
display(df.head())

# ---------- Clean ----------
# Keep only rows in which every metric is present.
metrics = [
    "spacing_md",
    "overlap_md",
    "color_contrast",
    "focusable_ratio",
    "labelled_ratio",
]
df_clean = df.dropna(subset=metrics)

# ---------- Metric vector (per app_id) ----------
# Mean of each metric per app, plus the unweighted mean across the metrics.
metric_vector = df_clean.groupby("app_id")[metrics].mean().reset_index()
metric_vector = metric_vector.assign(mean_score=metric_vector[metrics].mean(axis=1))

top_apps = metric_vector.sort_values("mean_score", ascending=False).head(10)
display(top_apps)

# ---------- Summary per metric ----------
# Only mean/std/min/max are rounded; the quartile columns keep full precision.
summary = df_clean[metrics].describe().T
rounded_stats = ["mean", "std", "min", "max"]
summary[rounded_stats] = summary[rounded_stats].round(3)
display(summary)

# ---------- Metric distributions ----------
# One histogram (with KDE overlay) per metric, written to OUT_DIR and closed
# right away so that figures do not pile up in memory.
for metric in metrics:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(df_clean[metric], bins=20, kde=True, ax=ax)
    ax.set_title(f"Distribucija metrike: {metric}")
    ax.set_xlabel("Vrednost (0–1)")
    ax.set_ylabel("Število primerov")
    fig.tight_layout()
    fig.savefig(OUT_DIR / f"dist_{metric}.png")
    plt.close(fig)

# ---------- Boxplot comparison ----------
# All metrics side by side in a single figure.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df_clean[metrics], ax=ax)
ax.set_title("Porazdelitev metrik (boxplot)")
ax.set_ylabel("Vrednost")
fig.tight_layout()
fig.savefig(OUT_DIR / "boxplot_all_metrics.png")
plt.close(fig)

# ---------- Correlations ----------
# Pairwise correlation between the metrics (DataFrame.corr default method).
corr = df_clean[metrics].corr()

# Correlation coefficients lie in [-1, 1].  The colour scale therefore spans
# the full range and uses a diverging palette centred on 0; a 0..1 scale would
# paint every negative correlation with the same colour as "no correlation".
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(corr, annot=True, cmap="coolwarm", vmin=-1, vmax=1, center=0, ax=ax)
ax.set_title("Korelacijska matrika med metrikami")
fig.tight_layout()
fig.savefig(OUT_DIR / "correlation_heatmap.png")
plt.close(fig)

# ---------- Report summary ----------
print("📁 Grafi shranjeni v:", OUT_DIR)
print("\n📊 Povzetek korelacij:")
display(corr)


✅ Prebranih vrstic: 176


Unnamed: 0,app_id,prompt,n_elements,spacing_md,overlap_md,color_contrast,focusable_ratio,labelled_ratio
0,12740,instruction,28,0.592593,0.0,0.639307,1.0,0.0
1,12740,pd_zs,27,0.576923,0.0,0.483475,1.0,0.0
2,12740,pd_fs,25,0.625,0.0,0.336107,1.0,0.0
3,12740,ref_instruction,27,0.692308,0.0,0.492347,1.0,1.0
4,14283,instruction,26,0.28,0.0,0.590978,1.0,0.0


Unnamed: 0,app_id,spacing_md,overlap_md,color_contrast,focusable_ratio,labelled_ratio,mean_score
26,49794,0.599217,0.0,0.413606,1.0,0.611111,0.524787
10,27707,0.807643,0.0,0.521488,1.0,0.25,0.515826
20,38961,0.530819,0.0,0.291981,1.0,0.703704,0.505301
24,44756,0.345238,0.0,0.387803,1.0,0.666667,0.479941
23,43977,0.470417,0.0,0.412917,1.0,0.5,0.476667
3,12740,0.621706,0.0,0.487809,1.0,0.25,0.471903
1,3727,0.429528,0.0,0.47925,1.0,0.357143,0.453184
11,30982,0.456572,0.0,0.632501,0.983333,0.183333,0.451148
21,40673,0.443311,0.0,0.52231,1.0,0.25,0.443124
31,56905,0.566993,0.022166,0.513974,0.903846,0.203297,0.442055


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
spacing_md,157.0,0.481,0.155,0.061,0.375,0.473684,0.581818,0.839
overlap_md,157.0,0.002,0.016,0.0,0.0,0.0,0.0,0.148
color_contrast,157.0,0.415,0.205,0.065,0.241591,0.371952,0.566233,0.915
focusable_ratio,157.0,0.979,0.107,0.0,1.0,1.0,1.0,1.0
labelled_ratio,157.0,0.138,0.305,0.0,0.0,0.0,0.0,1.0


📁 Grafi shranjeni v: plots_md_analysis

📊 Povzetek korelacij:


Unnamed: 0,spacing_md,overlap_md,color_contrast,focusable_ratio,labelled_ratio
spacing_md,1.0,0.02482,-0.165903,-0.067513,0.082179
overlap_md,0.02482,1.0,0.041996,0.026671,0.002955
color_contrast,-0.165903,0.041996,1.0,0.11514,0.007122
focusable_ratio,-0.067513,0.026671,0.11514,1.0,-0.16121
labelled_ratio,0.082179,0.002955,0.007122,-0.16121,1.0


In [10]:
# Show the per-app metric means together with their mean_score.
metric_vector

Unnamed: 0,app_id,spacing_md,overlap_md,color_contrast,focusable_ratio,labelled_ratio,mean_score
0,3261,0.617574,0.0,0.356012,1.0,0.0,0.394717
1,3727,0.429528,0.0,0.47925,1.0,0.357143,0.453184
2,8640,0.337075,0.0,0.408253,1.0,0.0,0.349066
3,12740,0.621706,0.0,0.487809,1.0,0.25,0.471903
4,14283,0.455714,0.0,0.472449,1.0,0.0,0.385633
5,18782,0.39554,0.0,0.345244,0.666667,0.0,0.28149
6,20947,0.509687,0.0,0.421771,0.9375,0.0625,0.386292
7,22151,0.380405,0.0,0.410675,1.0,0.25,0.408216
8,27360,0.427024,0.0,0.430009,1.0,0.0,0.371407
9,27382,0.349701,0.0,0.303503,1.0,0.0,0.330641


In [43]:
from pathlib import Path

CSV_PATH = Path("all_ui_metrics_md_sync.csv")
metrics = [
    "spacing_md",
    "overlap_md",
    "color_contrast",
    "focusable_ratio",
    "labelled_ratio",
]

# Load the metrics and keep only rows in which every metric is present.
df = pd.read_csv(CSV_PATH)
df_clean = df.dropna(subset=metrics)

# Mean of each metric per app, plus the unweighted mean across the metrics.
metric_vector = df_clean.groupby("app_id")[metrics].mean().reset_index()
metric_vector = metric_vector.assign(mean_score=metric_vector[metrics].mean(axis=1))

# Row-level descriptive statistics; only mean/std/min/max are rounded.
summary = df_clean[metrics].describe().T
rounded_stats = ["mean", "std", "min", "max"]
summary[rounded_stats] = summary[rounded_stats].round(3)
display(summary)

# Label every app with the quartile of its per-app mean, metric by metric.
# A value equal to a boundary goes to the higher bucket, so when all three
# boundaries coincide (e.g. a metric that is 0 for most apps) every value at
# or above that boundary is labelled "Q4".  Values matching no condition
# (NaN) get None.
quartile_names = ["Q1", "Q2", "Q3", "Q4"]
for metric in metrics:
    column = metric_vector[metric]
    lower = column.quantile(0.25)
    median = column.quantile(0.50)
    upper = column.quantile(0.75)
    print(lower, median, upper)

    in_bucket = [
        column < lower,
        column.ge(lower) & column.lt(median),
        column.ge(median) & column.lt(upper),
        column >= upper,
    ]
    metric_vector[f"{metric}_quantile"] = np.select(
        in_bucket, quartile_names, default=None
    ).astype(object)

metric_vector[["app_id", "spacing_md", "spacing_md_quantile"]]


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
spacing_md,157.0,0.481,0.155,0.061,0.375,0.473684,0.581818,0.839
overlap_md,157.0,0.002,0.016,0.0,0.0,0.0,0.0,0.148
color_contrast,157.0,0.415,0.205,0.065,0.241591,0.371952,0.566233,0.915
focusable_ratio,157.0,0.979,0.107,0.0,1.0,1.0,1.0,1.0
labelled_ratio,157.0,0.138,0.305,0.0,0.0,0.0,0.0,1.0


0.41159060846560847 0.4565719299737354 0.5416538679577961
0.0 0.0 0.0
0.34887394686691753 0.41067547966546275 0.4835297454728128
1.0 1.0 1.0
0.0 0.05 0.25


Unnamed: 0,app_id,spacing_md,spacing_md_quantile
0,3261,0.617574,Q4
1,3727,0.429528,Q2
2,8640,0.337075,Q1
3,12740,0.621706,Q4
4,14283,0.455714,Q2
5,18782,0.39554,Q1
6,20947,0.509687,Q3
7,22151,0.380405,Q1
8,27360,0.427024,Q2
9,27382,0.349701,Q1


### Izbor top GUI

In [50]:
# Every UI number under consideration.  The variable is deliberately not called
# `all`: that would shadow the builtin for the rest of the kernel session.
all_ui_numbers = [
    3261, 3727, 8640, 12740, 14283, 18782, 20947, 22151, 27360, 27382,
    27707, 30982, 31390, 32310, 33383, 34346, 34517, 34527, 35526, 37505,
    38961, 40673, 43872, 43977, 44756, 47926, 49794, 53054, 53469, 54377,
    54468, 56905, 58124, 59429, 59576, 61851, 63575, 64858, 65592, 67044,
    68368, 69574, 69587, 70410,
]

# UI numbers excluded per criterion.
description = [54468, 34346, 54377, 65592, 56905, 58124, 3261, 49794, 30982, 70410, 40673]
errors = [54377, 53469, 27360, 61851, 34346, 63575, 35526, 49794, 34517, 3727, 14283]
bounds = [56905, 3261, 69587, 34517, 64858]
nonresponsive = [70410, 63575, 53054, 34346, 65592, 35526, 14283, 37505, 68368, 56905, 43872]
hidden = [70410, 67044, 27360, 34346]
dimensions = [40673, 43872, 68368, 20947, 27360, 27707, 56905, 47926, 33383]

# Longer candidate lists, kept for reference; the active lists below use only
# their first two entries.
# color = [32310,34517,53469,38961,37505,27382,70410,65592,18782,34527,47926]
# focus = [18782,32310,56905,34527,20947,35526,47926,65592,61851,30982]
# spaces = [54468,61851,8640,44756,27382,64858,35526,69587,22151,18782,43872]
color = [32310, 34517]
focus = [18782, 32310]
spaces = [54468, 61851]

# Union of all exclusions (may contain duplicates); the set is only used for
# fast membership tests.
bad = description + errors + bounds + nonresponsive + hidden + dimensions + color + focus + spaces
excluded = set(bad)

# UIs that were not excluded by any criterion, in their original order.
good = [ui_number for ui_number in all_ui_numbers if ui_number not in excluded]
print(len(good), good)

12 [8640, 12740, 22151, 27382, 31390, 34527, 38961, 43977, 44756, 59429, 59576, 69574]


In [51]:
# The selected UI numbers as strings.
[*map(str, good)]

['8640',
 '12740',
 '22151',
 '27382',
 '31390',
 '34527',
 '38961',
 '43977',
 '44756',
 '59429',
 '59576',
 '69574']