<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="nxdlformat.xsl"?>
<!--
# NeXus - Neutron and X-ray Common Data Format
#
# Copyright (C) 2014-2024 NeXus International Advisory Committee (NIAC)
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# For further information, see http://www.nexusformat.org
-->
<definition xmlns="http://definition.nexusformat.org/nxdl/3.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" category="base" name="NXsimilarity_grouping" extends="NXobject" type="group" xsi:schemaLocation="http://definition.nexusformat.org/nxdl/3.1 ../nxdl.xsd">
<symbols>
    <doc>
        Symbols used in this schema, e.g. to specify the dimensions of arrays.
    </doc>
    <!-- first axis of the per-member label matrices (numerical_label, categorical_label) -->
    <symbol name="c">
        <doc>
            Cardinality of the set.
        </doc>
    </symbol>
    <!-- second axis of numerical_label; also the length of the per-label arrays -->
    <symbol name="n_lbl_num">
        <doc>
            Number of numerical labels per object.
        </doc>
    </symbol>
    <!-- second axis of categorical_label -->
    <symbol name="n_lbl_cat">
        <doc>
            Number of categorical labels per object.
        </doc>
    </symbol>
    <!-- first axis of the per-feature arrays in the statistics group -->
    <symbol name="n_features">
        <doc>
            Total number of similarity groups (also called features, objects,
            or clusters).
        </doc>
    </symbol>
</symbols>
<doc>
Metadata for the results of a similarity grouping analysis.

Similarity grouping analyses can be supervised segmentation or machine learning
clustering algorithms. These are routine methods which partition the members of
a set of objects/geometric primitives into (sub-)groups, i.e. features of
different type. A plethora of algorithms have been proposed which can also be
applied to geometric primitives like points, triangles, or (abstract) features aka
objects (including categorical sub-groups).

This base class considers metadata and results of one similarity grouping
analysis applied to a set in which objects are either categorized as noise
or as belonging to a cluster.
As the result of the analysis, each similarity group, here called feature
aka object, can get a number of numerical and/or categorical labels.
</doc>
<field name="cardinality" type="NX_UINT" units="NX_UNITLESS">
    <doc>
        Total number of members of the set that is partitioned into features.
    </doc>
</field>
<field name="number_of_numeric_labels" type="NX_UINT" units="NX_UNITLESS">
    <doc>
        Number of numerical labels that each feature has.
    </doc>
</field>
<field name="number_of_categorical_labels" type="NX_UINT" units="NX_UNITLESS">
    <doc>
        Number of categorical labels that each feature has.
    </doc>
</field>
<!--features:
doc: |
Reference to a set of features investigated in a cluster analysis.
Features need to have disjoint numeric identifiers.-->
<field name="identifier_offset" type="NX_UINT" units="NX_UNITLESS">
    <!-- The bullet list below is separated from the surrounding paragraphs by
         blank lines so that it is rendered as a list in the generated manual. -->
    <doc>
        The first identifier that is used to label a cluster, with one value
        for each numerical label.

        The value should be chosen in such a way that special values can be resolved:

        * identifier_offset-1 indicates an object belongs to no cluster.
        * identifier_offset-2 indicates an object belongs to the noise category.

        Setting for instance identifier_offset to 1 recovers the commonly used
        case that objects of the noise category get the value -1 and unassigned
        objects get the value 0.
        Numerical identifiers have to be strictly increasing.
    </doc>
    <dimensions rank="1">
        <dim index="1" value="n_lbl_num"/>
    </dimensions>
</field>
<field name="numerical_label" type="NX_INT" units="NX_UNITLESS">
    <!-- NX_INT rather than NX_UINT: the special values identifier_offset-1 and
         identifier_offset-2 become negative for small offsets (e.g. -1 for noise
         when identifier_offset is 1) and cannot be stored as unsigned integers. -->
    <doc>
        Matrix of numerical labels for each member in the set, with one row per
        member and one column per numerical label.
        For classical clustering algorithms this can for instance
        encode the cluster_identifier.
        Values can be negative when they encode the special cases described
        for identifier_offset (member belongs to no cluster or to the noise
        category).
    </doc>
    <dimensions rank="2">
        <dim index="1" value="c"/>
        <dim index="2" value="n_lbl_num"/>
    </dimensions>
</field>
<field name="categorical_label" type="NX_CHAR">
    <doc>
        Matrix of categorical attribute data for each member in the set, with
        one row per member and one column per categorical label.
    </doc>
    <!-- List instances of base classes of an applied cluster algorithm,
         e.g. (NXclustering_hdbscan): -->
    <dimensions rank="2">
        <dim index="1" value="c"/>
        <dim index="2" value="n_lbl_cat"/>
    </dimensions>
</field>
<group name="statistics" type="NXprocess">
    <doc>
        Summary statistics of the grouping. They are stored under this group in
        addition to the detailed record of which member was grouped to which
        feature/group.
    </doc>
    <!-- statistics at the level of the set -->
    <field name="number_of_unassigned_members" type="NX_UINT" units="NX_UNITLESS">
        <doc>
            Total number of members in the set that are categorized as
            unassigned, with one count per numerical label.
        </doc>
        <dimensions rank="1">
            <dim index="1" value="n_lbl_num"/>
        </dimensions>
    </field>
    <field name="noise" type="NX_UINT" units="NX_UNITLESS">
        <doc>
            Total number of members in the set that are categorized as noise,
            with one count per numerical label.
        </doc>
        <dimensions rank="1">
            <dim index="1" value="n_lbl_num"/>
        </dimensions>
    </field>
    <!-- statistics at the level of the feature set -->
    <field name="number_of_features" type="NX_UINT" units="NX_UNITLESS">
        <doc>
            Total number of clusters (excluding noise and unassigned).
        </doc>
    </field>
    <field name="feature_identifier" type="NX_UINT" units="NX_UNITLESS">
        <doc>
            Array of the numerical identifiers of each feature (cluster).
        </doc>
        <dimensions rank="2">
            <dim index="1" value="n_features"/>
            <dim index="2" value="n_lbl_num"/>
        </dimensions>
    </field>
    <field name="feature_member_count" type="NX_UINT" units="NX_UNITLESS">
        <doc>
            Array of the number of members of each feature.
        </doc>
        <dimensions rank="2">
            <dim index="1" value="n_features"/>
            <dim index="2" value="n_lbl_num"/>
        </dimensions>
    </field>
</group>
</definition>