Skip to content

Commit 34b9d39

Browse files
committed
[FEATURE][processing] K Means clustering algorithm
Adds a native k-means clustering algorithm. Based on a port of PostGIS' ST_ClusterKMeans function, this new algorithm adds a new cluster ID field to a set of input features, identifying each feature's cluster based on the k-means clustering approach. If non-point geometries are used as input, the clustering is based on the centroids of the input geometries.
1 parent 16f3781 commit 34b9d39

16 files changed

+1032
-1
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_lines.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
10+
<gml:coord><gml:X>11</gml:X><gml:Y>5</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_lines fid="lines.0">
16+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,2 9,2 9,3 11,5</gml:coordinates></gml:LineString></ogr:geometryProperty>
17+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
18+
</ogr:kmeans_lines>
19+
</gml:featureMember>
20+
<gml:featureMember>
21+
<ogr:kmeans_lines fid="lines.1">
22+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>-1,-1 1,-1</gml:coordinates></gml:LineString></ogr:geometryProperty>
23+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
24+
</ogr:kmeans_lines>
25+
</gml:featureMember>
26+
<gml:featureMember>
27+
<ogr:kmeans_lines fid="lines.2">
28+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>2,0 2,2 3,2 3,3</gml:coordinates></gml:LineString></ogr:geometryProperty>
29+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
30+
</ogr:kmeans_lines>
31+
</gml:featureMember>
32+
<gml:featureMember>
33+
<ogr:kmeans_lines fid="lines.3">
34+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>3,1 5,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
35+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
36+
</ogr:kmeans_lines>
37+
</gml:featureMember>
38+
<gml:featureMember>
39+
<ogr:kmeans_lines fid="lines.4">
40+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>7,-3 10,-3</gml:coordinates></gml:LineString></ogr:geometryProperty>
41+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
42+
</ogr:kmeans_lines>
43+
</gml:featureMember>
44+
<gml:featureMember>
45+
<ogr:kmeans_lines fid="lines.5">
46+
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,-3 10,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
47+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
48+
</ogr:kmeans_lines>
49+
</gml:featureMember>
50+
<gml:featureMember>
51+
<ogr:kmeans_lines fid="lines.6">
52+
<ogr:CLUSTER_ID xsi:nil="true"/>
53+
</ogr:kmeans_lines>
54+
</gml:featureMember>
55+
</ogr:FeatureCollection>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_lines" type="ogr:kmeans_lines_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_lines_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:LineStringPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:integer">
22+
<xs:totalDigits value="10"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
</xs:sequence>
27+
</xs:extension>
28+
</xs:complexContent>
29+
</xs:complexType>
30+
</xs:schema>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_3.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
10+
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_points_3 fid="points.0">
16+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
17+
<ogr:id>1</ogr:id>
18+
<ogr:id2>2</ogr:id2>
19+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
20+
</ogr:kmeans_points_3>
21+
</gml:featureMember>
22+
<gml:featureMember>
23+
<ogr:kmeans_points_3 fid="points.1">
24+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
25+
<ogr:id>2</ogr:id>
26+
<ogr:id2>1</ogr:id2>
27+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
28+
</ogr:kmeans_points_3>
29+
</gml:featureMember>
30+
<gml:featureMember>
31+
<ogr:kmeans_points_3 fid="points.2">
32+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
33+
<ogr:id>3</ogr:id>
34+
<ogr:id2>0</ogr:id2>
35+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
36+
</ogr:kmeans_points_3>
37+
</gml:featureMember>
38+
<gml:featureMember>
39+
<ogr:kmeans_points_3 fid="points.3">
40+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
41+
<ogr:id>4</ogr:id>
42+
<ogr:id2>2</ogr:id2>
43+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
44+
</ogr:kmeans_points_3>
45+
</gml:featureMember>
46+
<gml:featureMember>
47+
<ogr:kmeans_points_3 fid="points.4">
48+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
49+
<ogr:id>5</ogr:id>
50+
<ogr:id2>1</ogr:id2>
51+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
52+
</ogr:kmeans_points_3>
53+
</gml:featureMember>
54+
<gml:featureMember>
55+
<ogr:kmeans_points_3 fid="points.5">
56+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
57+
<ogr:id>6</ogr:id>
58+
<ogr:id2>0</ogr:id2>
59+
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
60+
</ogr:kmeans_points_3>
61+
</gml:featureMember>
62+
<gml:featureMember>
63+
<ogr:kmeans_points_3 fid="points.6">
64+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
65+
<ogr:id>7</ogr:id>
66+
<ogr:id2>0</ogr:id2>
67+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
68+
</ogr:kmeans_points_3>
69+
</gml:featureMember>
70+
<gml:featureMember>
71+
<ogr:kmeans_points_3 fid="points.7">
72+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
73+
<ogr:id>8</ogr:id>
74+
<ogr:id2>0</ogr:id2>
75+
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
76+
</ogr:kmeans_points_3>
77+
</gml:featureMember>
78+
<gml:featureMember>
79+
<ogr:kmeans_points_3 fid="points.8">
80+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
81+
<ogr:id>9</ogr:id>
82+
<ogr:id2>0</ogr:id2>
83+
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
84+
</ogr:kmeans_points_3>
85+
</gml:featureMember>
86+
</ogr:FeatureCollection>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_points_3" type="ogr:kmeans_points_3_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_points_3_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:integer">
22+
<xs:totalDigits value="10"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
27+
<xs:simpleType>
28+
<xs:restriction base="xs:integer">
29+
<xs:totalDigits value="10"/>
30+
</xs:restriction>
31+
</xs:simpleType>
32+
</xs:element>
33+
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
34+
<xs:simpleType>
35+
<xs:restriction base="xs:integer">
36+
<xs:totalDigits value="10"/>
37+
</xs:restriction>
38+
</xs:simpleType>
39+
</xs:element>
40+
</xs:sequence>
41+
</xs:extension>
42+
</xs:complexContent>
43+
</xs:complexType>
44+
</xs:schema>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<ogr:FeatureCollection
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_5.xsd"
5+
xmlns:ogr="http://ogr.maptools.org/"
6+
xmlns:gml="http://www.opengis.net/gml">
7+
<gml:boundedBy>
8+
<gml:Box>
9+
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
10+
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
11+
</gml:Box>
12+
</gml:boundedBy>
13+
14+
<gml:featureMember>
15+
<ogr:kmeans_points_5 fid="points.0">
16+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
17+
<ogr:id>1</ogr:id>
18+
<ogr:id2>2</ogr:id2>
19+
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
20+
</ogr:kmeans_points_5>
21+
</gml:featureMember>
22+
<gml:featureMember>
23+
<ogr:kmeans_points_5 fid="points.1">
24+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
25+
<ogr:id>2</ogr:id>
26+
<ogr:id2>1</ogr:id2>
27+
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
28+
</ogr:kmeans_points_5>
29+
</gml:featureMember>
30+
<gml:featureMember>
31+
<ogr:kmeans_points_5 fid="points.2">
32+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
33+
<ogr:id>3</ogr:id>
34+
<ogr:id2>0</ogr:id2>
35+
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
36+
</ogr:kmeans_points_5>
37+
</gml:featureMember>
38+
<gml:featureMember>
39+
<ogr:kmeans_points_5 fid="points.3">
40+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
41+
<ogr:id>4</ogr:id>
42+
<ogr:id2>2</ogr:id2>
43+
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
44+
</ogr:kmeans_points_5>
45+
</gml:featureMember>
46+
<gml:featureMember>
47+
<ogr:kmeans_points_5 fid="points.4">
48+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
49+
<ogr:id>5</ogr:id>
50+
<ogr:id2>1</ogr:id2>
51+
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
52+
</ogr:kmeans_points_5>
53+
</gml:featureMember>
54+
<gml:featureMember>
55+
<ogr:kmeans_points_5 fid="points.5">
56+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
57+
<ogr:id>6</ogr:id>
58+
<ogr:id2>0</ogr:id2>
59+
<ogr:CLUSTER_ID5>1</ogr:CLUSTER_ID5>
60+
</ogr:kmeans_points_5>
61+
</gml:featureMember>
62+
<gml:featureMember>
63+
<ogr:kmeans_points_5 fid="points.6">
64+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
65+
<ogr:id>7</ogr:id>
66+
<ogr:id2>0</ogr:id2>
67+
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
68+
</ogr:kmeans_points_5>
69+
</gml:featureMember>
70+
<gml:featureMember>
71+
<ogr:kmeans_points_5 fid="points.7">
72+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
73+
<ogr:id>8</ogr:id>
74+
<ogr:id2>0</ogr:id2>
75+
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
76+
</ogr:kmeans_points_5>
77+
</gml:featureMember>
78+
<gml:featureMember>
79+
<ogr:kmeans_points_5 fid="points.8">
80+
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
81+
<ogr:id>9</ogr:id>
82+
<ogr:id2>0</ogr:id2>
83+
<ogr:CLUSTER_ID5>3</ogr:CLUSTER_ID5>
84+
</ogr:kmeans_points_5>
85+
</gml:featureMember>
86+
</ogr:FeatureCollection>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
3+
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
4+
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
5+
<xs:complexType name="FeatureCollectionType">
6+
<xs:complexContent>
7+
<xs:extension base="gml:AbstractFeatureCollectionType">
8+
<xs:attribute name="lockId" type="xs:string" use="optional"/>
9+
<xs:attribute name="scope" type="xs:string" use="optional"/>
10+
</xs:extension>
11+
</xs:complexContent>
12+
</xs:complexType>
13+
<xs:element name="kmeans_points_5" type="ogr:kmeans_points_5_Type" substitutionGroup="gml:_Feature"/>
14+
<xs:complexType name="kmeans_points_5_Type">
15+
<xs:complexContent>
16+
<xs:extension base="gml:AbstractFeatureType">
17+
<xs:sequence>
18+
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
19+
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
20+
<xs:simpleType>
21+
<xs:restriction base="xs:integer">
22+
<xs:totalDigits value="10"/>
23+
</xs:restriction>
24+
</xs:simpleType>
25+
</xs:element>
26+
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
27+
<xs:simpleType>
28+
<xs:restriction base="xs:integer">
29+
<xs:totalDigits value="10"/>
30+
</xs:restriction>
31+
</xs:simpleType>
32+
</xs:element>
33+
<xs:element name="CLUSTER_ID5" nillable="true" minOccurs="0" maxOccurs="1">
34+
<xs:simpleType>
35+
<xs:restriction base="xs:integer">
36+
<xs:totalDigits value="10"/>
37+
</xs:restriction>
38+
</xs:simpleType>
39+
</xs:element>
40+
</xs:sequence>
41+
</xs:extension>
42+
</xs:complexContent>
43+
</xs:complexType>
44+
</xs:schema>

0 commit comments

Comments
 (0)