Skip to content

Commit

Permalink
Merge pull request #98 from shiodat/add-clustering-service
Browse files Browse the repository at this point in the history
Add clustering service
  • Loading branch information
TkrUdagawa authored Dec 8, 2017
2 parents d5838f8 + 0fc0bc6 commit 7c00087
Show file tree
Hide file tree
Showing 8 changed files with 1,354 additions and 1 deletion.
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jubakit is a Python module to access Jubatus features easily.
jubakit can be used in conjunction with `scikit-learn <http://scikit-learn.org/>`_ so that you can use powerful features like cross validation and model evaluation.
See the `Jubakit Documentation <http://jubat.us/en/jubakit>`_ for the detailed description.

Currently jubakit supports `Classifier <http://jubat.us/en/api/api_classifier.html>`_, `Regression <http://jubat.us/en/api/api_regression.html>`_, `Anomaly <http://jubat.us/en/api/api_anomaly.html>`_, `Recommender <http://jubat.us/en/api/api_recommender.html>`_ and `Weight <http://jubat.us/en/api/api_weight.html>`_ engines.
Currently jubakit supports `Classifier <http://jubat.us/en/api/api_classifier.html>`_, `Regression <http://jubat.us/en/api/api_regression.html>`_, `Anomaly <http://jubat.us/en/api/api_anomaly.html>`_, `Recommender <http://jubat.us/en/api/api_recommender.html>`_, `Clustering <http://jubat.us/en/api/api_clustering.html>`_ and `Weight <http://jubat.us/en/api/api_weight.html>`_ engines.

Install
-------
Expand Down Expand Up @@ -105,6 +105,8 @@ See the `example <https://github.com/jubatus/jubakit/tree/master/example>`_ dire
+-----------------------------------+-----------------------------------------------+-----------------------+
| recommender_npb.py | Recommend similar items | |
+-----------------------------------+-----------------------------------------------+-----------------------+
| clustering_2d.py | Clustering 2-dimensional dataset | |
+-----------------------------------+-----------------------------------------------+-----------------------+
| weight_shogun.py | Tracing fv_converter behavior using Weight | |
+-----------------------------------+-----------------------------------------------+-----------------------+
| weight_model_extract.py | Extract contents of Weight model file | |
Expand Down
301 changes: 301 additions & 0 deletions example/blobs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
cluster,x1,x2
1,-0.93365052788,-1.14252366433
1,-1.30942924203,-1.17678133335
1,-1.34024502265,-1.31247402809
0,0.705946662732,1.08441625777
1,-1.13881130967,-0.7085249752
0,0.956149945383,0.628758392122
1,-0.749811617546,-0.783501028126
0,1.09808612117,1.20026950289
0,1.27769396504,0.928303484449
0,0.666131442806,0.876941115329
0,1.10948853175,0.788762702015
0,0.928641954222,0.737586758678
1,-0.92868829085,-0.802790686893
0,1.08601733009,0.743459059141
0,1.15436399849,1.23561794118
0,0.894167921591,1.00858976753
0,1.01180789786,0.962622197845
1,-1.17281887522,-1.31434156598
0,0.722735657016,0.748793610328
0,1.07877265618,0.953919497595
1,-1.0616397876,-0.836511727778
1,-0.807477769967,-0.865361420293
0,0.953572364838,0.935215671951
0,0.997909419762,0.957200423884
0,0.96365322024,1.06081199546
0,0.961237465492,1.07967600026
1,-0.495908857816,-0.959438464457
1,-1.69287710777,-1.23728584981
1,-0.967713503586,-1.06734691425
1,-0.721891018457,-0.961223455053
0,1.32631992264,1.01557318933
1,-0.837405444918,-1.10729247379
0,1.01495695974,1.23246945591
1,-0.756971166227,-1.12469423796
1,-1.14208325927,-0.482392806743
0,0.977752246202,0.879820916999
0,0.862810629696,1.08769305379
0,1.09954406122,0.938612261241
1,-0.823182960746,-0.930780485092
0,0.689785527641,1.19453266431
0,0.757724405413,1.3473090632
0,0.877481320621,0.793741068761
0,0.928999199851,1.00270428848
1,-0.845402369455,-0.749220815906
1,-0.970156814095,-0.758907438626
1,-0.87235646144,-0.880970665629
0,0.607339613379,1.16354040159
1,-1.34614348729,-1.02540477671
1,-1.2145504187,-0.999724235822
1,-0.870527368967,-1.11699430888
1,-0.718380625072,-1.1088022103
0,1.062650858,0.785094270862
1,-0.806293152097,-1.09505549938
0,0.811787817815,0.869230709957
0,0.892888731123,1.12132309814
1,-0.997885067324,-1.14134314191
1,-0.910173309421,-1.10079246393
1,-0.481630999694,-0.883757493582
0,0.707076543774,1.15769692848
1,-1.16068210274,-1.25297022359
1,-0.881054298908,-1.08869920088
0,1.01845183546,1.10855776458
0,1.00670796987,0.511756022803
0,1.09775194332,1.12818658085
1,-0.867627089872,-0.522748510569
0,0.558298055212,0.886244752846
1,-1.19590082703,-0.580458281596
0,0.825876418599,1.02713177735
1,-1.10534892994,-1.01661263094
0,1.0137580997,1.01394515234
0,1.27443347546,1.15344843593
1,-0.841169236496,-1.2858972431
0,1.06639728878,1.15198632838
0,0.732734102261,0.84064579653
1,-0.97370887636,-1.06128053325
0,1.14242937492,1.0584408659
1,-0.876023303483,-1.18236840362
0,1.07701802062,0.746070794056
1,-0.935814391035,-1.08753717031
1,-1.01698475648,-0.775420213246
0,0.537324421414,1.44402657477
1,-1.3170507062,-0.872816250137
1,-1.13712268693,-0.589193699823
1,-0.913274447267,-0.887080518959
1,-1.00859225235,-1.06118433703
1,-0.792496157825,-0.915519738475
0,0.980027951159,0.494567435192
0,1.33339766676,0.837165358674
1,-0.876643120952,-1.18292066828
1,-0.979084375782,-0.82307189748
0,0.816902215077,0.899673400048
1,-0.57374001078,-0.734988521948
1,-1.0934833097,-0.801864954993
1,-1.06061689938,-1.34832895394
1,-0.752697423238,-0.753716927762
1,-1.42694528231,-0.697203669377
1,-0.841274764173,-0.781559535673
0,0.756459398564,0.844521971092
0,1.29976101673,1.10831386395
1,-1.03171705276,-1.25389155479
1,-0.890890412259,-0.812819878009
1,-0.551192238809,-0.994639833663
0,1.24756402929,0.776557542347
0,1.05839562468,1.04517104829
1,-1.12073030272,-0.837256437347
1,-0.827044749048,-1.05624253637
0,0.51395666615,1.11051505652
0,0.739673389253,0.741822007211
1,-1.27381981888,-1.19688252607
1,-0.654682929756,-0.973799559948
1,-1.27326329396,-0.989773871415
1,-1.09444784539,-1.27763064589
1,-0.993825738802,-1.09185165069
1,-0.802710426742,-0.993350846287
0,1.11525094936,1.00938723789
0,0.917137550958,0.816560261526
0,0.992330530566,0.791111411532
1,-0.707168296188,-0.925412488074
0,1.13771832227,0.96075461916
0,0.890959410912,0.938086246527
1,-1.03627843595,-0.776940390286
0,0.915069704117,0.666640094248
0,0.816783179949,0.786688515747
0,0.801292624248,0.903060164209
0,1.16113203714,1.03195696871
0,1.26008314539,1.24444035364
1,-0.948892165167,-0.926001056779
1,-0.941296417563,-1.10844766803
1,-1.10542842758,-0.92429505133
0,1.01750150234,1.25568028746
0,0.752380047579,1.05486143046
1,-0.73702836877,-1.23281202586
0,0.627606560555,1.05109826999
0,0.517776378126,1.19789326426
1,-0.713222432843,-0.958592252019
0,1.04382879954,0.480124791259
1,-1.09346632927,-0.536183575128
0,0.797768559477,0.867412022334
1,-1.25166127245,-1.20054168598
0,1.13751133453,1.4897145682
0,1.32907920212,1.16890122329
1,-1.05221319285,-0.619013961394
0,1.31527194689,0.928056818898
1,-0.84617505511,-0.960290093304
0,0.93145065596,1.0457932325
1,-1.1406088498,-0.766721044648
1,-1.18117331655,-1.09069328092
0,1.08225304345,0.829386217405
1,-0.769894971178,-0.699558950919
0,1.02050957341,0.986519722895
1,-1.34325173759,-1.18516751807
1,-0.582824676028,-0.97448263386
1,-0.91682874077,-1.30224725509
0,1.20912285457,0.986237097157
1,-0.83145177721,-1.04546167864
1,-1.20867057072,-0.695569099091
1,-0.964820787331,-0.986652971401
1,-0.723287318177,-1.17655839686
0,1.24904540791,0.958520057889
1,-0.959382519428,-1.10295631686
1,-1.01521994073,-0.81720667044
0,0.979055380374,0.930820910756
0,1.11319239668,1.10061025635
1,-1.10539188939,-1.15785134438
0,1.25569667484,0.809511757516
1,-1.00258583913,-1.0325433727
1,-1.33488656243,-1.09859469138
0,0.891235180887,0.819265367588
0,1.14944236244,1.1630855575
1,-0.946205231191,-0.849020692018
0,0.943022145949,0.715867364935
1,-0.76076441316,-0.779314332251
0,0.910738956598,1.35569832177
0,0.721201637698,0.955888602789
1,-0.787255827868,-0.55261444834
1,-0.573276712201,-0.959791230078
0,1.0087782502,1.45900319422
0,1.01110015772,1.33955783784
1,-0.605276760547,-0.952284518005
1,-0.925546454866,-1.17246909632
1,-1.02198782791,-0.866145630703
0,0.957553827204,1.05482417288
1,-0.811182149039,-1.04335186967
0,1.02046011441,0.904556127609
1,-0.916580747162,-1.08094162804
1,-1.31552650363,-0.750744076626
0,0.559553483781,1.12829448684
0,1.29397635256,1.04237547388
1,-1.13287497615,-0.81420008866
1,-0.944853915653,-1.20704136072
1,-1.2461332122,-0.958377844123
1,-0.977067258698,-1.30396981597
0,0.903306301543,0.949296054698
0,0.952134777039,1.06964011491
0,0.621814550845,0.623016191179
0,0.996972697804,1.2927729619
0,0.719837189355,0.973600117394
0,1.24771674478,0.924379447806
0,1.09031605882,0.710713190899
0,0.602160571357,0.907628480382
1,-1.30509663897,-1.22218292874
1,-1.24449003602,-1.11234483665
1,-0.996925647094,-1.12394888285
1,-0.796142515273,-1.06934554615
1,-1.01453272118,-0.935585735187
0,0.892871032184,0.89163677657
0,0.878883422428,0.699279561569
1,-0.888679242366,-0.954850905534
0,1.48164360015,0.942929378117
0,0.877942993272,0.82188051988
0,1.2840129128,0.782852923729
0,0.979628162094,1.0046376355
0,1.0011746956,1.12267964379
0,0.830659477108,0.869425923633
1,-1.03726570769,-1.23452374255
0,0.987657575081,1.37662447447
0,0.580467666554,0.908464440585
1,-0.660081209999,-0.984522246947
1,-0.980369691949,-1.23243599229
0,0.606533785575,0.699995653768
0,1.2587530271,0.85275307954
0,0.979099363225,1.17801837723
1,-1.01138332504,-0.896736446022
0,0.769932593875,1.1598559749
1,-1.04170291003,-0.912274263627
0,0.371305675372,1.19505468158
1,-0.896310853917,-1.15976596784
0,0.97455113657,0.807068299647
0,1.21648553711,0.767050284428
0,0.831378092761,0.649644408667
1,-1.25986459056,-1.54711480041
0,1.13340112634,0.736359688345
1,-1.2384024028,-1.06698951467
1,-0.988528343692,-0.73969035651
1,-1.1593662035,-1.37746336517
1,-0.933518993839,-0.984825025698
1,-1.51994563014,-0.992198993144
1,-1.18289178831,-1.09432642972
1,-1.1489853972,-1.25476230938
0,0.850649329045,1.04511356058
0,0.94721169958,0.602117008885
0,0.769624988299,0.841565193223
0,0.882463189363,0.830069344874
1,-1.19048953014,-1.11239954012
1,-1.37596644766,-0.835583923757
0,1.15934228485,1.09186345868
0,1.05753142965,1.21179706895
0,1.01735561622,0.706920484683
0,1.25766505085,1.46784431684
1,-0.904286143701,-0.99044421338
0,0.688260024665,0.992351467181
0,1.08799888383,0.775358047861
1,-1.28388131583,-0.693712470801
1,-0.998580601632,-0.918860662659
1,-0.777905864167,-0.665513500153
1,-1.04125869063,-1.01713047393
1,-0.872275784821,-0.697519693307
0,1.15431043138,1.10149551628
0,1.33418913895,1.40379322465
0,1.37519870216,0.721840657971
0,1.02407222119,0.839840744484
1,-0.812167585814,-1.05228941383
1,-1.17749289026,-0.901481708669
0,1.01857858685,0.897001451661
0,0.871092966037,1.15403550193
0,0.998247749893,1.00736964006
1,-0.833648095387,-1.20145439675
0,1.02711313653,0.749427178008
1,-0.934653682918,-0.950688169763
1,-0.707870466892,-1.05779320848
0,1.19638680512,0.92284423706
0,1.2409973686,1.16302331561
1,-1.07686675539,-0.767626617732
0,0.877645943426,1.21517199377
1,-0.971147020183,-1.11221146273
1,-0.867142933604,-0.749630698221
1,-1.044349193,-0.998343943806
0,1.010116662,1.18559779957
1,-1.07085798109,-1.11210027352
0,0.807044965878,0.988275972749
1,-0.989614222944,-0.910899342659
0,1.06351163418,1.02178086384
0,0.926517738237,1.0643475345
1,-0.824690984171,-1.12187500144
0,0.952629213312,1.28773201112
0,1.4960145587,1.19712198723
1,-0.875043933252,-1.2711026884
0,0.861985501019,1.2682967345
0,1.02214055141,0.971836886647
1,-1.04975530185,-0.861185052048
0,1.16225778459,0.926479092841
1,-0.935322471815,-0.96645101227
0,1.23139224734,0.793878878449
1,-0.83692733936,-0.698771683734
0,1.09847002587,1.00367339063
1,-0.937565881378,-1.12237616805
0,0.979596407895,0.865568046059
1,-0.912566324192,-0.872655513966
1,-0.606827980293,-0.626358852502
1,-0.905890154881,-0.680739811248
53 changes: 53 additions & 0 deletions example/clustering_2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

"""
Using Clustering
========================================
This is a simple example that illustrates Clustering service usage.
"""

from jubakit.clustering import Clustering, Schema, Dataset, Config
from jubakit.loader.csv import CSVLoader

# Load a CSV file.
loader = CSVLoader('blobs.csv')

# Define a Schema that gives the type of each column of the CSV file:
# 'cluster' is treated as the row ID, every other column as a NUMBER.
schema = Schema({
    'cluster': Schema.ID,
}, Schema.NUMBER)

# Create a Dataset.
dataset = Dataset(loader, schema)

# Create a Clustering Service.
cfg = Config(method='kmeans')
clustering = Clustering.run(cfg)

# Everything that talks to the running service lives inside try/finally so
# that the service is always stopped, even if a call below raises.
try:
    # Update the Clustering model. The per-row results yielded by push()
    # are not needed in this example, so they are simply discarded.
    for _ in clustering.push(dataset):
        pass

    # Get clusters
    clusters = clustering.get_core_members(light=False)
    # Get centers of each cluster
    centers = clustering.get_k_center()

    # Calculate SSE: sum of squared errors
    sse = 0.0
    for cluster, center in zip(clusters, centers):
        # num_values is a sequence of (key, value) pairs. Look the
        # coordinates up by key for both the center and the members rather
        # than relying on the position of each pair in the sequence.
        center_values = dict(center.num_values)
        for member in cluster:
            point_values = dict(member.point.num_values)
            sse += ((point_values['x1'] - center_values['x1']) ** 2 +
                    (point_values['x2'] - center_values['x2']) ** 2)
    print('SSE:', sse)
finally:
    clustering.stop()
Loading

0 comments on commit 7c00087

Please sign in to comment.