# Clustering
## ENS clusters
Weather regimes clustering is an interesting topic.

* [Weather regimes](https://software.ecmwf.int/wiki/download/attachments/34408561/Molteni_Weather-regimes.pdf?api=v2)
* [Clustering](https://software.ecmwf.int/wiki/download/attachments/34408561/Ferranti_Clustering.pdf?api=v2)
* [YA](https://software.ecmwf.int/wiki/display/~mobo/Clustering+-+ENS+Medium-Range)
* [more](https://software.ecmwf.int/wiki/display/~mobo/Clustering+-+ENS+Medium-Range#Clustering-ENSMedium-Range-AdditionalSourcesofInformation)
* [courses](https://software.ecmwf.int/wiki/display/OPTR/Training+courses+--+Multimedia+resources)
* [seas](https://software.ecmwf.int/wiki/display/SEAS/Circulation+regime+clustering+diagnostics)
* [ML](https://www.analyticsvidhya.com/blog/2017/09/common-machine-learning-algorithms/)
* Member States can start from a provided set of scripts and sources to design a customised clustering system.

  ``/home/us/usx/enscluster/1.2/enscluster.sh > out.tmp``
* The original script is turned into an ecFlow task: a few variables are added, and the head and tail files are included.
  For the clustens suite to run, **copy the clustepsens.ecf script into the ECF_HOME** directory for the server to use it.
* It runs operationally as the clusteps task.

In [2]:
# Only needed on Python 2, where print(..., file=...) (used further down to
# write the task template) is not available by default.
# from __future__ import print_function
import os, pwd, sys; 
from datetime import date, timedelta
# Hard-coded Python 3.5 site-packages directory, appended to the module search
# path before the `ecf` import below (presumably where `ecf` is installed --
# TODO confirm on the target host).
loc = "/usr/local/lib/python3.5/site-packages/ecflow"
sys.path.append(loc)
from ecf import (Client, Defs, Suite, Family, Task, Defstatus, Edit, Label, Repeat, Time, Trigger)
# Root directory used as ECF_HOME by the suite definition below.
# NOTE: os.getenv("HOME") returns None when HOME is unset, in which case this
# concatenation raises TypeError.
home = os.getenv("HOME") + "/ecflow_server"
# One day before today; its YYYYMMDD form is the first value of the suite's
# date Repeat.
yesterday = date.today() - timedelta(1)
# SUITE
name = "enscluster"

# The pieces are created in the same order as they are nested in the suite,
# then assembled by the final add() call.
suite_root = Suite(name)
suite_status = Defstatus("suspended")  # the suite is resumed explicitly after loading
suite_variables = Edit(
    ECF_HOME=home,
    ECF_INCLUDE="{}/include".format(home),
    ECF_FILES="{}/files".format(home),
    # current convention: .ecf is for user provided scripts, .ecg for generated ones
    ECF_EXTN=".ecg",
    ECF_OUT="%ECF_HOME%",
    ECF_JOB_CMD="%ECF_JOB% > %ECF_JOBOUT% 2>&1",
)

# Family "ms": a single "cmd" task, complete by default, that points at the
# standalone member-state script.
ms_family = Family("ms").add(
    Label("info", "run standalone script"),
    Task("cmd").add(
        Defstatus("complete"),
        Edit(
            CMD="/home/us/usx/enscluster/1.2/enscluster.sh",
            ECF_EXTN=".ecg",  # generated
            ARGS="",
        ),
    ),
)

# Family "main": date repeat starting yesterday, a "make" sub-family that is
# complete by default, and the suite-named task carrying a 08:00 time attribute.
first_ymd = yesterday.strftime("%Y%m%d")
main_family = Family("main").add(
    Repeat("YMD", first_ymd, 20320110, 1, "date"),
    Family("make").add(
        Defstatus("complete"),
        Label("info", "compile and run, execute to run"),
        Task(name).add(
            Edit(COMPILE="true", ECF_EXTN=".ecg", TASK="cmd"),
        ),
    ),
    Task(name).add(
        Time("08:00"),
    ),
)

# SUITE DEFINITION
node = suite_root.add(suite_status, suite_variables, ms_family, main_family)
# Write the generated task template cmd.ecg into the suite's ECF_FILES
# directory (home + "/files").
template_dir = home + "/files"
# open(..., 'w') does not create missing parent directories, so a first run on
# a fresh account would fail with FileNotFoundError; create the directory
# beforehand (no-op when it already exists).
os.makedirs(template_dir, exist_ok=True)
with open(template_dir + "/cmd.ecg", 'w') as task_template:
    # The template wraps a single command line between the head.h and tail.h
    # includes; %CMD:echo% and %ARGS:% are ecFlow variables with inline
    # defaults ("echo" and empty). print() appends the final newline.
    print("""#!%SHELL:/bin/bash%
%include <head.h>
 %CMD:echo% %ARGS:%
%include <tail.h>""", file=task_template)
    
# DEFS
# Absolute node path of the suite: "/" followed by the suite name.
path = "/{}".format(name)
# Definition container holding the single suite built above.
defs = Defs()
defs.add_suite(node)

# CLIENT: play/load/replace
def get_username():
    """Return the login name registered for the current process's user id."""
    passwd_entry = pwd.getpwuid(os.getuid())
    return passwd_entry.pw_name
def get_uid():
    """Return the numeric user id of the current process.

    The id is read directly from the OS instead of being mapped
    uid -> login name -> uid through the password database: the result is the
    same, but the call no longer raises KeyError when the uid has no passwd
    entry.
    """
    return os.getuid()
# Server port: the ECF_PORT environment variable when set (a string), otherwise
# the per-user default 1500 + uid (an int). Both render the same through "%s".
# The uid comes from the get_uid() helper defined just above rather than from
# an inline copy of its body.
ECF_PORT = os.getenv("ECF_PORT", 1500 + int(get_uid()))
client = Client("localhost@%s" % ECF_PORT)  # PYTHON CLIENT
client.replace(path, defs)  # load/replace the top node (suite)
client.begin_suite(name)  # BEGIN suite: UNKNOWN -> QUEUED
client.resume(path)  # RESUME suite: SUSPENDED -> create job and submit