In [120]:
import datetime
import hashlib
import json
import time
import yaml

DATETIME_FORMAT = '%m/%d %H:%M'


def format_now():
    return datetime.datetime.now().strftime(DATETIME_FORMAT)


class Test:
    def __init__(self, owner,config):
        self.owner = owner
        self.config = config
        self.id = hashlib.sha256(json.dumps(config).encode()).hexdigest()
        
        self.down_message = config.setdefault('down_message', '$name is down, since $last_pass_time')
        self.up_message = config.setdefault('up_message', '$name is up')
        self.ignore_fail_count = config.setdefault('ignore_fail_count', 0)
        self.alert_period_hours = config.setdefault('alert_period_hours', 1.0)
        
    def get(self, key, default=None):
        if not self.id in self.owner.state:
            self.owner.state[self.id] = {}
        return self.owner.state[self.id].setdefault(key, default)

    def set(self, key, value):
        if not self.id in self.owner.state:
            self.owner.state[self.id] = {}
        self.owner.state[self.id][key] = value

    def expand_message(self, message):
        for key, value in self.config.items():
            message = message.replace('$' + key, str(value))
        if not self.id in self.owner.state:
            self.owner.state[self.id] = {}
        for key, value in self.owner.state[self.id].items():
            message = message.replace('$' + key, str(value))
        return message

    def do_pass(self):
        if self.get('state') != 'passing':
            self.owner.notify(self.expand_message(self.up_message))
            self.set('state', 'passing')
            self.set('first_pass_time', format_now())
            self.set('last_fail_alert_time', 0)
        self.set('last_pass_time', format_now())
        self.set('fail_count', 0)
        self.set('url', 'http://%s:%d' % ( self.config['host'], self.config['port']))
        self.set('global_worker_id', self.config['worker_id'])
        self.set('model_id', self.config['model_id'])
        dict_to_redis_hset(r, 'server_real_time_state', self.owner.state)
        with open('heartbeat_ok.json', 'w') as state_file:
            json.dump(self.owner.state, state_file)

    def do_fail(self):
        fail_count = self.get('fail_count', 0) + 1
        self.set('fail_count', fail_count)
        if fail_count > self.ignore_fail_count:
            if self.get('state') != 'failing':
                self.set('state', 'failing')
                self.set('first_fail_time', format_now())
            alert_time = time.time()
            last_alert_fail_time = self.get('last_fail_alert_time', 0)
            if alert_time - last_alert_fail_time >= self.alert_period_hours * 60 * 60:
                self.set('last_fail_alert_time', alert_time)
                self.owner.notify(self.expand_message(self.down_message))
        self.set('last_fail_time', format_now())
        self.set('url', 'http://%s:%d' % ( self.config['host'], self.config['port']))
        self.set('global_worker_id', self.config['worker_id'])
        self.set('model_id', self.config['model_id'])
        
        dict_to_redis_hset(r, 'server_real_time_state', self.owner.state)
        with open('heartbeat_down.json', 'w') as state_file:
            json.dump(self.owner.state, state_file)



class TCPTest(Test):
    def __init__(self, owner,config):
        Test.__init__(self,owner,config)
        #super().__init__(owner, config)
        import socket
        self.host = config['host']
        self.port = config['port']

    def run(self):
        import socket
        try:
            sock=socket.create_connection((self.host, self.port))
            print('{}:{} OK'.format(self.host, self.port))
            sock.shutdown(socket.SHUT_RDWR)
        except:
            print('{}:{} {}'.format(self.host, self.port, 'err'))
            self.do_fail()
        else:
            self.do_pass()


class HTTPTest(Test):
    def __init__(self, owner, config):
        Test.__init__(self,owner, config)#注意此处参数含self  
        #super().__init__(owner, config)
        import requests
        self.url = 'http://%s:%d' % ( config['host'], config['port'])#'http://%s:%d' % (worker.host,worker.port)#config['url']
        self.headers ={'x-ha-access': 'XXXXXXXX', 'Content-Type': 'application/json'}#config.get('headers', {})
        #print('self.headers',self.headers)
    def run(self):
        import requests
        try:
            r = requests.get(self.url, headers=self.headers)
            print(self.url, r.status_code, r.reason)
            if r.status_code == 200:
                self.do_pass()
            else:
                self.do_fail()
        except requests.ConnectionError as err:
            print('{}:{} {}'.format(self.url, self.headers, err))
            self.do_fail()

TEST_PROVIDERS = [('tcp', TCPTest),('http', HTTPTest)]

class Heartbeat:
    def __init__(self):
        self.state = {}
        self.tests = []
        self.alerts = []
    def _load_tests(self):
        for key, provider in TEST_PROVIDERS:
            
            self.tests.append(provider(self,{'host':'127.0.0.1','port':8001}))
                    
    def notify(self, message):
        for alert in self.alerts:
            print 'message',message
            alert.send(message)

    def test(self):
        for test in self.tests:
            test.run()

    def run(self):
        self._load_tests()
        self.test()
        for x in  self.tests:
            print x
        #self.save_state()
class Heartbeat4LB:
    def __init__(self):
        self.state = {}
        self.tests = []
        self.alerts = []
    def run_heart(self,config):
        if config['test_type']=='tcp':            
            self.tests.append(TCPTest(self,config))
        else:
            self.tests.append(HTTPTest(self,config))
                    
    def notify(self, message):
        for alert in self.alerts:
            alert.send(message)

    def test(self):
        for test in self.tests:
            test.run()

    def run(self,conf):
        self.run_heart(conf)
        self.test()



import time
t1=time.time()
#if __name__ == '__main__':
#    heartbeat = Heartbeat4LB()
#    heartbeat.run({'host':'127.0.0.1','port':8051,'test_type':'http'})
#    print(time.time()-t1)

In [116]:
r.hgetall('server_real_time_state')

{'3599554d487ac658835d3a40bb14d0a51793cb985ce7c9f9434b4c88cce68ea6': "{'last_fail_alert_time': 1527599183.227461, 'url': u'http://localhost:9019', 'state': 'failing', 'fail_count': 3, 'last_fail_time': '05/29 21:06', 'global_worker_id': u'localhost-9019-2', 'first_fail_time': '05/29 21:06'}",
 '39f6a25897686c3ba25ec1a9fe24a670c4c5e42f476392734933fb91c86920eb': "{'last_fail_alert_time': 1527599183.095376, 'url': u'http://localhost:9005', 'state': 'failing', 'fail_count': 5, 'last_fail_time': '05/29 21:06', 'global_worker_id': u'localhost-9005-1', 'first_fail_time': '05/29 21:06'}",
 '3d582a3591f27fee1a39b0dfcd5b7009ff7fc180849fb12b21eb2bf88d5cb4b7': "{'last_fail_alert_time': 0, 'url': u'http://localhost:8002', 'first_pass_time': '05/29 21:06', 'last_pass_time': '05/29 21:06', 'fail_count': 0, 'state': 'passing', 'global_worker_id': u'localhost-8002-1'}",
 '67b0f92881cba4f1c39bc866cb5d85e66b4cd78f759f2f0f4bdcf5cf7eaa5920': "{'last_fail_alert_time': 0, 'url': u'http://localhost:8001', 'fi

In [124]:
r.hgetall('server_real_time_state')

{'1106f3ac9cf41ce3c84567733ee385e91da61dd2a523407fe0942b3530cdc24f': "{'model_id': u'fib2', 'last_fail_alert_time': 1527599880.954699, 'url': u'http://localhost:9019', 'state': 'failing', 'fail_count': 3, 'last_fail_time': '05/29 21:18', 'global_worker_id': u'localhost-9019-2', 'first_fail_time': '05/29 21:18'}",
 '2e4e0674909b941665469df6f548718f66198f5bd7b98995e7a22751fdc49479': "{'model_id': u'fib2', 'last_fail_alert_time': 0, 'url': u'http://localhost:8001', 'first_pass_time': '05/29 21:18', 'last_pass_time': '05/29 21:18', 'fail_count': 0, 'state': 'passing', 'global_worker_id': u'localhost-8001-2'}",
 '3599554d487ac658835d3a40bb14d0a51793cb985ce7c9f9434b4c88cce68ea6': "{'last_fail_alert_time': 1527599183.227461, 'url': u'http://localhost:9019', 'state': 'failing', 'fail_count': 3, 'last_fail_time': '05/29 21:06', 'global_worker_id': u'localhost-9019-2', 'first_fail_time': '05/29 21:06'}",
 '39f6a25897686c3ba25ec1a9fe24a670c4c5e42f476392734933fb91c86920eb': "{'last_fail_alert_time

In [55]:
class TCPTest():
    def __init__(self, host, port):
        #super(TCPTest, self).__init__(owner, config)
        #super().__init__(owner, config)
        import socket
        self.host = host
        self.port = port

    def run(self):
        import socket
        try:
            sock=socket.create_connection((self.host, self.port),timeout=1.1)# as sock:
            print('{}:{} OK'.format(self.host, self.port))
            sock.shutdown(socket.SHUT_RDWR)
        except:
            print('{}:{} {}'.format(self.host, self.port, 'err'))
            #self.do_fail()
        else:
            print'd'
            #self.do_pass()
x=TCPTest('knowledge.wanda.cn',4443)
import time
t1=time.time()
x.run()
print(time.time()-t1)

knowledge.wanda.cn:4443 err
0.00629281997681


In [23]:
import socket
socket.getaddrinfo("www.python.org", 80, 0, 0, socket.IPPROTO_TCP)

[(2, 1, 6, '', ('151.101.72.223', 80))]

In [153]:
def dict_to_redis_hset(r, hkey, dict_to_store):
    """
    Saves `dict_to_store` dict into Redis hash, where `hkey` is key of hash.
    >>> import redis
    >>> r = redis.StrictRedis(host='localhost')
    >>> d = {'a':1, 'b':7, 'foo':'bar'}
    >>> dict_to_redis_hset(r, 'test', d)
    True
    >>> r.hgetall('test')
    {'a':1, 'b':7, 'foo':'bar'}
    """
    return all([r.hset(hkey, k, v) for k, v in dict_to_store.items()])
import redis
r = redis.StrictRedis(host='localhost')
worklist2=json.loads(r.hgetall('model_type_to_model_to_worker_list')['AI'])

In [144]:
worklist4=json.loads(r.hgetall('model_type_to_model_to_worker_list')['AI'])
worklist4

{u'fib2': [u'localhost-8001-2', u'localhost-8001-1']}

In [128]:
worklist2

{u'fib': [u'localhost-8001-1', u'localhost-9019-1', u'localhost-8003-1'],
 u'fib2': [u'localhost-8001-2',
  u'localhost-9019-2',
  u'localhost-8001-1',
  u'localhost-9019-1'],
 u'predict': [u'localhost-8002-1', u'localhost-9005-1']}

In [129]:
dict_to_redis_hset(r,'lll',worklist2)

True

In [132]:
worklist3={'fib': "[u'localhost-8001-1']"}
dict_to_redis_hset(r,'lll',worklist3)

False

In [133]:
r.hgetall('lll')

{'fib': "[u'localhost-8001-1']",
 'fib2': "[u'localhost-8001-2', u'localhost-9019-2', u'localhost-8001-1', u'localhost-9019-1']",
 'predict': "[u'localhost-8002-1', u'localhost-9005-1']"}

In [121]:
def isalive(self,model_type,worker_id_list):
        worker_id = random.choice(worker_id_list)    
        while True:   
            try: 
                random_worker=json.loads(r.hgetall('model_type_to_worker_id_to_worker')[model_type])[worker_id]
                host_port='http://%s:%d' % (random_worker['host'],random_worker['port'])
                resp = self.session.request('get', url=host_port, params=None, json=None, timeout=3)
                return worker_id
            except requests.exceptions.RequestException:
                worker_id = random.choice(worker_id_list)
                random_worker=json.loads(r.hgetall('model_type_to_worker_id_to_worker')[model_type])[worker_id]
                host_port='http://%s:%d' % (random_worker['host'],random_worker['port'])


In [123]:
worklist_db=json.loads(r.hgetall('model_type_to_model_to_worker_list')['AI'])
heartbeat = Heartbeat4LB()
    
for model_id,worklist in worklist_db.items():
    for worker_id in worklist:
        per_worker=json.loads(r.hgetall('model_type_to_worker_id_to_worker')['AI'])[worker_id]
        heartbeat.run({'host':per_worker['host'],'port':per_worker['port'],'worker_id':worker_id,'model_id':model_id,'test_type':'http'})
        #print model_id,per_worker

(u'http://localhost:8001', 200, 'OK')
(u'http://localhost:8001', 200, 'OK')
http://localhost:9019:{'x-ha-access': 'XXXXXXXX', 'Content-Type': 'application/json'} HTTPConnectionPool(host='localhost', port=9019): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x10f002310>: Failed to establish a new connection: [Errno 61] Connection refused',))
(u'http://localhost:8001', 200, 'OK')
http://localhost:9019:{'x-ha-access': 'XXXXXXXX', 'Content-Type': 'application/json'} HTTPConnectionPool(host='localhost', port=9019): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x10f30fa50>: Failed to establish a new connection: [Errno 61] Connection refused',))
(u'http://localhost:8003', 200, 'OK')
(u'http://localhost:8001', 200, 'OK')
http://localhost:9019:{'x-ha-access': 'XXXXXXXX', 'Content-Type': 'application/json'} HTTPConnectionPool(host='loc

In [159]:
r.hgetall('model_type_to_model_to_worker_list')

{'AI': '{"fib": ["localhost-8002-3", "localhost-8003-1"], "fib2": ["localhost-8001-2"]}',
 '\x80\x02X\x02\x00\x00\x00AIq\x00.': '\x80\x02}q\x00.'}

In [147]:
r.hgetall('model_type_to_worker_id_to_worker')

{'AI': '{"localhost-8001-2": {"local_worker_id": 2, "host": "localhost", "global_worker_id": "localhost-8001-2", "port": 8001}, "localhost-8001-3": {"local_worker_id": 3, "host": "localhost", "global_worker_id": "localhost-8001-3", "port": 8001}, "localhost-8001-1": {"local_worker_id": 1, "host": "localhost", "global_worker_id": "localhost-8001-1", "port": 8001}, "localhost-8003-1": {"local_worker_id": 1, "host": "localhost", "global_worker_id": "localhost-8003-1", "port": 8003}}'}

In [None]:
{'AI':}

In [172]:
r.hgetall('model_id_request_count').pop('fib2')

'{"localhost-8001-2": 1, "localhost-9005-1": 1}'

In [175]:
import redis
def del_hkeys(pattern) :  
    cnt = redis.Redis(host='127.0.0.1',port=6379,db=2)  
    key_dict={}  
    keys = cnt.keys(pattern);  
    for key in keys:  
        subkeys = cnt.hkeys(key)  
        key_dict[key] = subkeys  
    pipe = cnt.pipeline(transaction=False)  
    counter=0  
    for key in key_dict.keys():  
        subkeys = key_dict[key]  
        for subkey in subkeys:  
            cnt.hdel(key, subkey)  
            counter+=1  
            if counter > 10000:  
                pipe.execute()  
    pipe.execute()
del_hkeys('fib2')

In [176]:
r.hgetall('model_id_request_count')

{'fib': '{"localhost-8002-3": 1}',
 'fib2': '{"localhost-8001-2": 1, "localhost-9005-1": 1}',
 'predict': '{"localhost-9005-1": 1}'}

In [170]:
r.hdel('model_id_request_count','\x80\x02X\x03\x00\x00\x00fibq\x00.')

1

In [168]:
r.hgetall('server_real_time_state')

{'1106f3ac9cf41ce3c84567733ee385e91da61dd2a523407fe0942b3530cdc24f': "{'model_id': u'fib2', 'last_fail_alert_time': 1527641831.724177, 'url': u'http://localhost:9019', 'state': 'failing', 'fail_count': 3, 'last_fail_time': '05/30 08:57', 'global_worker_id': u'localhost-9019-2', 'first_fail_time': '05/30 08:57'}",
 '13c9f03a7ae085ff673a325df2b0173946098c85eec13e0ef57062ab80e6d50b': "{'model_id': u'fib', 'last_fail_alert_time': 0, 'url': u'http://localhost:8001', 'first_pass_time': '05/30 18:44', 'last_pass_time': '05/30 18:44', 'fail_count': 0, 'state': 'passing', 'global_worker_id': u'localhost-8001-3'}",
 '26d1e11d0ad3d3bf48b81d8658afc3c5a1796438bd348e72c9551d5893aa8665': "{'last_fail_alert_time': 0, 'url': u'http://localhost:8003', 'first_pass_time': '05/31 11:32', 'last_pass_time': '05/31 11:32', 'fail_count': 0, 'state': 'passing', 'global_worker_id': u'localhost-8003-1'}",
 '2e4e0674909b941665469df6f548718f66198f5bd7b98995e7a22751fdc49479': "{'model_id': u'fib2', 'last_fail_alert_

In [156]:
from apscheduler.events import EVENT_ALL
from apscheduler.schedulers.background import BackgroundScheduler

In [157]:
# coding:utf-8
from apscheduler.schedulers.blocking import BlockingScheduler
import datetime


def aps_test():
    print datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '你好'


scheduler = BlockingScheduler()
scheduler.add_job(func=aps_test, trigger='cron', second='*/5')
scheduler.start()

2018-05-30 18:54:50 你好
2018-05-30 18:54:55 你好
2018-05-30 18:55:00 你好
2018-05-30 18:55:05 你好
2018-05-30 18:55:10 你好
2018-05-30 18:55:15 你好
2018-05-30 18:55:20 你好
2018-05-30 18:55:25 你好
2018-05-30 18:55:30 你好


KeyboardInterrupt: 

In [177]:
def dict_to_redis_hset(r, hkey, dict_to_store):
    """
    Saves `dict_to_store` dict into Redis hash, where `hkey` is key of hash.
    >>> import redis
    >>> r = redis.StrictRedis(host='localhost')
    >>> d = {'a':1, 'b':7, 'foo':'bar'}
    >>> dict_to_redis_hset(r, 'test', d)
    True
    >>> r.hgetall('test')
    {'a':1, 'b':7, 'foo':'bar'}
    """
    return all([r.hset(hkey, k, v) for k, v in dict_to_store.items()])

In [184]:
ai={'ai':{'a':1, 'b':7, 'foo':'bar'}}
dict_to_redis_hset(r,'dd',ai)

False

In [180]:
r.hgetall('dd')

{'ai': "{'a': 1, 'b': 7, 'foo': 'bar'}"}

In [186]:
ai2={'ai':json.dumps({'a':0})}

In [187]:
dict_to_redis_hset(r,'dd',ai2)

False

In [188]:
r.hgetall('dd')

{'ai': '{"a": 0}'}