diff --git a/check_rancher_services.py b/check_rancher_services.py
index 485297e..8ef995b 100644
--- a/check_rancher_services.py
+++ b/check_rancher_services.py
@@ -13,6 +13,7 @@
 import json
 import subprocess
 import time
+import sqlite3
 # this requires python 3.4
 import pathlib
 from pprint import pprint
@@ -60,7 +61,7 @@ def process_section(conf, section):
 	hostsReq=session.get(urlbase+'/v2-beta/projects/' + envid + '/hosts/', auth=(username,password))
 	hostData=hostsReq.json()['data']
 
-# monitor an agent
+# monitor rancher agents
 	for host in hostData:
 		state=3
 		stateText='UNKNOWN'
@@ -93,32 +94,13 @@ def process_section(conf, section):
 		sys.exit(0)
 	stackId = stackData[myStack]['id']
 
-### this part needs a lot of work
-### moving to separate check_rancher_containers.py till we can figure out how to
-### get stats directly from rancher 1.x API
-#	memState = 0
-#	memStateTxt = 'OK'
-#	memCommentTxt = ''
-## can only check stats on the local host
-## to do: try to talk to the websocket to get stats from rancher API instead
-#	dockerStats = dict()
-
-# only get stats if hostid specified (since some hosts' subprocess module is broken)
-#	if hostid is not None:
-#		dockerStatsProc = subprocess.run(["docker", "stats", "--no-stream", "--no-trunc", "-a", "--format", "'{{.ID}}:{{.MemUsage}}'"], stdout=subprocess.PIPE)
-##		print(dockerStatsProc)
-#		for line in dockerStatsProc.stdout.decode('utf-8').rstrip().split('\n'):
-#			mylist = line.strip("'").split(':')
-#			memUse = mylist[1].split(' ')
-#			dockerStats[mylist[0]] = memUse[0]
-##		print(dockerStats)
-
+##### test health of listed services (if any)
 # track if there's an old dummy service that wasn't deleted
 	oldDummyService = None
 
 	for serviceId in stackData[myStack]['serviceIds']:
 	#	print (serviceId)
-# in that stack, look through serviceIds for named services in /v2-beta/projects/envid/services/serviceId
+# in the stack, look through serviceIds for named services in /v2-beta/projects/envid/services/serviceId
 		serviceReq=session.get(urlbase+'/v2-beta/projects/' + envid + '/services/' + serviceId, auth=(username,password))
 		svc=serviceReq.json()
 		if svc['name'] == 'checkmkDummy':
@@ -137,66 +119,83 @@ def process_section(conf, section):
 			print (str(serviceState) + ' ' + envname + '_' + stackname + '_' + svc['name'] + ' - ' + serviceStateTxt + ' running instances: ' + str(svc['currentScale']))
 	#	    print svc['healthState']
 
+
+##### test overall stack health
 	if (conf.has_option(section,'test_stack_health') and conf.getboolean(section,'test_stack_health') is True):
 		stackState = 3
 		stackStateTxt = 'UNKNOWN'
+		stackExtraTxt = ''
 
 		if (conf.has_option(section,'stack_health_dir')):
-		    stackHealthFile = conf[section]['stack_health_dir'] + '/' + envname + '_' + stackname + '_stackHealth'
-		    stackPath = pathlib.Path(stackHealthFile)
-		    # make sure the file exists, in case stack has never been healthy
-		    # (should also error immediately if a bad path is provided in the config file)
-		    if (not stackPath.exists()):
-		        stackPath.touch()
-			
+			stackHealthFile = conf[section]['stack_health_dir'] + '/' + envname + '_' + stackname + '_stackHealth.db'
+			stackPath = pathlib.Path(stackHealthFile)
+			# make sure the db file exists, in case stack has never been checked
+			# (should also error immediately if a bad path is provided in the config file)
+			if (not stackPath.exists()):
+				conn = sqlite3.connect(stackHealthFile)
+				conn.execute('CREATE TABLE badServices (serviceId TEXT PRIMARY KEY, serviceName TEXT, lastUpdate DATETIME DEFAULT CURRENT_TIMESTAMP)')
+				conn.commit()
+				conn.close()
+
+		conn = sqlite3.connect(stackHealthFile)
+
 		if stackData[myStack]['healthState'] == 'healthy':
 			stackState = 0
 			stackStateTxt = 'OK'
 			if (conf.has_option(section,'stack_health_dir')):
-			    stackPath.touch()
-#		if stackData[myStack]['healthState'] == 'degraded':
-		# this may be too broad, but let's see if it's a problem
+# just assume all services are healthy if stack is, and delete all bad services from the db
+				conn.execute('DELETE FROM badServices')
+				conn.commit()
+
+##### if stack reports degraded, look through services in stack to verify
+# (rancher 1 doesn't really report this very well)
 		else:
-			stackState = 1
-			stackStateTxt = 'WARNING'
-			if (conf.has_option(section,'stack_health_dir') and conf.has_option(section,'stack_health_age') and stackPath.exists()):
-			    # check age, if too old, make state critical
-			    # if missing, don't do anything?
-			    if (time.time() - stackPath.stat().st_mtime > float(conf[section]['stack_health_age'])):
-			        stackState = 2
-			        stackStateTxt = 'CRITICAL (state ' + str(int(time.time() - stackPath.stat().st_mtime)) + 'sec old)'
-
-		print (str(stackState) + ' ' + envname + '_' + stackname + '_stackHealth - ' + stackStateTxt + ' stack health is ' + stackData[myStack]['healthState'])
-
-# if on a host running containers, check their resources
-# assume only one instance per service
-### this part needs lots of work
-#		if hostid is not None:
-#			instanceReq=session.get(urlbase+'/v2-beta/projects/' + envid + '/instances/' + svc['instanceIds'][0], auth=(username,password))
-#			rancherInstance=instanceReq.json()
-# to do: give a hostname, and match it up to the rancher API hostId
-# otherwise, if the hostId changes, such as if a host is removed and added back to Rancher,
-# the container memory check will always be OK
-#			if rancherInstance['hostId'] == hostid:
-##				print (rancherInstance['name'] + ' ' + rancherInstance['externalId'])
-#				memUse = dockerStats[rancherInstance['externalId']]
-##				print (memUse)
-## crude hack: docker stats outputs human readable.  assume we only care about GB or more use
-## future: better calculations
-#				if 'G' in memUse:
-#					memState = 1
-#					memStateTxt = 'WARNING'
-#					memCommentTxt += (svc['name'] + ': ' + str(memUse) + ' ;; ')
-
-#	if hostid is not None:
-#		print (str(memState) + ' ' + envname + '_' + stackname + '_containerMemory-' + hostid + ' - ' + memStateTxt + ' big mem containers on host ' + hostid + ' : ' + memCommentTxt)
+			# we're trolling this again, meh. but only when stack is unhealthy, so don't worry about it
+			for serviceId in stackData[myStack]['serviceIds']:
+				healthServiceReq=session.get(urlbase+'/v2-beta/projects/' + envid + '/services/' + serviceId, auth=(username,password))
+				healthSvc=healthServiceReq.json()
+#				print (healthSvc['id'] + ' ' + healthSvc['healthState'])
+				if (healthSvc['healthState'] == 'healthy' or healthSvc['healthState'] == 'started-once'):
+					conn.execute('DELETE FROM badServices WHERE serviceId = ?', [ healthSvc['id'] ] )
+					conn.commit()
+				else:
+					conn.execute('INSERT OR IGNORE INTO badServices (serviceId, serviceName) VALUES (?,?)', [ healthSvc['id'], healthSvc['name']] )
+					conn.commit()
+					
+
+			cursor = conn.cursor()
+			# this should return only services that have been unhealthy for a while (in theory persistently unhealthy)
+			query = "SELECT serviceName FROM badServices WHERE (datetime(lastUpdate) < datetime('now','-" + conf[section]['stack_health_age'] + " seconds' ))"
+#			print (query)
+			cursor.execute(query)
+
+			# fetchall isn't great in theory, but in practice we should have very few rows in these tables
+			badServices = cursor.fetchall()
+			if (len(badServices) == 0):
+				# all services now OK, so assume stack OK
+				stackState = 0
+				stackStateTxt = 'OK'
+			else:
+				stackState = 1
+				stackStateTxt = 'WARNING'
+				stackExtraTxt = ' ; bad services: ' + ' '.join([ t[0] for t in badServices])
+				query = "SELECT serviceName FROM badServices WHERE (datetime(lastUpdate) < datetime('now','-" + str(2 * int(conf[section]['stack_health_age'])) + " seconds' ))"
+#			print (query)
+				cursor.execute(query)
+				reallyBadServices = cursor.fetchall()
+				if (len(reallyBadServices) > 0):
+					stackState = 2
+					stackStateTxt = 'CRITICAL'
+
+		conn.close()
+		print (str(stackState) + ' ' + envname + '_' + stackname + '_stackHealth - ' + stackStateTxt + ' stack health is ' + stackData[myStack]['healthState'] + stackExtraTxt)
 
 	if (not conf.has_option(section,'test_create_new')):
 		return None
 	if (conf.getboolean(section,'test_create_new') is False):
 		return None
 
-### spin up a dummy new service
+##### if requested in config, test spinning up a dummy new service
 # initially copied from narrative-traefiker
 	containerConfig = {u'assignServiceIpAddress': False,
                         u'createIndex': None,
@@ -343,10 +342,7 @@ def process_section(conf, section):
 	print (str(dummyServiceState) + ' ' + envname + '_' + stackname + '_createNewService - ' + dummyServiceStateTxt)
 
 
-# in each service find the last logs?  may be hard, need websocket
-
-
-# main loop
+##### main loop
 # if args provided, use them, otherwise use sections from config file
 if args.sections:
 	sections = args.sections
@@ -356,4 +352,3 @@ def process_section(conf, section):
 for section in sections:
 #	print (section)
 	process_section(conf, section)
-