In [5]:
def loadDataSet():
	dataSet = [['r', 'z', 'h', 'j', 'p'], 
				['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
				['z'],
				['r', 'x', 'n', 'o', 's'],
				['y', 'r', 'x', 'z', 'q', 't', 'p'],
				['y', 'z', 'x', 'e', 'q', 's', 't', 'm']]
	return dataSet

def createInitSet(dataSet):
	resultDict = {}
	for tx in dataSet:
		resultDict[frozenset(tx)] = 1
	return resultDict

dataSet = loadDataSet()
initSet = createInitSet(dataSet)
initSet

{frozenset({'e', 'm', 'q', 's', 't', 'x', 'y', 'z'}): 1,
 frozenset({'n', 'o', 'r', 's', 'x'}): 1,
 frozenset({'z'}): 1,
 frozenset({'s', 't', 'u', 'v', 'w', 'x', 'y', 'z'}): 1,
 frozenset({'p', 'q', 'r', 't', 'x', 'y', 'z'}): 1,
 frozenset({'h', 'j', 'p', 'r', 'z'}): 1}

In [11]:
# define tree node data structure
class treeNode:
	# init function
	def __init__(self, nameValue, numOccur, parentNode):
		self.name = nameValue
		self.count = numOccur
		# link to similar node
		self.nodeLink = None
		self.parent = parentNode
		self.children = {}

	# increase count value
	def inc(self, numOccur):
		self.count += numOccur

	# display tree structure
	def disp(self, ind = 1):
		print ' ' * ind, self.name, self.count, '(',id(self),')'
		for child in self.children.values():
			child.disp(ind + 1)

# create FP growth tree
def createTree(initSet, minSupp = 1):
	# create init header table as {..., 'x': 4, 'z': 5}
	headerTable = {}
	for tx in initSet:
		for item in tx:
			headerTable[item] = headerTable.get(item, 0) + initSet[tx]
	#print headerTable  
	
	# delete element if < min support value
	for k in headerTable.keys():
		if headerTable[k] < minSupp:
			del(headerTable[k])

	# return none if nothing meet requirement
	freqItemSet = set(headerTable.keys())
	if len(freqItemSet) == 0:
		return None, None

	# re-org header table to {'x': [4, <node>]}
	for k in headerTable:
		headerTable[k] = [headerTable[k], None]
	printHeader(headerTable)

	resultTree = treeNode('[*]', 1, None)
	for txSet, count in initSet.items():
		txItemDict = {}
		for item in txSet:
			# only filter the frequent items
			if item in freqItemSet:
				txItemDict[item] = headerTable[item][0]
		if len(txItemDict) > 0:
			# sort items by count values in header table
			orderedItems = [v[0] for v in sorted(txItemDict.items(), \
				key = lambda x: x[1], reverse = True)]
			print 'tx dict:', txItemDict, 'sorted:', orderedItems
			updateTree(orderedItems, resultTree, headerTable, count)

	printHeader(headerTable)
	return resultTree, headerTable

def printHeader(headerTable, item = None):
	if item != None:
		items = [item]
	else:
		items = headerTable.keys()
	
	for k in items:
		currNode = headerTable[k][1]
		info = str(currNode) if (currNode == None) else \
			currNode.name + ':' + str(currNode.count) + '(' + str(id(currNode)) + ')'
		while (currNode != None and currNode.nodeLink != None):
			currNode = currNode.nodeLink
			info += ' -> ' + \
			currNode.name + ':' + str(currNode.count) + '(' + str(id(currNode)) + ')'
		print '[%s:%s] -> %s' % (k, headerTable[k][0], info)

def updateTree(items, inTree, headerTable, count):
	# increase count if node exists
	if items[0] in inTree.children:
		print 'increase %s count: %d+1' % (items[0], inTree.children[items[0]].count)
		inTree.children[items[0]].inc(count)
	# create new node if not exists
	else:
		print 'create new tree node: %s' % items[0]
		inTree.children[items[0]] = treeNode(items[0], count, inTree)
		if headerTable[items[0]][1] == None:
			headerTable[items[0]][1] = inTree.children[items[0]]
		else:
			updateHeader(headerTable[items[0]][1], inTree.children[items[0]])
		printHeader(headerTable, items[0])
	# inTree.disp()

	# recursive process rest items
	if len(items) > 1:
		updateTree(items[1::], inTree.children[items[0]], headerTable, count)

def updateHeader(currNode, targetNode):
	while (currNode.nodeLink != None):
		currNode = currNode.nodeLink
	currNode.nodeLink = targetNode
        
fpTree, headerTab = createTree(initSet, 3)

[s:3] -> None
[r:3] -> None
[t:3] -> None
[y:3] -> None
[x:4] -> None
[z:5] -> None
tx dict: {'y': 3, 'x': 4, 's': 3, 'z': 5, 't': 3} sorted: ['z', 'x', 'y', 's', 't']
create new tree node: z
[z:5] -> z:1(4533701448)
create new tree node: x
[x:4] -> x:1(4533701808)
create new tree node: y
[y:3] -> y:1(4534040048)
create new tree node: s
[s:3] -> s:1(4534042568)
create new tree node: t
[t:3] -> t:1(4534039688)
tx dict: {'x': 4, 's': 3, 'r': 3} sorted: ['x', 's', 'r']
create new tree node: x
[x:4] -> x:1(4533701808) -> x:1(4534040840)
create new tree node: s
[s:3] -> s:1(4534042568) -> s:1(4534042424)
create new tree node: r
[r:3] -> r:1(4534041200)
tx dict: {'y': 3, 'x': 4, 's': 3, 'z': 5, 't': 3} sorted: ['z', 'x', 'y', 's', 't']
increase z count: 1+1
increase x count: 1+1
increase y count: 1+1
increase s count: 1+1
increase t count: 1+1
tx dict: {'y': 3, 'x': 4, 'r': 3, 't': 3, 'z': 5} sorted: ['z', 'x', 'y', 'r', 't']
increase z count: 2+1
increase x count: 2+1
increase y count: 2+1


In [13]:
# find conditional pattern base
def ascendTree(leafNode, prefixPath):
	if leafNode.parent != None:
		#print 'ascend tree node:', leafNode.name, leafNode.count
		prefixPath.append(leafNode.name)
		ascendTree(leafNode.parent, prefixPath)

def findPrefixPath(basePattern, treeNode):
	condPattBases = {}
	while treeNode != None:
		prefixPath = []
		ascendTree(treeNode, prefixPath)
		
		print 'prefix path:', prefixPath
		if len(prefixPath) > 1:
			condPattBases[frozenset(prefixPath[1:])] = treeNode.count
		
		# check next node by link pointer
		treeNode = treeNode.nodeLink
	return condPattBases

def mineTree(inTree, headerTable, minSupp, prefix, freqItemList):
	# sort items in header table by count
	sortedItems = \
		[v[0] for v in sorted(headerTable.items(), key = lambda p: p[1])]

	for item in sortedItems:
		newFreqSet = prefix.copy()
		newFreqSet.add(item)
		freqItemList.append(newFreqSet)

		# find prefix path by item
		condPattBases = findPrefixPath(item, headerTable[item][1])
		# reuse func to create conditional fp tree
		myCondTree, myHead = createTree(condPattBases, minSupp)
		print 'conditional tree for:', newFreqSet
		if myCondTree != None:
			myCondTree.disp(1)

		if myHead != None:
			mineTree(myCondTree, myHead, minSupp, newFreqSet, freqItemList)

mineTree(fpTree, headerTab, 3, set([]), [])

prefix path: ['t', 's', 'y', 'x', 'z']
prefix path: ['t', 'r', 'y', 'x', 'z']
[y:3] -> None
[x:3] -> None
[z:3] -> None
tx dict: {'y': 3, 'x': 3, 'z': 3} sorted: ['y', 'x', 'z']
create new tree node: y
[y:3] -> y:2(4535094520)
create new tree node: x
[x:3] -> x:2(4535094304)
create new tree node: z
[z:3] -> z:2(4535094592)
tx dict: {'y': 3, 'x': 3, 'z': 3} sorted: ['y', 'x', 'z']
increase y count: 2+1
increase x count: 2+1
increase z count: 2+1
[y:3] -> y:3(4535094520)
[x:3] -> x:3(4535094304)
[z:3] -> z:3(4535094592)
conditional tree for: set(['t'])
  [*] 1 ( 4534033368 )
   y 3 ( 4535094520 )
    x 3 ( 4535094304 )
     z 3 ( 4535094592 )
prefix path: ['x', 'y']
[y:3] -> None
tx dict: {'y': 3} sorted: ['y']
create new tree node: y
[y:3] -> y:3(4535094736)
[y:3] -> y:3(4535094736)
conditional tree for: set(['x', 't'])
  [*] 1 ( 4535094664 )
   y 3 ( 4535094736 )
prefix path: ['y']
conditional tree for: set(['y', 'x', 't'])
prefix path: ['y']
conditional tree for: set(['y', 't'])
prefi