In [30]:
import ds
import spookyhash
import mmh3
from numpy import random
import os
import time
import operator
import hyperloglog

# ==========================data structure==========================
class Node():
    def __init__(self,count=0):
        self.count=count
    def add_count(self,count=1):
        self.count+=count
    def __str__(self):
        return 'count: {}'.format(self.count)
    def __repr__(self):
        return ''

class Head(Node):
    def __init__(self,count=1):
        super().__init__(count)
        self.distinct = hyperloglog.HyperLogLog(0.01)
        self.maxID=''
    def __str__(self):
        return '[total count: {}, distinct: {}, max: {}]'.format(self.count,len(self.distinct),self.maxID)
    def __repr__(self):
        return '[total count: {}, distinct: {}, max: {}]'.format(self.count,len(self.distinct),self.maxID)

class Tail(Node):
    def __init__(self,ID,count):
        self.ID = ID
        super().__init__(count)
    def __str__(self):
        return '[ID: {}, count: {}]'.format(self.ID,self.count)
    def __repr__(self):
        return '[ID: {}, count: {}]'.format(self.ID,self.count)
# ==========================data structure==========================



# ========================== Update Sketch==========================
def UpdateSk(element,width,depth):
    e_max=get_emax()
    hash1=spookyhash.hash32(bytes(str(element.ID),encoding='utf-8'))
        # input of spooky hash: binary, encoding is parameter of bytes()
        # output of spooky hash: unsigned- 32 bit int
    hash2=mmh3.hash(str(hash1), signed=False)
        # input of mmh3 is str, output unsigned- 32 bit int
    row=hash1 % depth
        # index of row in SK[row]
    ID=hash2 % ((width*3)//2)
        # hash-value(ID) of e
        # range: 2^k *3/2= 3*(2^k-1)
    match=False
        # match= True if e is in Sk[row]
    index=0
    avg=0
    item=Tail(ID, element.count)
        # ID in Sketch is hash value
    print("\t{} -> {},send to Sk[{}]".format(element,item,row))
    # print("id(Sk[{}]):{}".format(row,id(Sk[row])))
    Sk_head[row].count+=item.count
        # total count+=count
    # ==========================update sketch==========================
    if len(Sk[row])==0:
        print("len(Sk[row])<width:{}".format(len(Sk[row])<width))
        # Sk[row] is empty,append e directly  
        Sk[row].append(item)
        Sk_head[row].maxID=element.ID
        match=True
        index=0
    else:
        # Sk[row] is not empty
        index=find(item,Sk[row])
        print("index={}".format(index))
        if index >=0:
            # matched in Sk[row]
            Sk[row][index].count+=item.count
            match=True
        else:
            # doesn't match in Sk[row]
            if len(Sk[row])<width:
                # Sk[row] is not full
                Sk[row].append(item)
                match=True
                index=len(Sk[row])-1
            else:
                # Sk[row] is full
                count_sum=sum(Sk[row][i].count for i in range(len(Sk[row])))
                print("count_sum={}".format(count_sum))
                print("total count={}".format(Sk_head[row].count))
                if Sk_head[row].count==count_sum:
                    # no element in other
                    # print("no element in Other, send to distinct")
                    Sk_head[row].distinct.add(element.ID)
                    match=False
                elif Sk_head[row].count>count_sum:
                    if len(Sk_head[row].distinct)>0:
                        avg=(Sk_head[row].count-count_sum)//len(Sk_head[row].distinct)
                        print("avg:{}".format(avg))
                        if avg>Sk[row][-1].count:
                            # print("avg>last one, update Sk[{}]".format(row))
                            Sk[row][-1].ID=item.ID
                            Sk[row][-1].count=avg
                            match=True
                            index=-1
                        else:
                            Sk_head[row].distinct.add(element.ID)
                            # print("avg< last one,send to distinct")
                            match=False
            
                        
    # now we have:
    #  index: incoming e in Sk[row]
    #  match: whether e is in Sk[row]
    #  ID: hash(e)
    #  
    # ==========================update e_max==========================
    print("\nUpdate e_max in UpdateSK():")
    print("Sk_head[{}]: {}".format(row,Sk_head[row]))
    print("e_max before update:{}".format(e_max))
    # Sk[row].sort(key=operator.attrgetter('count'),reverse=True)
    
    # update local max

    # update global max 
    h1=spookyhash.hash32(bytes(str(Sk_head[row].maxID),encoding='utf-8'))
    h2=mmh3.hash(str(h1), signed=False)
    max_id=h2 % ((width*3)//2)
    max_id_index=find(Tail(max_id,1),Sk[row])
    print("Sk[{}]:{} before update e_max".format(row,Sk[row]))
    print("index:{},max_id:{}, max_id_index:{}, in Sk[{}]".format(index,max_id,max_id_index,row))
    print("match={}".format(match))
    if max_id_index>=0:
        if match:
            # incoming element in Sk[row]:
            print("Sk_head[{}].maxID: {}".format(row,Sk_head[row].maxID))
            if Sk_head[row].maxID=="":
                Sk_head[row].maxID=element.ID
            elif element.ID==Sk_head[row].maxID:
                # update e_max
                if Sk[row][index].count>e_max.count:
                    e_max.ID=element.ID
                    e_max.count=Sk[row][index].count
                else:
                    pass
                # element.ID ≠ maxID
            if Sk[row][index].count>=Sk[row][max_id_index].count:
                Sk_head[row].maxID=element.ID
                if Sk[row][index].count>e_max.count:
                    e_max.ID=element.ID
                    e_max.count=Sk[row][index].count
        else:
            # e not in Sk[row]
            if avg>Sk[row][max_id_index].count:
                Sk_head[row].maxID=element.ID
                if avg>e_max.count:
                    e_max.ID=element.ID
                    e_max.count=avg
    else:
        Sk_head[row].maxID=element.ID

    Sk[row].sort(key=operator.attrgetter('count'),reverse=True)
    
    # print("after sorting id(Sk[{}]):{}".format(row,id(Sk[row])))

    for i in range(len(Sk)):
        print("Sk[{}]:{},{}".format(i,Sk_head[i],Sk[i]))
    print("e_max:{} after UpdateSK()".format(e_max))

# ========================== Update Sketch==========================

def Update_emax(head_list):
    print("\nIn Update_emax(Sk_head):")
    print("\t{}".format(head_list))
    e_max=get_emax()
    print("id(e_max):{}".format(id(e_max)))
    for item in head_list:
        print("item in Sk_head[]:{}".format(item))
        if item.maxID !='':
            hash1=spookyhash.hash32(bytes(str(item.maxID),encoding='utf-8'))
            hash2=mmh3.hash(str(hash1), signed=False)
            ID=hash2 % ((width*3)//2)
            row=hash1 % depth
            index=find(Tail(ID,1),Sk[row])
            print("row:{}".format(row))
            print("Sk[{}]:{}".format(row,Sk[row]))
            print("id of local max:{}".format(ID))
            print("index of local max:{}".format(index))
            if Sk[row][index].count >e_max.count:
                e_max.ID=item.maxID
                e_max.count=Sk[row][index].count
        else:
            pass
        

# ========================== BringBack=========================
def BringBack(e_min,e_max):
    print("\nIn BringBack({},{})".format(e_min,e_max))
    e_max=get_emax()
    # print('e_max at first Bringback:{},id(e_max):{}'.format(e_max,id(e_max)))
    temp=Tail(e_min.ID,e_min.count)
    Top[-1].ID=e_max.ID
    Top[-1].count=e_max.count
    Top.sort(key=operator.attrgetter('count'),reverse=True)
    print('Top after BringBack:\n\t{}'.format(Top))
    DeleteSk(e_max)
    # update e_max in Sk[row]
    Update_emax(Sk_head)
    print("e_max after delete:{},id(e_max):{}".format(e_max,id(e_max)))
    UpdateSk(temp,width,depth)
    
    # print("Sk[] after Update {}:\n\t{}".format(e_min,Sk))

# ========================== BringBack=========================
# ==========================DeleteSk=========================
def DeleteSk(element):
    # 刪除e_max in Sk[row]，update local max
    e_max=get_emax()
    print("\nIn DeleteSK({})".format(element))
    hash1=spookyhash.hash32(bytes(str(element.ID),encoding='utf-8'))
    hash2=mmh3.hash(str(hash1), signed=False)
    ID=hash2 % ((width*3)//2)
    row=hash1 % depth
    print("row:{},ID:{}".format(row,ID))
    Sk_head[row].count-=element.count
    print("Sk[{}]:{}".format(row,Sk[row]))
    index=find(Tail(ID,1),Sk[row])
    print("index:{}".format(index))
    if index<0:
        pass
    else:
        Sk[row].pop(index)
        Sk[row].sort(key=operator.attrgetter('count'),reverse=True)
        Sk_head[row].maxID=""
    e_max.ID=""
    e_max.count=0
    
    print("e_max After DeleteSk(element):{},id(e_max):{}".format(e_max,id(e_max)))
    '''
    print("index:{}".format(index))
    print('After pop:')
    print("\tSk[{}]={}".format(row,Sk[row]))
    print("Top:\n\t{}".format(Top))
    '''
# ==========================Tools=========================
def get_emax():
    return e_max

def find(e,element_list):
    try:
        # type(element_list[i].ID):int
        # print("e:{}".format(e))
        # print("element_list[1:]: {}".format(element_list[1:]))
        index=[ele.ID for ele in element_list].index(e.ID)
    except:
        # print("raise error")
        index=-99
    return index

# ========================main==============================

start=time.time()
filename='kosarak.dat'
filepath="..\\dataset\\"
src_data=os.path.join(filepath,filename)
depth=4
width=4
size=8

Top=[]
Sk_head=[Head(0) for j in range(depth)]
Sk=[[] for i in range(depth)]
item_count=150
i=0
e_max=Tail('',0)

with open(src_data,'r') as file:
    while item_count:
        element=file.readline().strip('\n')
        if not element:
            break
        else:
            i+=1
            print("\nread {}-th element: {}".format(i,element))
            item_count-=1
            item=Tail(element,1)
            if len(Top)==0:
                Top.append(item)
                # print("len(Top)==0, Top append:{}".format(Top))
            else:
                index=find(item,Top)
                # print("index={}".format(index))
                if index<0:
                    if len(Top)<size:
                        Top.append(item)
                        # print("index={},Top after append: {},\nlen(Top):{}".format(index,Top,len(Top)))
                    else:
                        print("send {} into Sk".format(element))
                        UpdateSk(item,width,depth)
                else:
                    # print("update Top[{}]:".format(index))
                    Top[index].count+=1
            print("Top after read {}:\n\t{}\n".format(element,Top))
            Top.sort(key=operator.attrgetter('count'),reverse=True)
            if e_max.count>Top[-1].count:
                BringBack(Top[-1],e_max)
                # print('Top after BringBack: \n\t{}'.format(Top))            
            # print("TOP:{}".format(Top))
end=time.time()
print("e_max:{}".format(e_max))
print("Execution time:{} seconds.".format(str(end-start)))


read 1-th element: 1 2 3
Top after read 1 2 3:
	[[ID: 1 2 3, count: 1]]


read 2-th element: 1
Top after read 1:
	[[ID: 1 2 3, count: 1], [ID: 1, count: 1]]


read 3-th element: 4 5 6 7
Top after read 4 5 6 7:
	[[ID: 1 2 3, count: 1], [ID: 1, count: 1], [ID: 4 5 6 7, count: 1]]


read 4-th element: 1 8
Top after read 1 8:
	[[ID: 1 2 3, count: 1], [ID: 1, count: 1], [ID: 4 5 6 7, count: 1], [ID: 1 8, count: 1]]


read 5-th element: 9 10
Top after read 9 10:
	[[ID: 1 2 3, count: 1], [ID: 1, count: 1], [ID: 4 5 6 7, count: 1], [ID: 1 8, count: 1], [ID: 9 10, count: 1]]


read 6-th element: 11 6 12 13 14 15 16
Top after read 11 6 12 13 14 15 16:
	[[ID: 1 2 3, count: 1], [ID: 1, count: 1], [ID: 4 5 6 7, count: 1], [ID: 1 8, count: 1], [ID: 9 10, count: 1], [ID: 11 6 12 13 14 15 16, count: 1]]


read 7-th element: 1 3 7
Top after read 1 3 7:
	[[ID: 1 2 3, count: 1], [ID: 1, count: 1], [ID: 4 5 6 7, count: 1], [ID: 1 8, count: 1], [ID: 9 10, count: 1], [ID: 11 6 12 13 14 15 16, count: 1], [I

item in Sk_head[]:[total count: 3, distinct: 0, max: ]
e_max after delete:[ID: 6 3 71, count: 2],id(e_max):2196014014360
	[ID: 11 6 56 57 58 59 60 61 62 63 64, count: 2] -> [ID: 4, count: 2],send to Sk[1]
index=0

Update e_max in UpdateSK():
Sk_head[1]: [total count: 7, distinct: 0, max: 6 3 71]
e_max before update:[ID: 6 3 71, count: 2]
Sk[1]:[[ID: 4, count: 4], [ID: 5, count: 2], [ID: 0, count: 1]] before update e_max
index:0,max_id:5, max_id_index:1, in Sk[1]
match=True
Sk_head[1].maxID: 6 3 71
Sk[0]:[total count: 4, distinct: 0, max: ],[[ID: 4, count: 2], [ID: 3, count: 1], [ID: 2, count: 1]]
Sk[1]:[total count: 7, distinct: 0, max: 11 6 56 57 58 59 60 61 62 63 64],[[ID: 4, count: 4], [ID: 5, count: 2], [ID: 0, count: 1]]
Sk[2]:[total count: 6, distinct: 0, max: 11 70 6],[[ID: 5, count: 2], [ID: 2, count: 2], [ID: 3, count: 1], [ID: 1, count: 1]]
Sk[3]:[total count: 3, distinct: 0, max: ],[[ID: 2, count: 1], [ID: 0, count: 1], [ID: 1, count: 1]]
e_max:[ID: 11 6 56 57 58 59 60 61 62


read 78-th element: 14
send 14 into Sk
	[ID: 14, count: 1] -> [ID: 1, count: 1],send to Sk[1]
index=3

Update e_max in UpdateSK():
Sk_head[1]: [total count: 9, distinct: 0, max: 1 6 138 274 77]
e_max before update:[ID: 316, count: 3]
Sk[1]:[[ID: 5, count: 2], [ID: 0, count: 1], [ID: 2, count: 1], [ID: 1, count: 2]] before update e_max
index:3,max_id:1, max_id_index:3, in Sk[1]
match=True
Sk_head[1].maxID: 1 6 138 274 77
Sk[0]:[total count: 10, distinct: 0, max: 269 11 6 296 38 297 298 299 300 301 302 303 141 304 305 306 307 308 309 273 3 310 311 312 313],[[ID: 3, count: 3], [ID: 1, count: 3], [ID: 4, count: 2], [ID: 2, count: 1]]
Sk[1]:[total count: 9, distinct: 0, max: 14],[[ID: 5, count: 2], [ID: 1, count: 2], [ID: 0, count: 1], [ID: 2, count: 1]]
Sk[2]:[total count: 14, distinct: 0, max: 1 6 139 140 90 141 142 143 144],[[ID: 1, count: 4], [ID: 5, count: 3], [ID: 4, count: 2], [ID: 3, count: 2]]
Sk[3]:[total count: 6, distinct: 0, max: 316],[[ID: 4, count: 3], [ID: 2, count: 1], [ID

e_max before update:[ID: 484 1 545 546 547, count: 5]
Sk[3]:[[ID: 2, count: 5], [ID: 0, count: 4], [ID: 4, count: 3], [ID: 3, count: 1]] before update e_max
index:2,max_id:2, max_id_index:0, in Sk[3]
match=True
Sk_head[3].maxID: 11 565
Sk[0]:[total count: 16, distinct: 0, max: 11 6 64],[[ID: 1, count: 4], [ID: 3, count: 4], [ID: 2, count: 2], [ID: 5, count: 1]]
Sk[1]:[total count: 22, distinct: 0, max: 554 11 314 6 2 89 555 556 557 241 3 558 102 104 559 119 560 561 562 126 563 564],[[ID: 2, count: 5], [ID: 1, count: 4], [ID: 5, count: 3], [ID: 0, count: 1]]
Sk[2]:[total count: 22, distinct: 0, max: 484 1 545 546 547],[[ID: 5, count: 5], [ID: 2, count: 4], [ID: 4, count: 4], [ID: 1, count: 3]]
Sk[3]:[total count: 20, distinct: 0, max: 11 565],[[ID: 2, count: 5], [ID: 0, count: 4], [ID: 4, count: 3], [ID: 3, count: 1]]
e_max:[ID: 484 1 545 546 547, count: 5] after UpdateSK()
Top after read 145 11 571 572:
	[[ID: 69 11 218 6 215 228 229 230 231 232 233 234 235 236 237 90 238 239 240 241 2

In [41]:
import spookyhash
import mmh3
from numpy import random
import os
import time
import operator
import hyperloglog

# ==========================data structure==========================
class Node():
    def __init__(self,count=0):
        self.count=count
    def add_count(self,count=1):
        self.count+=count
    def __str__(self):
        return 'count: {}'.format(self.count)
    def __repr__(self):
        return ''

class Head(Node):
    def __init__(self,count=1):
        super().__init__(count)
        self.distinct = hyperloglog.HyperLogLog(0.01)
        self.maxID=''
    def __str__(self):
        return '[total count: {}, distinct: {}, max: {}]'.format(self.count,len(self.distinct),self.maxID)
    def __repr__(self):
        return '[total count: {}, distinct: {}, max: {}]'.format(self.count,len(self.distinct),self.maxID)

class Tail(Node):
    def __init__(self,ID,count):
        self.ID = ID
        super().__init__(count)
    def __str__(self):
        return '[ID: {}, count: {}]'.format(self.ID,self.count)
    def __repr__(self):
        return '[ID: {}, count: {}]'.format(self.ID,self.count)
# ==========================data structure==========================


# ==========================UpdateSk==========================
def UpdateSk(element,width,depth):
    e_max=get_emax()
    hash1=spookyhash.hash32(bytes(str(element.ID),encoding='utf-8'))
        # input of spooky hash: binary, encoding is parameter of bytes()
        # output of spooky hash: unsigned- 32 bit int
    hash2=mmh3.hash(str(hash1), signed=False)
        # input of mmh3 is str, output unsigned- 32 bit int
    row=hash1 % depth
        # index of row in SK[row]
    ID=hash2 % ((width*3)//2)
        # hash-value(ID) of e
        # range: 2^k *3/2= 3*(2^k-1)
    match=False
        # match= True if e is in Sk[row]
    index=0
    avg=0
    item=Tail(ID, element.count)
        # ID in Sketch is hash value
    print("{} -> {},send to Sk[{}]".format(element,item,row))

    # ==========================update sketch==========================
    index=find(item,Sk[row])
        # 先find
    if index >=0:
        # e matches in Sk[row]
        Sk[row][index].count+=item.count
        match=True
    else:
        # e doesn't match in Sk[row]
        if len(Sk[row])==0:
            Sk[row].append(item)
            index=0
            Sk_head[row].maxID=element.ID
            match=True
        elif len(Sk[row])<width:
            # Sk[row] is not full
            Sk[row].append(item)
            index=len(Sk[row])-1            
            match=True
        else:
            # Sk[row] is full
            count_sum=sum(Sk[row][i].count for i in range(len(Sk[row])))
            if Sk_head[row].count==count_sum:
                # no other, send e into Other
                Sk_head[row].distinct.add(element.ID)
                match=False
            else:
                # something in other and total count > sum
                if len(Sk_head[row].distinct)>0:
                    avg=(Sk_head[row].count-count_sum)//len(Sk_head[row].distinct)
                    print("avg:{}".format(avg))
                else:
                    Sk_head[row].distinct.add(element.ID)
                    # print("avg< last one,send to distinct")
                    match=False
    Sk_head[row].count+=item.count
        # total count最後再+1 因為要compare distinct
    # ==========================update local max==========================
    Update_local_max(Sk_head[row],Sk[row],avg,element,index)
    # Sk[row].sort(key=operator.attrgetter('count'),reverse=True)

    # ==========================update e_max==========================
    Update_emax(Sk_head[row],Sk[row],e_max,element)
    
    Sk[row].sort(key=operator.attrgetter('count'),reverse=True)
    for i in range(len(Sk)):
        print("Sk[{}]:{},{}".format(i,Sk_head[i],Sk[i]))
# ==========================update local max==========================    
def Update_local_max(head_item,element_list,avg,incoming_element,index):
    # Sk_head[row],Sk[row],avg
    print("In Update_local_max")
    local_max_ID=mmh3.hash(str(spookyhash.hash32(bytes(str(head_item.maxID),encoding='utf-8'))), signed=False) % ((width*3)//2)
    #print("local_max_ID:{}".format(local_max_ID))
    local_max_index=find(Tail(local_max_ID,1),element_list)
    #print("local_max_index:{}".format(local_max_index))
    # 此處當hashed ID相同時暫不更新 local max
    if local_max_index>=0:
        # local max exists
        if index>=0:
            if element_list[index].count>element_list[local_max_index].count:
                head_item.maxID=incoming_element.ID
        else:
            if avg>element_list[local_max_index].count:
                head_item.maxID=incoming_element.ID
    else:
        # local max doesn't exists
        head_item.maxID=incoming_element.ID
    

# ==========================update e_max==========================
def Update_emax(head_item,element_list,e_max,incoming_element):
    # Sk_head[row],Sk[row],e_max
    local_max_ID=""
    local_max_index=""
    print("e_max before update:{}".format(e_max))
    print("Sk_head:{}".format(head_item))
    print("Sk[row]:{}".format(element_list))
    if head_item.maxID=="":
        pass
    else:
        local_max_ID=mmh3.hash(str(spookyhash.hash32(bytes(str(head_item.maxID),encoding='utf-8'))), signed=False) % ((width*3)//2)
        #print("local_max_ID:{}".format(local_max_ID))
        local_max_index=find(Tail(local_max_ID,1),element_list)        
        #print("local_max_index:{}".format(local_max_index))
        if local_max_index>=0:
            if element_list[local_max_index].count>e_max.count:
                e_max.ID=head_item.maxID
                e_max.count=element_list[local_max_index].count
        else:
            pass
    print("e_max after update:{}".format(e_max))

# starts here, BringBack, DeleteSK的參數和Update_emax有衝突
# ========================== BringBack=========================
def BringBack(e_min,e_max,element_list,head_list):
    # Top[-1],e_max,Sk,Sk_head
    print("\nIn BringBack({},{})".format(e_min,e_max))
    e_max=get_emax()
    # print('e_max at first Bringback:{},id(e_max):{}'.format(e_max,id(e_max)))
    temp=Tail(e_min.ID,e_min.count)
    Top[-1].ID=e_max.ID
    Top[-1].count=e_max.count
    #Top.sort(key=operator.attrgetter('count'),reverse=True)
    print('Top after BringBack:\n\t{}'.format(Top))
    DeleteSk(e_max,element_list,head_list)
    # update e_max in Sk[row]
    Update_emax(head_list,element_list,e_max,incoming_element)
    print("e_max after delete:{},id(e_max):{}".format(e_max,id(e_max)))
    UpdateSk(temp,width,depth)
    
    # print("Sk[] after Update {}:\n\t{}".format(e_min,Sk))

# ========================== BringBack=========================
# ==========================DeleteSk=========================
def DeleteSk(element,element_list):
    # e_max, Sk[row]
    # 刪除e_max in Sk[row]，update local max
    e_max=get_emax()
    print("\nIn DeleteSK({}):".format(element))
    hash1=spookyhash.hash32(bytes(str(element.ID),encoding='utf-8'))
    hash2=mmh3.hash(str(hash1), signed=False)
    ID=hash2 % ((width*3)//2)
    row=hash1 % depth
    print("row:{},ID:{}".format(row,ID))
    Sk_head[row].count-=element.count
    print("Sk[{}]:{}".format(row,element_list[row]))
    index=find(Tail(ID,1),element_list[row])
    print("index:{}".format(index))
    if index<0:
        pass
    else:
        element_list[row].pop(index)
        element_list[row].sort(key=operator.attrgetter('count'),reverse=True)
        head_list[row].maxID=""
    e_max.ID=""
    e_max.count=0
    
    print("e_max After DeleteSk(element):{},id(e_max):{}".format(e_max,id(e_max)))
    '''
    print("index:{}".format(index))
    print('After pop:')
    print("\tSk[{}]={}".format(row,Sk[row]))
    print("Top:\n\t{}".format(Top))
    '''
# ==========================Tools=========================    
def get_emax():
    return e_max

def find(e,element_list):
    try:
        # type(element_list[i].ID):int
        # print("e:{}".format(e))
        # print("element_list[1:]: {}".format(element_list[1:]))
        index=[ele.ID for ele in element_list].index(e.ID)
    except:
        # print("raise error")
        index=-99
    return index                        


# ==========================main=========================   
        
start=time.time()

filename='kosarak.dat'
filepath="..\\dataset\\"
src_data=os.path.join(filepath,filename)
depth=4
width=4
size=4

Top=[]
Sk_head=[Head(0) for j in range(depth)]
Sk=[[] for i in range(depth)]
item_count=70
i=0
e_max=Tail('',0)

with open(src_data,'r') as file:
    while item_count:
        element=file.readline().strip('\n')
        if not element:
            break
        else:
            i+=1
            print("\nread {}-th element: {}".format(i,element))
            item_count-=1
            item=Tail(element,1)
            if len(Top)==0:
                Top.append(item)
                # print("len(Top)==0, Top append:{}".format(Top))
            else:
                index=find(item,Top)
                # print("index={}".format(index))
                if index<0:
                    if len(Top)<size:
                        Top.append(item)
                        # print("index={},Top after append: {},\nlen(Top):{}".format(index,Top,len(Top)))
                    else:
                        UpdateSk(item,width,depth)
                else:
                    # print("update Top[{}]:".format(index))
                    Top[index].count+=1
            # print("Top after read {}:\n{}\n".format(element,Top))
            Top.sort(key=operator.attrgetter('count'),reverse=True)
            if e_max.count>Top[-1].count:
                BringBack(Top[-1],e_max,Sk)
                # print('Top after BringBack: \n\t{}'.format(Top))            
            # print("TOP:{}".format(Top))


end=time.time()

print("e_max:{}".format(e_max))
print("Execution time:{} seconds.".format(str(end-start)))


read 1-th element: 1 2 3

read 2-th element: 1

read 3-th element: 4 5 6 7

read 4-th element: 1 8

read 5-th element: 9 10
[ID: 9 10, count: 1] -> [ID: 1, count: 1],send to Sk[2]
In Update_local_max
e_max before update:[ID: , count: 0]
Sk_head:[total count: 1, distinct: 0, max: 9 10]
Sk[row]:[[ID: 1, count: 1]]
e_max after update:[ID: 9 10, count: 1]
Sk[0]:[total count: 0, distinct: 0, max: ],[]
Sk[1]:[total count: 0, distinct: 0, max: ],[]
Sk[2]:[total count: 1, distinct: 0, max: 9 10],[[ID: 1, count: 1]]
Sk[3]:[total count: 0, distinct: 0, max: ],[]

read 6-th element: 11 6 12 13 14 15 16
[ID: 11 6 12 13 14 15 16, count: 1] -> [ID: 1, count: 1],send to Sk[0]
In Update_local_max
e_max before update:[ID: 9 10, count: 1]
Sk_head:[total count: 1, distinct: 0, max: 11 6 12 13 14 15 16]
Sk[row]:[[ID: 1, count: 1]]
e_max after update:[ID: 9 10, count: 1]
Sk[0]:[total count: 1, distinct: 0, max: 11 6 12 13 14 15 16],[[ID: 1, count: 1]]
Sk[1]:[total count: 0, distinct: 0, max: ],[]
Sk[2]:[t

In [9]:
print(Top)

[[ID: 1 2 3, count: 1], [ID: 1, count: 1], [ID: 4 5 6 7, count: 1], [ID: 1 8, count: 1]]


In [26]:
s='1 3 7'
mmh3.hash(str(spookyhash.hash32(bytes(s,encoding='utf-8'))), signed=False) % ((width*3)//2)

5