## About Kosarak
- Distinct elements: 606770
- Number of incoming elements: 990002
- Top-10 most frequent elements and their counts:

| element | frequency |
| :----- | :----- |
| 3 | 36133 |
| 6 | 30225 |
| 6 3 | 21139 |
| 11 6 | 12769 |
| 1 | 9766 |
| 11 6 3 | 9083 |
| 1 3 | 6002 |
| 11 | 5374 |
| 1 6 | 5046 |
| 4 | 4290 |


## Ground Truth

In [None]:
# Ground Truth
import pandas as pd
import os

# Exact (ground-truth) pass: count how often every line of the dataset occurs.
filename='kosarak.dat'
# Build the paths from components so the cell also runs outside Windows; on
# Windows this resolves to the same ..\dataset\kosarak.dat location as before.
filepath=os.path.join("..","dataset")
src_data=os.path.join(filepath,filename)

num_line=0          # number of non-empty lines read (incoming elements)
item_count=100000   # leftover debugging cap; the loop below does not use it
itemdict={}         # line text -> exact number of occurrences
with open(src_data,'r') as file:
    for raw_line in file:
        line=raw_line.strip('\n')
        if not line:
            # Skip blank lines. Treating the first blank line as end-of-file
            # would silently truncate the ground truth.
            continue
        num_line+=1
        itemdict[line]=itemdict.get(line,0)+1

# One row per distinct element, most frequent first.
df=pd.DataFrame(list(itemdict.items()),columns=['Element', 'Count'])
df=df.sort_values('Count',ascending=False)

print("Distinct element: {}".format(len(itemdict)))
print("Number of incomming element: {}".format(num_line))
print(df.head(20))

df.to_csv(os.path.join("..","result","kosarak","kosarak_ground_truth.csv"),index=False)


## Count-min sketch
- Top-1024+ 4*1024 CMS
    - Total memory 9432 bytes :Top-1024 with size 9328 bytes+ CMS with size 104 bytes.
    - Execution time:397.14259004592896 seconds.
    - built-in index search is fastest

In [None]:
# Count-Min sketch with built-in index search
# Top-1024+ 4*1024 CMS
# Total memory 9432 bytes :Top-1024 with size 9328 bytes+ CMS with size 104 bytes.
# Execution time:319.18891406059265 seconds.
# Find:339,TP:0,FP:339

import sys
import os
import time
from probables import (CountMinSketch)
import pandas as pd

def find(e,Topk):
    """Return the index of element ``e`` in ``Topk``, or -99 when it is absent.

    Parameters
    ----------
    e : key to look for.
    Topk : sequence of two-item ``[element, count]`` entries.

    Returns
    -------
    int
        Position of the first entry whose element equals ``e``; -99 (a negative
        sentinel, so callers can test ``index < 0``) when no entry matches.
    """
    try:
        # Built-in list.index is kept on purpose: the notebook's notes record it
        # as the fastest lookup that was tried.
        return [ele for ele,_ in Topk].index(e)
    except ValueError:
        # Only "not found" is expected here; any other error is a real bug and
        # must not be reported as a missing element.
        return -99

# Count-Min sketch + top-k list over the whole stream.
start=time.time()
filename='kosarak.dat'
# Portable path construction; identical to ..\dataset\kosarak.dat on Windows.
filepath=os.path.join("..","dataset")
src_data=os.path.join(filepath,filename)
topk=[]      # [element, count] entries, kept sorted by count (descending)
size=1024    # capacity of the top-k list
# item_count=10000
cms = CountMinSketch(width=1024, depth=4)

file_found=os.path.exists(src_data)
if file_found:
    with open(src_data,'r') as file:
        for raw_line in file:
            line=raw_line.strip('\n')
            if not line:
                # Skip blank lines rather than treating the first one as EOF.
                continue
            cms.add(line)
            count=cms.check(line)   # count reported by the sketch for this line
            index=find(line,topk)
            if index>=0:
                # already tracked: refresh its count
                topk[index][1]=count
            elif len(topk)<size:
                # not tracked yet and there is still room
                topk.append([line,count])
            else:
                # list is full: overwrite the last (smallest-count) entry
                topk[-1][0]=line
                topk[-1][1]=count
            topk.sort(key=lambda pair:pair[1],reverse=True)
    print('EOF')

    end=time.time()
    print(topk[:20],len(topk))
    # sys.getsizeof is shallow: it reports the container objects only, not the
    # elements they reference.
    print("Total memory {3} bytes :Top-{0} with size {1} bytes+ CMS with size {2} bytes.".format(len(topk),sys.getsizeof(topk),sys.getsizeof(cms),sys.getsizeof(cms)+sys.getsizeof(topk)))
    print("Execution time:{} seconds.".format(str(end-start)))
else:
    print("file doesn't exist")

# Convert the top-k list into a DataFrame.
templi=[[ele,cnt] for ele,cnt in topk]
df=pd.DataFrame(templi,columns=['ID', 'Count'])
if file_found:
    # Only persist real results; a missing dataset must not overwrite an
    # earlier CSV with an empty table.
    df.to_csv(os.path.join("..","result","kosarak","CM_kosarak.csv"),index=False)
df.head(50)


In [None]:
# Result compare
# ARE: (1/distinct element)*(abs(f-f*)/f)

import pandas as pd
import os
# Compare the Count-Min top-k output against the 1024 most frequent true elements.
path=os.path.join('..','result','kosarak')
groundtruth='kosarak_ground_truth.csv'
final='CM_kosarak.csv'

grtruth=pd.read_csv(os.path.join(path,groundtruth))
My_result1=pd.read_csv(os.path.join(path,final))

gli = grtruth.values.tolist()
li1= My_result1.values.tolist()
truth_top=gli[:1024]   # sliced once instead of once per reported element

tp=0      # reported elements whose count equals the exact count
fp=0      # reported elements whose count differs from the exact count
found=0   # reported elements present in truth_top; named `found` so the
          # `find()` helper defined in other cells is not shadowed by an int
err=[]
error=0
for item in li1:
    for element in truth_top:
        if item[0]==element[0]:
            found+=1
            if item[1]==element[1]:
                tp+=1
            else:
                fp+=1
                # NOTE(review): the header formula divides by f; this divides by
                # the reported count item[1] - confirm which one is intended.
                error+=abs(item[1]-element[1])/item[1]
print("Find:{},TP:{},FP:{}".format(found,tp,fp))
#print("ARE:{}".format(error/606770))

## Space Saving
- Top 1024 with nodes
    - Total memory 9024 bytes :Top-1024 with size 9024 bytes.
    - Execution time:356.0353424549103 seconds.
    - Find:158,TP:26,FP:132

In [None]:
import sys
import os
import time
import operator
import pandas as pd

class Node():
    """Base record that holds a running count.

    Parameters
    ----------
    count : initial value of the counter (default 0).
    """
    def __init__(self,count=0):
        self.count=count
    def add_count(self,count=1):
        """Increase the counter by ``count`` (default 1)."""
        self.count+=count
    def __str__(self):
        # Node has no ID attribute (only Tail defines one), so formatting
        # self.ID here raised AttributeError; report the count alone.
        return 'count: {}'.format(self.count)
    def __repr__(self):
        return ''

class Head(Node):
    """Node with a zero-initialised count plus a HyperLogLog of distinct items."""
    def __init__(self):
        # This cell's import list does not include hyperloglog, so on a fresh
        # kernel Head() raised NameError. Import it where it is needed.
        import hyperloglog
        super().__init__()
        # 0.01 is the argument passed to HyperLogLog (presumably the target
        # relative error - confirm against the library docs).
        self.distinct = hyperloglog.HyperLogLog(0.01)
    def __str__(self):
        return 'total count: {}, distinct element: {}'.format(self.count,len(self.distinct))
    def __repr__(self):
        return '[count: {}, distinct: {}]'.format(self.count,len(self.distinct))

class Tail(Node):
    """Counter entry that pairs an element ``ID`` with its ``count``."""
    def __init__(self,ID,count):
        super().__init__(count)
        self.ID = ID
    def __str__(self):
        template='ID: {}, count: {}'
        return template.format(self.ID,self.count)
    def __repr__(self):
        template="'{}', count: {}"
        return template.format(self.ID,self.count)

def find(e,element_list):
    """Return the index of the entry whose ``ID`` equals ``e.ID``, or -99.

    Parameters
    ----------
    e : object with an ``ID`` attribute (the key to look for).
    element_list : sequence of objects that each expose ``ID``.

    Returns
    -------
    int
        Position of the first match; -99 (a negative sentinel, so callers can
        test ``index < 0``) when no entry matches.
    """
    try:
        return [ele.ID for ele in element_list].index(e.ID)
    except ValueError:
        # Only "not found" is expected; other errors (e.g. an entry without an
        # ID) are real bugs and must not be reported as a missing element.
        return -99
    
    
# Space Saving over the whole stream with a fixed-size list of Tail entries.
start=time.time()

filename='kosarak.dat'
# Portable path construction; identical to ..\dataset\kosarak.dat on Windows.
filepath=os.path.join("..","dataset")
src_data=os.path.join(filepath,filename)

size=1024   # capacity of the Top list
Top=[]      # Tail entries, kept sorted by count (descending)
#item_count=100000

with open(src_data,'r') as file:
    for raw_line in file:
        element=raw_line.strip('\n')
        if not element:
            # Skip blank lines rather than treating the first one as EOF.
            continue
        item=Tail(element,1)
        index=find(item,Top)
        if index>=0:
            # already monitored: bump its counter
            Top[index].count+=1
        elif len(Top)<size:
            # not monitored yet and there is still room
            Top.append(item)
        else:
            # list is full: the last (smallest-count) entry takes over the new
            # ID and its count is incremented
            Top[-1].ID=item.ID
            Top[-1].count+=1
        Top.sort(key=operator.attrgetter('count'),reverse=True)
print('EOF')

end=time.time()
print(Top[:20],len(Top))
# sys.getsizeof is shallow: it reports the list object only, not the entries.
print("Total memory {0} bytes :Top-{1} with size {0} bytes.".format(sys.getsizeof(Top),size))
print("Execution time:{} seconds.".format(str(end-start)))


# Convert Top into a DataFrame.
templi=[[entry.ID,entry.count] for entry in Top]

df=pd.DataFrame(templi,columns=['ID', 'Count'])
df.to_csv(os.path.join("..","result","kosarak","SS_class_kosarak.csv"),index=False)
df.head(20)


In [None]:
# Result compare
# ARE: (1/distinct element)*(abs(f-f*)/f)

import pandas as pd
import os
# Compare the Space Saving output against the 1024 most frequent true elements.
path=os.path.join('..','result','kosarak')
groundtruth='kosarak_ground_truth.csv'
final='SS_class_kosarak.csv'

grtruth=pd.read_csv(os.path.join(path,groundtruth))
My_result1=pd.read_csv(os.path.join(path,final))

gli = grtruth.values.tolist()
li1= My_result1.values.tolist()
truth_top=gli[:1024]   # sliced once instead of once per reported element

tp=0      # reported elements whose count equals the exact count
fp=0      # reported elements whose count differs from the exact count
found=0   # reported elements present in truth_top; named `found` so the
          # `find()` helper defined in other cells is not shadowed by an int
err=[]
error=0
for item in li1:
    for element in truth_top:
        if item[0]==element[0]:
            found+=1
            if item[1]==element[1]:
                tp+=1
            else:
                fp+=1
                # NOTE(review): the header formula divides by f; this divides by
                # the reported count item[1] - confirm which one is intended.
                error+=abs(item[1]-element[1])/item[1]
print("Find:{},TP:{},FP:{}".format(found,tp,fp))
#print("ARE:{}".format(error/606770))

In [None]:
## ARE

import pandas as pd
import os
# Average relative error of the Space Saving result against the ground truth.
path=os.path.join('..','result','kosarak')
groundtruth='kosarak_ground_truth.csv'
final='SS_class_kosarak.csv'

grtruth=pd.read_csv(os.path.join(path,groundtruth))
My_result1=pd.read_csv(os.path.join(path,final))

gli = grtruth.values.tolist()
li= My_result1.values.tolist()

# Index the exact counts once. The previous version rebuilt the full list of
# ground-truth keys for every result row before calling .index() on it.
true_count={}
for row in gli:
    # setdefault keeps the first occurrence, matching what list.index returned
    true_count.setdefault(row[0],row[1])

err=0
for item in li:
    # A reported ID missing from the ground truth raises KeyError here.
    # NOTE(review): the divisor is the reported count item[1], not the exact
    # count - confirm against the intended ARE definition.
    err+=abs(item[1]-true_count[item[0]])/item[1]
# 606770 is the number of distinct elements listed in the "About Kosarak" notes.
print(err/606770)


## My algo

In [None]:
# hashed value list version.
# Top 1024, Sketch[4*128]

import spookyhash
import mmh3
from numpy import random
import os
import pandas as pd
import time
import operator
import hyperloglog
import sys

# ==========================data structure==========================
class Node():
    """Minimal counter record; Head and Tail build on it."""
    def __init__(self,count=0):
        self.count=count
    def add_count(self,count=1):
        """Increase the stored count by ``count`` (default 1)."""
        self.count+=count
    def __str__(self):
        template='count: {}'
        return template.format(self.count)
    def __repr__(self):
        # repr is deliberately an empty string
        blank=''
        return blank

class Head(Node):
    """Per-row sketch header: running total, HyperLogLog of IDs, local-max ID."""
    def __init__(self,count=1):
        super().__init__(count)
        self.maxID=''   # empty string means "no local maximum recorded yet"
        self.distinct = hyperloglog.HyperLogLog(0.01)
    def __str__(self):
        template='[total count: {}, distinct: {}, max: {}]'
        return template.format(self.count,len(self.distinct),self.maxID)
    # repr shows exactly the same text as str
    __repr__=__str__

class Tail(Node):
    """Counter entry that pairs an ``ID`` with its ``count``."""
    def __init__(self,ID,count):
        super().__init__(count)
        self.ID = ID
    def __str__(self):
        template='[ID: {}, count: {}]'
        return template.format(self.ID,self.count)
    # repr shows exactly the same text as str
    __repr__=__str__

# ==========================UpdateSk==========================
def UpdateSk(element,Sk_head,Sk):
    """Record ``element`` in the sketch and refresh the local/global maxima.

    Parameters
    ----------
    element : Tail carrying the original ID and the count to add.
    Sk_head : list of Head objects, one per sketch row.
    Sk : list of rows; each row is a list of Tail entries keyed by hashed ID.

    The row and the hashed ID come from ``position(element)``. A hashed ID that
    is already in the row has its count increased; a new one is appended while
    the row holds fewer than ``width`` (module-level) entries; otherwise the
    original ID is only added to the row head's ``distinct`` estimator.
    """
    hashed_id,row=position(element)
    bucket=Sk[row]
    row_head=Sk_head[row]
    entry=Tail(hashed_id, element.count)   # the sketch stores the hashed ID

    # the row total always grows, whether or not the entry is stored
    row_head.count+=entry.count

    index=find(entry,bucket)
    if index>=0:
        # hashed ID already present in this row
        bucket[index].count+=entry.count
    elif len(bucket)<width:
        # row still has room: store the new entry and remember where it went
        bucket.append(entry)
        index=len(bucket)-1
    else:
        # row is full: the element is accounted for in the "Other" part only
        row_head.distinct.add(element.ID)

    # index stays negative (-99) here when the element went to "Other"
    Update_local_max(row_head,bucket,element,index)
    Update_emax(Sk_head,Sk)


# ==========================update local max==========================       
def Update_local_max(head_item,element_list,element,index):
    """Refresh ``head_item.maxID``, the ID recorded as the row's local maximum.

    Parameters
    ----------
    head_item : Head of the row being updated.
    element_list : the row's list of Tail entries (keyed by hashed ID).
    element : Tail that was just inserted; its original ID becomes the new
        local maximum when it wins the comparison.
    index : position of the element inside ``element_list``, or a negative
        value when it landed in the row's "Other" part.

    Entries that are not stored in the row are compared through ``avg``, the
    floor of (row total - sum of stored counts) / (width * (numerator /
    denominator - 1)).
    """
    numerator,denominator=get_fraction()
    width,depth=get_width_depth()
    if head_item.maxID=='':
        # no local maximum recorded yet: the current element becomes it
        head_item.maxID=element.ID
        return

    local_max_ID=(mmh3.hash(head_item.maxID, signed=False))% ((width*numerator)//denominator)
    local_max_index=find(Tail(local_max_ID,1),element_list)

    if local_max_index>=0 and index>=0:
        # both are stored in the row: compare their counts directly
        if element_list[index].count >element_list[local_max_index].count:
            head_item.maxID=element.ID
        return

    # at least one of the two is in "Other": the average is needed
    count_sum=sum(entry.count for entry in element_list)
    try:
        avg=(head_item.count-count_sum)//(width*((numerator/denominator)-1))
    except ZeroDivisionError:
        # happens when numerator == denominator; report and keep the current max
        print("len(head_item.distinct)={}".format(len(head_item.distinct)))
        return

    if local_max_index>=0:
        # local max is stored, the element is in "Other"
        if avg>element_list[local_max_index].count:
            head_item.maxID=element.ID
    elif index>=0:
        # local max is in "Other", the element is stored
        if element_list[index].count >avg:
            head_item.maxID=element.ID
    else:
        # both are in "Other": the newest element takes over
        head_item.maxID=element.ID
 # ==========================update e_max==========================
def Update_emax(head,sketch):
    """Raise the global ``e_max`` if any row's local maximum has a larger count.

    Parameters
    ----------
    head : list of Head objects, one per sketch row.
    sketch : list of rows of Tail entries (keyed by hashed ID).

    Rows without a recorded ``maxID``, or whose local maximum is not stored in
    the row, are skipped.
    """
    e_max=get_emax()
    numerator,denominator=get_fraction()
    width,depth=get_width_depth()
    modulus=(width*numerator)//denominator
    for row,row_head in enumerate(head):
        if row_head.maxID=='':
            continue
        hashed_id=(mmh3.hash(row_head.maxID, signed=False))% modulus
        column=find(Tail(hashed_id,1),sketch[row])
        if column<0:
            continue
        candidate=sketch[row][column].count
        if candidate>e_max.count:
            e_max.ID=row_head.maxID
            e_max.count=candidate
# ========================== BringBack=========================
def BringBack(e_min,head,sketch):
    """Swap the global ``e_max`` into the Top slot ``e_min``.

    Parameters
    ----------
    e_min : Tail from the Top list that is overwritten in place.
    head, sketch : the sketch row heads and rows.

    The previous content of ``e_min`` is re-inserted into the sketch after
    ``e_max`` has been deleted from it.
    """
    e_max=get_emax()
    # snapshot the evicted entry before its slot is overwritten
    evicted=Tail(e_min.ID,e_min.count)
    e_min.ID,e_min.count=e_max.ID,e_max.count
    # DeleteSk also resets e_max itself, so it has to run after the copy above
    DeleteSk(e_max,head,sketch)
    UpdateSk(evicted,head,sketch)

# ========================== BringBack=========================
# ==========================DeleteSk=========================
def DeleteSk(element,head,sketch):
    """Delete ``element`` (the current e_max) from its sketch row and reset it.

    Parameters
    ----------
    element : Tail to remove; its ``ID`` and ``count`` are cleared in place.
    head : list of Head objects, one per sketch row.
    sketch : list of rows of Tail entries (keyed by hashed ID).
    """
    hashed_id,row=position(element)
    row_head=head[row]
    # the row total drops by the element's count whether or not it is stored
    row_head.count-=element.count
    slot=find(Tail(hashed_id,1),sketch[row])
    if not slot<0:
        sketch[row].pop(slot)
        # the row's local maximum is cleared
        row_head.maxID=""

    # reset the passed-in object itself
    element.ID=""
    element.count=0
# ==========================Tools=========================    
    
# ==========================Tools=========================    
def get_emax():
    """Return the module-level ``e_max`` object (set in the main section)."""
    return e_max
def get_width_depth():
    """Return the module-level sketch dimensions as ``(width, depth)``."""
    return width,depth

def find(e,element_list):
    """Return the index of the entry whose ``ID`` equals ``e.ID``, or -99.

    Parameters
    ----------
    e : object with an ``ID`` attribute (the key to look for).
    element_list : sequence of objects that each expose ``ID``.

    Returns
    -------
    int
        Position of the first match; -99 (a negative sentinel, so callers can
        test ``index < 0``) when no entry matches.
    """
    try:
        return [ele.ID for ele in element_list].index(e.ID)
    except ValueError:
        # Only "not found" is expected; other errors (e.g. an entry without an
        # ID) are real bugs and must not be reported as a missing element.
        return -99

def position(element):
    """Map ``element.ID`` to its sketch coordinates.

    Returns
    -------
    (ID, row)
        ``ID`` is the mmh3 hash of the ID modulo ``(width*numerator)//denominator``;
        ``row`` is the spookyhash value of the ID modulo ``depth``.
    """
    numerator,denominator=get_fraction()
    width,depth=get_width_depth()
    key=element.ID
    # spookyhash takes bytes; per the original notes it returns an unsigned 32-bit int
    row=spookyhash.hash32(bytes(str(key),encoding='utf-8')) % depth
    # mmh3 takes the string itself; signed=False requests an unsigned value
    ID=mmh3.hash(key, signed=False) % ((width*numerator)//denominator)
    return ID,row
def get_fraction():
    """Return the module-level ``(numerator, denominator)`` pair."""
    return numerator,denominator

# ==========================main=========================

# Main: a Top list of exact counters backed by a sketch for everything else.
filename='kosarak.dat'
# Portable path construction; identical to ..\dataset\kosarak.dat on Windows.
filepath=os.path.join("..","dataset")
src_data=os.path.join(filepath,filename)
depth=4          # number of sketch rows
width=128        # maximum number of stored entries per row
size=1024        # capacity of the Top list
numerator=12     # numerator/denominator scale the hashed-ID range in position()
denominator=10

start=time.time()
Top=[]
Sk_head=[Head(0) for j in range(depth)]
Sk=[[] for i in range(depth)]

e_max=Tail('',0)    # largest entry currently known inside the sketch
item_count=10000    # leftover debugging cap; the loop below does not use it
with open(src_data,'r') as file:
    for raw_line in file:
        element=raw_line.strip('\n')
        if not element:
            # Skip blank lines rather than treating the first one as EOF.
            continue
        item=Tail(element,1)
        index=find(item,Top)
        if index>=0:
            # already in Top: bump its counter
            Top[index].count+=1
        elif len(Top)<size:
            # not in Top yet and there is still room
            Top.append(item)
        else:
            # Top is full: the element goes to the sketch
            UpdateSk(item,Sk_head,Sk)
        Top.sort(key=operator.attrgetter('count'),reverse=True)
        # If the sketch holds something larger than the smallest Top entry,
        # swap the two.
        if e_max.count>Top[-1].count:
            BringBack(Top[-1],Sk_head,Sk)


end=time.time()
print("Sk_head:{}".format(Sk_head))
print("TOP[20]:{}".format(Top[:20]))
# print("\nSketch:{}".format(Sk))
# sys.getsizeof is shallow: it reports the list objects only, not their contents.
print("Total memory {} bytes".format(sys.getsizeof(Top)+sys.getsizeof(Sk)+sys.getsizeof(Sk_head)))
print("Top:{} bytes, Sketch:{} bytes, Sketch_head:{} bytes.".format(sys.getsizeof(Top),sys.getsizeof(Sk),sys.getsizeof(Sk_head)))
print("Execution time:{} seconds.".format(str(end-start)))


templi=[[entry.ID,entry.count] for entry in Top]

df=pd.DataFrame(templi,columns=['ID', 'Count'])
df.to_csv(os.path.join("..","result","kosarak","My_kosarak.csv"),index=False)
df.head(20)

## My algo:hash-map version

In [1]:
# hash-map array version.
import numpy as np
import spookyhash
import mmh3
import os
import pandas as pd
import time
import operator
import hyperloglog
import sys

# ==========================data structure==========================
class Node():
    """Minimal counter record; Head and Tail build on it."""
    def __init__(self,count=0):
        self.count=count
    def add_count(self,count=1):
        """Increase the stored count by ``count`` (default 1)."""
        self.count+=count
    def __str__(self):
        template='count: {}'
        return template.format(self.count)
    def __repr__(self):
        # repr is deliberately an empty string
        blank=''
        return blank

class Head(Node):
    """Per-row sketch header: running total, HyperLogLog of IDs, local-max ID."""
    def __init__(self,count=1):
        super().__init__(count)
        self.maxID=''   # empty string means "no local maximum recorded yet"
        self.distinct = hyperloglog.HyperLogLog(0.01)
    def __str__(self):
        template='[total count: {}, distinct: {}, max: {}]'
        return template.format(self.count,len(self.distinct),self.maxID)
    # repr shows exactly the same text as str
    __repr__=__str__

class Tail(Node):
    """Counter entry that pairs an ``ID`` with its ``count``."""
    def __init__(self,ID,count):
        super().__init__(count)
        self.ID = ID
    def __str__(self):
        template='[ID: {}, count: {}]'
        return template.format(self.ID,self.count)
    # repr shows exactly the same text as str
    __repr__=__str__




# ==========================UpdateSk==========================
def UpdateSk(element,Sk_head,Sk):
    """Add ``element`` to the array sketch and refresh the local/global maxima.

    Parameters
    ----------
    element : Tail carrying the original ID and the count to add.
    Sk_head : list of Head objects, one per sketch row.
    Sk : 2-D counter array indexed as ``Sk[row][col]``.

    ``position(element)`` yields ``(col, row)``. A column below ``width`` maps to
    a stored counter; any other column belongs to the row's "Other" part, which
    is tracked only through the row total in ``Sk_head[row].count``.
    """
    width,depth=get_width_depth()
    col,row=position(element)
    Sk_head[row].count+=element.count
    if col<width:
        # Increment by the element's own count so the cell stays consistent
        # with the row total above (previously a hard-coded 1).
        Sk[row][col]+=element.count
    # Elements in "Other" need no further work here: the average that used to
    # be computed at this point was never used.
    Update_local_max(Sk_head[row],Sk[row],element,col)
    Update_emax(Sk_head,Sk)

# ==========================update local max==========================       
def Update_local_max(head_item,element_list,element,column):
    """Refresh ``head_item.maxID``, the ID recorded as the row's local maximum.

    Parameters
    ----------
    head_item : Head of the row being updated.
    element_list : the row's counters (a single row of the sketch array).
    element : Tail that was just inserted.
    column : column index of the element; values >= ``width`` mean it falls in
        the row's "Other" part.

    Entries in "Other" are compared through ``avg``, the floor of
    (row total - sum of stored counters) / (width * (numerator/denominator - 1)).
    """
    numerator,denominator=get_fraction()
    width,depth=get_width_depth()
    if head_item.maxID=='':
        # no local maximum recorded yet: the current element becomes it
        head_item.maxID=element.ID
        return

    local_max_col=(mmh3.hash(head_item.maxID,signed=False))% ((width*numerator)//denominator)
    max_in_sketch=local_max_col<width
    element_in_sketch=column<width

    if max_in_sketch and element_in_sketch:
        # both have their own counter: compare directly
        if element_list[column]>element_list[local_max_col]:
            head_item.maxID=element.ID
        return
    if not max_in_sketch and not element_in_sketch:
        # both are in "Other": the recorded maximum is left unchanged, as
        # before; the average is no longer computed on this path since it was
        # never used here
        return

    # exactly one of the two is in "Other": compare against the average
    count_sum=sum(element_list)
    avg=(head_item.count-count_sum)//(width*((numerator/denominator)-1))
    if max_in_sketch:
        # element is in "Other"
        if avg>element_list[local_max_col]:
            head_item.maxID=element.ID
    else:
        # local max is in "Other"
        if element_list[column]>avg:
            head_item.maxID=element.ID

# ==========================update e_max==========================
def Update_emax(head,sketch):
    """Raise the global ``e_max`` if any row's local maximum has a larger value.

    Parameters
    ----------
    head : list of Head objects, one per sketch row.
    sketch : the whole 2-D counter array.

    A local maximum with a stored counter is valued by that counter; one that
    lies in "Other" is valued by the row's average over the "Other" part.
    """
    e_max=get_emax()
    numerator,denominator=get_fraction()
    width,depth=get_width_depth()
    for i,row_head in enumerate(head):
        if row_head.maxID=='':
            continue
        local_max_col,local_max_row=position(Tail(row_head.maxID,0))
        if local_max_col<width:
            candidate=sketch[local_max_row][local_max_col]
        else:
            # local max is in "Other"
            stored_total=sum(j for j in sketch[i])
            candidate=(row_head.count-stored_total)//(width*((numerator/denominator)-1))
        if candidate>e_max.count:
            e_max.ID=row_head.maxID
            e_max.count=candidate
# ==========================Tools=========================    
def get_emax():
    """Return the module-level ``e_max`` object (set in the main section)."""
    return e_max
def get_width_depth():
    """Return the module-level sketch dimensions as ``(width, depth)``."""
    return width,depth

def find(e,element_list):
    """Return the index of the entry whose ``ID`` equals ``e.ID``, or -99.

    Parameters
    ----------
    e : object with an ``ID`` attribute (the key to look for).
    element_list : sequence of objects that each expose ``ID``.

    Returns
    -------
    int
        Position of the first match; -99 (a negative sentinel, so callers can
        test ``index < 0``) when no entry matches.
    """
    try:
        return [ele.ID for ele in element_list].index(e.ID)
    except ValueError:
        # Only "not found" is expected; other errors (e.g. an entry without an
        # ID) are real bugs and must not be reported as a missing element.
        return -99

def position(element):
    """Map ``element.ID`` to its sketch coordinates.

    Returns
    -------
    (col, row)
        ``col`` is the mmh3 hash of the ID modulo ``(width*numerator)//denominator``;
        ``row`` is the spookyhash value of the ID modulo ``depth``.
    """
    numerator,denominator=get_fraction()
    width,depth=get_width_depth()
    key=element.ID
    # spookyhash takes bytes; per the original notes it returns an unsigned 32-bit int
    row=spookyhash.hash32(bytes(str(key),encoding='utf-8')) % depth
    # mmh3 takes the string itself; signed=False requests an unsigned value
    col=mmh3.hash(key, signed=False) % ((width*numerator)//denominator)
    return col,row
def get_fraction():
    """Return the module-level ``(numerator, denominator)`` pair."""
    return numerator,denominator    
    
# ==========================main=========================    

# Main: feed every line of the dataset into the array-based sketch.
filename='kosarak.dat'
# Portable path construction; identical to ..\dataset\kosarak.dat on Windows.
filepath=os.path.join("..","dataset")
src_data=os.path.join(filepath,filename)
depth=4          # number of sketch rows
width=128        # number of stored counters per row
size=1024        # not used by this cell's loop
numerator=12     # numerator/denominator scale the column range in position()
denominator=10

start=time.time()
Sk_head=[Head(0) for j in range(depth)]

Sketch=np.zeros((depth,width),dtype='int')

e_max=Tail('',0)    # largest entry currently known inside the sketch

item_count=100      # leftover debugging cap; the loop below does not use it
income=0
with open(src_data,'r') as file:
    for raw_line in file:
        e=raw_line.strip('\n')
        if not e:
            # Skip blank lines rather than treating the first one as EOF.
            continue
        item=Tail(e,1)
        UpdateSk(item,Sk_head,Sketch)
end=time.time()
print("Execution time:{} seconds.".format(str(end-start)))
print("e_max:{}".format(e_max))
for i in range(len(Sketch)):
    print("Sk[{}]:{},{}".format(i,Sk_head[i],Sketch[i]))
print('') 

Execution time:64.12468361854553 seconds.
e_max:[ID: 3, count: 37374]
Sk[0]:[total count: 223820, distinct: 0, max: 11 6],[ 1621  1819  1173  1244  1380  1316  1557  4447  1216  1345  1530  1927
  1337  1374  1152  1234  1226  1086  1126  1166  1333  1110  1318  1146
  1119  1116  1148  1185  1298 14231  1471  1308  1285  1184  1122  2377
  1702  1216  1338  1313  1420  1240  2315  1333  1348  1170  1114  1251
  1200  1198  1219  1446  1401  1162  1496  1255  1276  1386  1172  1166
  1242  1180  1352  1194  1419  1240  1466  2899  1229  1367  2240  1374
  1273  1314  1205  1339  1157  1154  1788  1537  1157  1127  1512  1318
  1129  1661  1177  1272  4984  1395  1259  1202  1164  1264  1215  1371
  1601  1159  1279  1314  1203  1344  1787  1166  1255  1209  1283  1176
  1209  1290  1560  1365  1221  1318  1474  1185  1326  1369  1466  1303
  1194  1287  1320  1309  1152  1261  1201  1086]
Sk[1]:[total count: 228615, distinct: 0, max: 1],[ 1392  1595  1304  1368  1237  1369  1334  1572 

In [None]:
## gprof2dot + graphviz的指令
執行並輸出.out檔：
python -m cProfile -o 0408.out 0408.py
產生圖形：
python -m gprof2dot -f pstats 0408.out | dot -T png -o 0408.png