# In [1]:
import random
import hashlib
import string
import csv
import numpy
from numpy import genfromtxt

# Host part shared by every simulated backend address.
LOCALHOST = '127.0.0.1'
# Bounds handed to random.randint when a backend port is drawn (both ends inclusive).
PORT_START = 30000
PORT_END = 40000
# Repetition count; only referenced by the commented-out simulation loop in main().
NUM_REPS = 50
# Comma-separated log: written through LOG_WRITER and read back in main().
LOG_FILE_NAME = "hash_sim_log.txt"
# NOTE: mode 'w' truncates any existing log as soon as this module is loaded,
# and nothing in the code shown here closes the handle.
LOG_FILE = open(LOG_FILE_NAME, 'w')
LOG_WRITER = csv.writer(LOG_FILE, delimiter=',')
# Header row. The third column is labelled 'NUM_REPS', but the commented-out
# loop in main() writes the repetition index (numReps) into that position.
LOG_WRITER.writerow(['NumKeys', 'NumNodes','NUM_REPS','NumKeysMigrated'])

def hash(key):
    """Return the SHA-1 digest of *key* as a non-negative integer.

    NOTE: this intentionally keeps the original name, which shadows the
    builtin ``hash`` for the rest of the module.

    Args:
        key: A byte string or a text string. Text is encoded as UTF-8 before
            hashing, so ``hash("abc") == hash(b"abc")``.

    Returns:
        The 160-bit SHA-1 digest interpreted as a base-16 integer.
    """
    # hashlib only accepts bytes-like input; on Python 3 a plain str would
    # raise TypeError. On Python 2, str is already bytes and passes through
    # unchanged, so existing results are preserved.
    if not isinstance(key, bytes):
        key = key.encode('utf-8')
    return int(hashlib.sha1(key).hexdigest(), 16)

def getRandomString(size=6, chars=string.ascii_uppercase + string.digits):
    """Build a random string of *size* characters drawn from *chars*.

    Each character is picked independently with ``random.choice``, so
    characters may repeat. The default alphabet is the uppercase ASCII
    letters followed by the decimal digits.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)

# Produce numAddrs backend addresses of the form LOCALHOST:<port>.
def generateAddrs(numAddrs):
    """Return a list of *numAddrs* "host:port" address strings.

    Every port is drawn independently with
    ``random.randint(PORT_START, PORT_END)``, so the same address can appear
    more than once in the result.
    """
    return [
        LOCALHOST + ":" + str(random.randint(PORT_START, PORT_END))
        for _ in range(numAddrs)
    ]

def generate_hash_to_back_map(addrs):
    """Map the hash of each backend address to the address itself.

    Addresses are processed in order; if two addresses hash to the same
    value, the later one replaces the earlier one in the returned dict.
    """
    return {hash(backend): backend for backend in addrs}

# Draw numKeys random keys and record which backend each of them lands on.
def generateKeyDistribution(hash_to_back_map, numKeys):
    """Assign *numKeys* freshly generated random keys to their backends.

    Each key is a random string whose length is drawn from 3 to 10
    (inclusive). Its owner is looked up in *hash_to_back_map* using the hash
    returned by ``getMasterHashValue``.

    Returns:
        A pair ``(back_to_keys, key_dist)``: the first maps a backend address
        to the list of keys it owns, the second maps a backend address to how
        many keys it owns. Backends that received no key appear in neither.
    """
    ring_hashes = hash_to_back_map.keys()
    keys_per_back = {}
    count_per_back = {}
    for _ in range(numKeys):
        key_len = random.randint(3, 10)
        key = getRandomString(key_len)
        owner = hash_to_back_map[getMasterHashValue(key, ring_hashes)]
        count_per_back[owner] = count_per_back.get(owner, 0) + 1
        keys_per_back.setdefault(owner, []).append(key)
    return keys_per_back, count_per_back

    
# For a given key, return the hash of the backend that is the key's master.
def getMasterHashValue(key, backs_hash):
    """Return the backend hash that follows the key's hash on the ring.

    The key's hash is placed among the backend hashes in ascending order and
    the entry right after its first occurrence is returned, wrapping around
    to the smallest entry when the key's hash is the largest. With no backend
    hashes at all, the key's own hash is returned.
    """
    key_hash = hash(key)
    ring = sorted(list(backs_hash) + [key_hash])
    successor = (ring.index(key_hash) + 1) % len(ring)
    return ring[successor]

def getPrevBackendHashValue(key, backs_hash):
    """Return the backend hash that precedes the key's hash on the ring.

    The key's hash is placed among the backend hashes in ascending order and
    the entry right before its first occurrence is returned, wrapping around
    to the largest entry when the key's hash is the smallest. With no backend
    hashes at all, the key's own hash is returned.
    """
    key_hash = hash(key)
    ring = list(backs_hash)
    ring.append(key_hash)
    ring.sort()
    pos = ring.index(key_hash)
    # pos - 1 is -1 when the key sorts first, which selects the last entry:
    # the same wrap-around as taking the index modulo the ring length.
    return ring[pos - 1]
        
# Given an initial distribution of keys, compute how many keys are migrated
# when one new, randomly addressed node joins the ring.
def getNumKeysMigratedForMasterOnJoin(back_to_keys, key_dist, hash_to_back_map):
    """Simulate a single node join and report the keys it takes over.

    A new address is generated, its successor ("master") and predecessor on
    the ring are looked up, and every key currently held by the successor is
    checked against the arc ``(predecessor hash, new node hash]``. Keys inside
    that arc move to the new node.

    Args:
        back_to_keys: Maps a backend address to the list of keys it holds.
            Backends without any key may be absent.
        key_dist: Per-backend key counts. Unused; kept so existing callers
            keep working.
        hash_to_back_map: Maps a backend hash to its address.

    Returns:
        A pair ``(numKeysMigrated, key_migrated)``: the count of migrated
        keys and the list of those keys.
    """
    addr = generateAddrs(1)[0]
    addr_hash = hash(addr)
    backs_hash = hash_to_back_map.keys()
    master_addr = hash_to_back_map[getMasterHashValue(addr, backs_hash)]
    prev_hash = getPrevBackendHashValue(addr, backs_hash)

    # A backend that never received a key has no entry in back_to_keys;
    # treat it as holding nothing instead of raising KeyError.
    master_keys = back_to_keys.get(master_addr, [])

    # When the new node's hash is not above its predecessor's, the arc
    # (prev_hash, addr_hash] passes through the top of the hash space.
    arc_wraps = not (addr_hash > prev_hash)

    key_migrated = []
    for key in master_keys:
        key_hash = hash(key)
        if arc_wraps:
            # Two pieces: above the predecessor, or at/below the new node.
            # (The previous version also accepted key_hash >= 0, which is
            # always true and made every key count as migrated.)
            moves = key_hash > prev_hash or key_hash <= addr_hash
        else:
            moves = prev_hash < key_hash <= addr_hash
        if moves:
            key_migrated.append(key)
    return len(key_migrated), key_migrated

def main():
    """Load the simulation log and return the shape of the parsed array.

    The simulation loop that produces the log rows is currently disabled
    (kept below as a comment); only the read-back step runs.

    Returns:
        The result of ``numpy.shape`` applied to the array that
        ``genfromtxt`` builds from ``LOG_FILE_NAME``.
    """
#     for numNodes in range(10,100,10):
#         addrs = generateAddrs(numNodes)
# #         print "addresses of backends:",addrs
#         hash_to_back_map = generate_hash_to_back_map(addrs)
#         for numKeys in range(1000,3001,1000):  
#             for numReps in range(NUM_REPS):
#             #     print "back_to_keys map:",back_to_keys
#             #     print "key_dist map:",key_dist
#                 back_to_keys,key_dist = generateKeyDistribution(hash_to_back_map,numKeys)
#                 numKeysMigrated,key_migrated = getNumKeysMigratedForMasterOnJoin(back_to_keys,key_dist,hash_to_back_map)
# #                 print "key_migrated:",key_migrated
#                 print "numkeysmigrated:",numKeysMigrated
#                 LOG_WRITER.writerow([numKeys,numNodes,numReps,numKeysMigrated])
#             LOG_FILE.flush()

    # Rows written through LOG_WRITER may still sit in LOG_FILE's buffer;
    # flush so the read below sees everything written so far.
    LOG_FILE.flush()

    # NOTE(review): no header rows are skipped here, so the header line
    # written at module load is parsed together with the data rows.
    my_data = genfromtxt(LOG_FILE_NAME, delimiter=',')

    # `shape` was never imported into this module (only `numpy` and
    # `genfromtxt` are), so call it through the numpy namespace.
    return numpy.shape(my_data)
    


# Call main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
    
    

# Notebook output captured from the cell above:
# ImportError: No module named numpy