In [24]:
import random, string, requests, time, json
from tqdm import tqdm

# Base address of the load balancer; every endpoint path is appended to `url`.
localhost = "http://127.0.0.1"
url = f"{localhost}:5000/"

In [25]:
# Synthetic student table used as the pool of rows for the write benchmark:
# 10000 rows with sequential ids, a random 6-letter uppercase name and a
# random integer mark between 0 and 100 (both bounds inclusive).
records = [
    dict(
        Stud_id = sid,
        Stud_name = "".join(random.choices(string.ascii_uppercase, k = 6)),
        Stud_marks = random.randint(0, 100),
    )
    for sid in range(10_000)
]

In [26]:
def readN(N : int = 10000, batch_size : int = 10, max_id : int = 10000):
    """Time N sequential POST requests to the "read" endpoint.

    Every request carries a JSON body {"Stud_id": {"low": lo, "high": hi}}
    where lo is drawn uniformly from 0..max_id (both ends inclusive, as
    random.randint does) and hi = lo + batch_size.

    Parameters:
        N          -- number of requests to send.
        batch_size -- width of the requested Stud_id range.
        max_id     -- largest value lo may take (defaults to the previously
                      hard-coded 10000).

    Returns:
        A one-line summary string with the elapsed time. If any response came
        back with an HTTP error status (>= 400), the number of such responses
        is appended so a broken run is not mistaken for a valid measurement.
    """

    failed = 0

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments during a multi-minute run.
    start = time.perf_counter()

    for _ in tqdm(range(N)):
        lo = random.randint(0, max_id)
        hi = lo + batch_size
        r = requests.post(url + "read", data = json.dumps({"Stud_id" : {"low" : lo, "high" : hi}}))
        if not r.ok:
            failed += 1

    end = time.perf_counter()

    report = f"{N} read requests with batches of {batch_size} took {end-start:.2f} seconds."
    if failed:
        report += f" {failed} of them returned an HTTP error status."

    return report

In [27]:
def writeN(N : int = 10000, batch_size : int = 10):
    """Time N sequential POST requests to the "write" endpoint.

    Every request carries a JSON body {"data": [...]} holding batch_size rows
    picked from the module-level `records` list.

    NOTE: random.choices samples with replacement, so one batch may contain
    the same row twice and rows repeat across requests. The workload is left
    unchanged here; whether the server accepts duplicate Stud_id values is not
    visible from this notebook.

    Parameters:
        N          -- number of requests to send.
        batch_size -- number of rows per request.

    Returns:
        A one-line summary string with the elapsed time. If any response came
        back with an HTTP error status (>= 400), the number of such responses
        is appended so a broken run is not mistaken for a valid measurement.
    """

    failed = 0

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments during a multi-minute run.
    start = time.perf_counter()

    for _ in tqdm(range(N)):
        data = random.choices(records, k = batch_size)
        r = requests.post(url + "write", data = json.dumps({"data" : data}))
        if not r.ok:
            failed += 1

    end = time.perf_counter()

    report = f"{N} write requests with batches of {batch_size} took {end-start:.2f} seconds."
    if failed:
        report += f" {failed} of them returned an HTTP error status."

    return report

# A-1

Report the read and write speeds for 10000 writes and 10000 reads in the given default configuration.

In [28]:
# Initialise the load balancer with the default layout:
# 3 servers, 3 shards of 4096 ids each, every shard placed on 2 servers.

init = {
    "N" : 3,
    "schema" : {
        "columns" : ["Stud_id", "Stud_name", "Stud_marks"],
        "dtypes" : ["Number", "String", "String"],
    },
    # Shard k (1-based) starts at id 4096 * (k - 1).
    "shards" : [
        {"Stud_id_low" : 4096 * idx, "Shard_id" : f"sh{idx + 1}", "Shard_size" : 4096}
        for idx in range(3)
    ],
    "servers" : {
        "Server0" : ["sh1", "sh2"],
        "Server1" : ["sh2", "sh3"],
        "Server2" : ["sh1", "sh3"],
    },
}

r = requests.post(url + "init", data = json.dumps(init))

In [29]:
# Ask the load balancer for its current configuration and display it.

r = requests.get(url + "status")
r.json()

{'N': 3,
 'schema': {'columns': ['Stud_id', 'Stud_name', 'Stud_marks'],
  'dtypes': ['Number', 'String', 'String']},
 'servers': {'Server0': ['sh1', 'sh2'],
  'Server1': ['sh2', 'sh3'],
  'Server2': ['sh1', 'sh3']},
 'shards': [{'Shard_id': 'sh1', 'Shard_size': 4096, 'Stud_id_low': 0},
  {'Shard_id': 'sh2', 'Shard_size': 4096, 'Stud_id_low': 4096},
  {'Shard_id': 'sh3', 'Shard_size': 4096, 'Stud_id_low': 8192}]}

In [30]:
# Write benchmark (defaults spelled out: 10000 requests, 10 rows each)

writeN(N = 10000, batch_size = 10)

100%|██████████| 10000/10000 [05:21<00:00, 31.06it/s]


'10000 write requests with batches of 10 took 321.92 seconds.'

In [31]:
# Read benchmark (defaults spelled out: 10000 requests, range width 10)

readN(N = 10000, batch_size = 10)

100%|██████████| 10000/10000 [02:41<00:00, 62.09it/s]


'10000 read requests with batches of 10 took 161.07 seconds.'

# A-2

Increase the number of shard replicas (to 7) from the configuration (init endpoint). Report the write slowdown for
10000 writes and the read speedup for 10000 reads.

In [32]:
# Initialise with 7 servers, each holding all 3 shards,
# so every shard has 7 replicas.

init = {
    "N" : 7,
    "schema" : {
        "columns" : ["Stud_id", "Stud_name", "Stud_marks"],
        "dtypes" : ["Number", "String", "String"],
    },
    # Shard k (1-based) starts at id 4096 * (k - 1).
    "shards" : [
        {"Stud_id_low" : 4096 * idx, "Shard_id" : f"sh{idx + 1}", "Shard_size" : 4096}
        for idx in range(3)
    ],
    # Server0 .. Server6, identical shard list on each.
    "servers" : {
        f"Server{idx}" : ["sh1", "sh2", "sh3"]
        for idx in range(7)
    },
}

r = requests.post(url + "init", data = json.dumps(init))

In [33]:
# Ask the load balancer for its current configuration and display it.

r = requests.get(url + "status")
r.json()

{'N': 7,
 'schema': {'columns': ['Stud_id', 'Stud_name', 'Stud_marks'],
  'dtypes': ['Number', 'String', 'String']},
 'servers': {'Server0': ['sh1', 'sh2', 'sh3'],
  'Server1': ['sh1', 'sh2', 'sh3'],
  'Server2': ['sh1', 'sh2', 'sh3'],
  'Server3': ['sh1', 'sh2', 'sh3'],
  'Server4': ['sh1', 'sh2', 'sh3'],
  'Server5': ['sh1', 'sh2', 'sh3'],
  'Server6': ['sh1', 'sh2', 'sh3']},
 'shards': [{'Shard_id': 'sh1', 'Shard_size': 4096, 'Stud_id_low': 0},
  {'Shard_id': 'sh2', 'Shard_size': 4096, 'Stud_id_low': 4096},
  {'Shard_id': 'sh3', 'Shard_size': 4096, 'Stud_id_low': 8192}]}

In [34]:
# Write benchmark with 7 replicas per shard (10000 requests, 10 rows each)
writeN(N = 10000, batch_size = 10)

100%|██████████| 10000/10000 [16:59<00:00,  9.81it/s]


'10000 write requests with batches of 10 took 1019.24 seconds.'

In [35]:
# Read benchmark with 7 replicas per shard (10000 requests, range width 10)
readN(N = 10000, batch_size = 10)

100%|██████████| 10000/10000 [02:40<00:00, 62.37it/s]


'10000 read requests with batches of 10 took 160.34 seconds.'

# A-3

Increase the number of servers (to 10) by adding new servers and increase the number of shards (shards to 6, shard
replicas to 8). Define the (init endpoint) configurations according to your choice. Report the write speedup for 10000
writes and the read speedup for 10000 reads.

In [36]:
# Initialise with 7 servers, each holding all 6 shards (7 replicas per shard).
# The remaining 3 servers are attached afterwards through the "add" endpoint.

init = {
    "N" : 7,
    "schema" : {
        "columns" : ["Stud_id", "Stud_name", "Stud_marks"],
        "dtypes" : ["Number", "String", "String"],
    },
    # Shard k (1-based) starts at id 4096 * (k - 1).
    "shards" : [
        {"Stud_id_low" : 4096 * idx, "Shard_id" : f"sh{idx + 1}", "Shard_size" : 4096}
        for idx in range(6)
    ],
    # Server0 .. Server6, each listing sh1 .. sh6.
    "servers" : {
        f"Server{idx}" : [f"sh{shard_no}" for shard_no in range(1, 7)]
        for idx in range(7)
    },
}

r = requests.post(url + "init", data = json.dumps(init))

In [37]:
# Ask the load balancer for its current configuration and display it.

r = requests.get(url + "status")
r.json()

{'N': 7,
 'schema': {'columns': ['Stud_id', 'Stud_name', 'Stud_marks'],
  'dtypes': ['Number', 'String', 'String']},
 'servers': {'Server0': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server1': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server2': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server3': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server4': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server5': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server6': ['sh1', 'sh2', 'sh3', 'sh4', 'sh5', 'sh6']},
 'shards': [{'Shard_id': 'sh1', 'Shard_size': 4096, 'Stud_id_low': 0},
  {'Shard_id': 'sh2', 'Shard_size': 4096, 'Stud_id_low': 4096},
  {'Shard_id': 'sh3', 'Shard_size': 4096, 'Stud_id_low': 8192},
  {'Shard_id': 'sh4', 'Shard_size': 4096, 'Stud_id_low': 12288},
  {'Shard_id': 'sh5', 'Shard_size': 4096, 'Stud_id_low': 16384},
  {'Shard_id': 'sh6', 'Shard_size': 4096, 'Stud_id_low': 20480}]}

In [38]:
# Add 3 more servers (Server7 .. Server9) without introducing new shards.
# Each new server takes two existing shards, and each shard appears exactly
# once across the three, so every shard gains one replica.

payload = {
    "n" : 3,
    "new_shards" : [],
    "servers" : dict(
        Server7 = ["sh1", "sh6"],
        Server8 = ["sh2", "sh5"],
        Server9 = ["sh3", "sh4"],
    ),
}

r = requests.post(url + "add", json = payload)

In [39]:
# Write benchmark on the 10-server, 6-shard layout (10000 requests, 10 rows each)
writeN(N = 10000, batch_size = 10)

100%|██████████| 10000/10000 [21:42<00:00,  7.67it/s]


'10000 write requests with batches of 10 took 1302.94 seconds.'

In [40]:
# Read benchmark on the 10-server, 6-shard layout (10000 requests, range width 10)
readN(N = 10000, batch_size = 10)

100%|██████████| 10000/10000 [02:37<00:00, 63.40it/s]


'10000 read requests with batches of 10 took 157.72 seconds.'

# A-4 

Finally, check all the endpoints and ensure their correctness. Manually drop a server container and show that the load
balancer spawns a new container and copies the shard entries from other replicas.