In [26]:
import random, string, requests, time, json
from tqdm import tqdm
from threading import Thread

# Address of the service every request in this notebook is sent to (port 5000).
localhost = "http://127.0.0.1"
url = f"{localhost}:5000/"


In [27]:
# Synthetic student rows used as the write workload: ids 0..9999, each with a
# random six-letter upper-case name and a random integer mark in [0, 100].
records = [
    dict(
        Stud_id=stud_id,
        Stud_name="".join(random.choices(string.ascii_uppercase, k=6)),
        Stud_marks=random.randint(0, 100),
    )
    for stud_id in range(10000)
]

In [28]:
def read_target(batch_size : int = 10):
    """Send one range-read request for a randomly placed window of student ids.

    The lower bound is drawn uniformly from 0..10000 (both ends included) and
    the upper bound is ``batch_size`` above it. The response is not inspected.
    """
    low = random.randint(0, 10000)
    id_range = {"low" : low, "high" : low + batch_size}
    payload = json.dumps({"Stud_id" : id_range})
    requests.post(url + "read", data = payload)

def readN(N : int = 1000, batch_size : int = 10):
    """Fire ``N`` concurrent read requests and report how long they took.

    One thread is started per request, each running ``read_target(batch_size)``.
    The measured interval covers starting all threads and waiting for every
    one of them to finish.

    Args:
        N: number of read requests (one thread each) to launch.
        batch_size: forwarded unchanged to ``read_target`` for every request.

    Returns:
        A one-line summary string with the elapsed seconds to two decimals.
    """
    # perf_counter() is monotonic, so the interval cannot be skewed by a
    # system clock adjustment during the run the way time.time() can.
    start = time.perf_counter()

    threads = []
    for _ in tqdm(range(N)):
        # Pass the argument via `args` rather than wrapping the call in a lambda.
        worker = Thread(target = read_target, args = (batch_size,))
        worker.start()
        threads.append(worker)
    for worker in tqdm(threads):
        worker.join()

    elapsed = time.perf_counter() - start

    return f"{N} read requests with batches of {batch_size} took {elapsed:.2f} seconds."

In [29]:
def write_target(batch_size : int = 10):
    """Send one write request carrying ``batch_size`` rows sampled from ``records``.

    Rows are drawn with ``random.choices`` (sampling with replacement), so a
    batch may contain the same row more than once. The response is not inspected.
    """
    batch = random.choices(records, k = batch_size)
    body = json.dumps({"data" : batch})
    requests.post(url + "write", data = body)

def writeN(N : int = 1000, batch_size : int = 10):
    """Fire ``N`` concurrent write requests and report how long they took.

    One thread is started per request, each running ``write_target(batch_size)``.
    The measured interval covers starting all threads and waiting for every
    one of them to finish.

    Args:
        N: number of write requests (one thread each) to launch.
        batch_size: forwarded unchanged to ``write_target`` for every request.

    Returns:
        A one-line summary string with the elapsed seconds to two decimals.
    """
    # perf_counter() is monotonic, so the interval cannot be skewed by a
    # system clock adjustment during the run the way time.time() can.
    start = time.perf_counter()

    threads = []
    for _ in tqdm(range(N)):
        # Pass the argument via `args` rather than wrapping the call in a lambda.
        worker = Thread(target = write_target, args = (batch_size,))
        worker.start()
        threads.append(worker)
    for worker in tqdm(threads):
        worker.join()

    elapsed = time.perf_counter() - start

    return f"{N} write requests with batches of {batch_size} took {elapsed:.2f} seconds."

# A-1

Report the read and write speeds for 10000 writes and 10000 reads in the given default configuration.

In [30]:
# Initialise the default layout: 3 servers, 3 shards of 4096 ids each,
# every shard placed on 2 of the servers.
# NOTE(review): Stud_marks is declared as "String" here while `records` holds
# integer marks — confirm this is the intended schema.
init = {
    "N": 3,
    "schema": {
        "columns": ["Stud_id", "Stud_name", "Stud_marks"],
        "dtypes": ["Number", "String", "String"],
    },
    "shards": [
        {"Stud_id_low": 4096 * k, "Shard_id": f"sh{k + 1}", "Shard_size": 4096}
        for k in range(3)
    ],
    "servers": {
        "Server0": ["sh1", "sh2"],
        "Server1": ["sh2", "sh3"],
        "Server2": ["sh1", "sh3"],
    },
}

r = requests.post(url + "init", data=json.dumps(init))

In [31]:
# Check Status: fetch the configuration the service currently reports and
# display it as the cell output.
r = requests.get(url + "status")
r.json()

{'N': 3,
 'schema': {'columns': ['Stud_id', 'Stud_name', 'Stud_marks'],
  'dtypes': ['Number', 'String', 'String']},
 'servers': {'Server0': ['sh1', 'sh2'],
  'Server1': ['sh2', 'sh3'],
  'Server2': ['sh1', 'sh3']},
 'shards': [{'Shard_id': 'sh1',
   'Shard_size': 4096,
   'Stud_id_low': 0,
   'primary_server': 'Server2'},
  {'Shard_id': 'sh2',
   'Shard_size': 4096,
   'Stud_id_low': 4096,
   'primary_server': 'Server1'},
  {'Shard_id': 'sh3',
   'Shard_size': 4096,
   'Stud_id_low': 8192,
   'primary_server': 'Server2'}]}

In [32]:
## Write


# Runs with writeN's defaults: 1000 requests of 10 rows each. Note this is
# 1000 requests, not the 10000 named in the task description above.
writeN()

100%|██████████| 1000/1000 [00:01<00:00, 582.67it/s]
100%|██████████| 1000/1000 [00:28<00:00, 34.70it/s]


'1000 write requests with batches of 10 took 30.55 seconds.'

In [33]:
# Read

# Runs with readN's defaults: 1000 requests, each asking for a window of 10
# ids. Note this is 1000 requests, not the 10000 named in the task description above.
readN()

100%|██████████| 1000/1000 [00:02<00:00, 459.82it/s]
100%|██████████| 1000/1000 [00:01<00:00, 508.40it/s]


'1000 read requests with batches of 10 took 4.15 seconds.'

# A-2

Increase the number of shard replicas to 7 through the configuration sent to the init endpoint. Report the write
slowdown for 10000 writes and the read speedup for 10000 reads.

In [34]:
# Initialise with 7 servers, each holding all 3 shards, so every shard has
# 7 replicas.
init = {
    "N": 7,
    "schema": {
        "columns": ["Stud_id", "Stud_name", "Stud_marks"],
        "dtypes": ["Number", "String", "String"],
    },
    "shards": [
        {"Stud_id_low": 4096 * k, "Shard_id": f"sh{k + 1}", "Shard_size": 4096}
        for k in range(3)
    ],
    "servers": {f"Server{k}": ["sh1", "sh2", "sh3"] for k in range(7)},
}

r = requests.post(url + "init", data=json.dumps(init))

In [35]:
# Check Status: fetch the configuration the service currently reports and
# display it as the cell output.
r = requests.get(url + "status")
r.json()

{'N': 7,
 'schema': {'columns': ['Stud_id', 'Stud_name', 'Stud_marks'],
  'dtypes': ['Number', 'String', 'String']},
 'servers': {'Server0': ['sh1', 'sh2', 'sh3'],
  'Server1': ['sh1', 'sh2', 'sh3'],
  'Server2': ['sh1', 'sh2', 'sh3'],
  'Server3': ['sh1', 'sh2', 'sh3'],
  'Server4': ['sh1', 'sh2', 'sh3'],
  'Server5': ['sh1', 'sh2', 'sh3'],
  'Server6': ['sh1', 'sh2', 'sh3']},
 'shards': [{'Shard_id': 'sh1',
   'Shard_size': 4096,
   'Stud_id_low': 0,
   'primary_server': 'Server5'},
  {'Shard_id': 'sh2',
   'Shard_size': 4096,
   'Stud_id_low': 4096,
   'primary_server': 'Server5'},
  {'Shard_id': 'sh3',
   'Shard_size': 4096,
   'Stud_id_low': 8192,
   'primary_server': 'Server5'}]}

In [36]:
# Write benchmark with writeN's defaults (1000 requests, 10 rows each).
writeN()

100%|██████████| 1000/1000 [00:01<00:00, 643.22it/s]
100%|██████████| 1000/1000 [01:06<00:00, 15.14it/s]


'1000 write requests with batches of 10 took 67.59 seconds.'

In [37]:
# Read benchmark with readN's defaults (1000 requests, window of 10 ids each).
readN()

100%|██████████| 1000/1000 [00:02<00:00, 459.73it/s]
100%|██████████| 1000/1000 [00:02<00:00, 496.38it/s]


'1000 read requests with batches of 10 took 4.20 seconds.'

# A-3

Increase the number of servers to 10 by adding new servers, and increase the number of shards to 6 with 8 replicas
per shard. Define the init endpoint configuration according to your choice. Report the write speedup for 10000
writes and the read speedup for 10000 reads.

In [38]:
# Initialise with 10 servers and 6 shards of 4096 ids each. The placement
# below puts every shard on exactly 8 of the 10 servers.
init = {
    "N": 10,
    "schema": {
        "columns": ["Stud_id", "Stud_name", "Stud_marks"],
        "dtypes": ["Number", "String", "String"],
    },
    "shards": [
        {"Stud_id_low": 4096 * k, "Shard_id": f"sh{k + 1}", "Shard_size": 4096}
        for k in range(6)
    ],
    "servers": {
        "Server0": ["sh1", "sh2", "sh3", "sh4", "sh6"],
        "Server1": ["sh1", "sh2", "sh3", "sh4", "sh6"],
        "Server2": ["sh1", "sh2", "sh3", "sh5", "sh6"],
        "Server3": ["sh1", "sh2", "sh3", "sh5", "sh6"],
        "Server4": ["sh1", "sh2", "sh4", "sh5", "sh6"],
        "Server5": ["sh1", "sh2", "sh4", "sh5", "sh6"],
        "Server6": ["sh1", "sh3", "sh4", "sh5", "sh6"],
        "Server7": ["sh1", "sh3", "sh4", "sh5", "sh6"],
        "Server8": ["sh2", "sh3", "sh4", "sh5"],
        "Server9": ["sh2", "sh3", "sh4", "sh5"],
    },
}

r = requests.post(url + "init", data=json.dumps(init))

In [39]:
# Check Status: fetch the configuration the service currently reports and
# display it as the cell output.
r = requests.get(url + "status")
r.json()

{'N': 10,
 'schema': {'columns': ['Stud_id', 'Stud_name', 'Stud_marks'],
  'dtypes': ['Number', 'String', 'String']},
 'servers': {'Server0': ['sh1', 'sh2', 'sh3', 'sh4', 'sh6'],
  'Server1': ['sh1', 'sh2', 'sh3', 'sh4', 'sh6'],
  'Server2': ['sh1', 'sh2', 'sh3', 'sh5', 'sh6'],
  'Server3': ['sh1', 'sh2', 'sh3', 'sh5', 'sh6'],
  'Server4': ['sh1', 'sh2', 'sh4', 'sh5', 'sh6'],
  'Server5': ['sh1', 'sh2', 'sh4', 'sh5', 'sh6'],
  'Server6': ['sh1', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server7': ['sh1', 'sh3', 'sh4', 'sh5', 'sh6'],
  'Server8': ['sh2', 'sh3', 'sh4', 'sh5'],
  'Server9': ['sh2', 'sh3', 'sh4', 'sh5']},
 'shards': [{'Shard_id': 'sh1',
   'Shard_size': 4096,
   'Stud_id_low': 0,
   'primary_server': 'Server1'},
  {'Shard_id': 'sh2',
   'Shard_size': 4096,
   'Stud_id_low': 4096,
   'primary_server': 'Server1'},
  {'Shard_id': 'sh3',
   'Shard_size': 4096,
   'Stud_id_low': 8192,
   'primary_server': 'Server1'},
  {'Shard_id': 'sh4',
   'Shard_size': 4096,
   'Stud_id_low': 12288,
 

In [40]:
# Write benchmark with writeN's defaults (1000 requests, 10 rows each).
writeN()

100%|██████████| 1000/1000 [00:01<00:00, 660.71it/s]
100%|██████████| 1000/1000 [01:16<00:00, 13.04it/s]


'1000 write requests with batches of 10 took 78.19 seconds.'

In [41]:
# Read benchmark with readN's defaults (1000 requests, window of 10 ids each).
readN()

100%|██████████| 1000/1000 [00:02<00:00, 471.01it/s]
100%|██████████| 1000/1000 [00:01<00:00, 626.89it/s]


'1000 read requests with batches of 10 took 3.73 seconds.'

# A-4 

Finally, check all the endpoints and ensure their correctness. Manually drop a server container and show that the load
balancer spawns a new container and copies the shard entries from other replicas.