In [1]:
from pymongo import MongoClient
from kubernetes import client, config
from pprint import pprint
from kubernetes.client.rest import ApiException

In [2]:
MONGO_HOST = '192.168.178.168'
MONGO_PORT = 32017
NAMESPACE = 'default'
K8S_SHARDS = 0


# Load local kubeconfig (e.g., ~/.kube/config)
config.load_kube_config()

# Create API client for core Kubernetes resources
apps_V1 = client.AppsV1Api()
core_V1 = client.CoreV1Api()

In [3]:
def connect_to_kubernetes():
    global K8S_SHARDS
    K8S_SHARDS = 0
    try:
        print("📋 Listing all pods and their statuses:\n")

        pods = core_V1.list_namespaced_pod(namespace=NAMESPACE,watch=False)
        for pod in pods.items:
            name = pod.metadata.name
            status = pod.status.phase
            if "mongodb-shard" in name:
                K8S_SHARDS += 1
            print(f" - {name}: {status}")
        print(f"\n Currently there are {K8S_SHARDS} shards in k8s")

    except ApiException as e:
        print(f"❌ Kubernetes API error: {e}")
    except Exception as e:
        print(f"❌ Failed to connect to Kubernetes: {e}")

connect_to_kubernetes()

📋 Listing all pods and their statuses:

 - mongodb-configsvr-0: Running
 - mongodb-mongos-6b769fd847-nx27h: Running
 - mongodb-mongos-6b769fd847-r2zq7: Running
 - mongodb-shard1-0: Running
 - mongodb-shard2-0: Running

 Currently there are 2 shards in k8s


In [4]:
# Replace with your mongos address and authentication if needed
mongos_uri = "mongodb://192.168.178.168:32017"

MONGO_SHARDS = 0

# Connect to the mongos router
mongoClient = MongoClient(mongos_uri)

# Test connection by listing databases
try:
    print("Connected to MongoDB. Databases:")
    pprint(mongoClient.list_database_names())
except Exception as e:
    print("Connection failed:", e)


Connected to MongoDB. Databases:
['admin', 'config', 'ycsb_sharded']


In [5]:
def list_shards(mongos_client: MongoClient):
    global MONGO_SHARDS
    """
    Lists all current shards in the MongoDB sharded cluster.
    
    :param mongos_client: The MongoClient connected to mongos
    """
    try:
        result = mongos_client.admin.command("listShards")
        MONGO_SHARDS = len(result['shards'])
        print(f"📦 Current shards in the cluster: {MONGO_SHARDS}")
        for shard in result["shards"]:
            print(f" - {shard['_id']}: {shard['host']}")
    except Exception as e:
        print(f"❌ Failed to list shards: {e}")

# List current shards
list_shards(mongoClient)

📦 Current shards in the cluster: 2
 - shard1: shard1/mongodb-shard1-0.mongodb-shard1.default.svc.cluster.local:27018
 - shard2: shard2/mongodb-shard2-0.mongodb-shard2.default.svc.cluster.local:27018


In [None]:
def remove_shard(mongos_client: MongoClient, shard_id: int):
    """
    Initiates or continues removal of a shard from the MongoDB cluster.
    This function executes the `removeShard` command only once, allowing
    external control over when to reissue it.

    :param mongos_client: The MongoClient connected to mongos
    :param shard_id: Numeric string like '3' (for shard3)
    """
    global MONGO_SHARDS
    shard_name = f"shard{shard_id}"

    try:
        res = mongos_client.admin.command("removeShard", shard_name)
        state = res["state"]
        print(f"📦 removeShard called on: {shard_name} (state: {state})")
        pprint(res)
        if (state == 'completed'):
            MONGO_SHARDS -= 1
    except Exception as e:
        print(f"❌ Failed to remove shard {shard_name}: {e}")


def add_shard(mongos_client: MongoClient, shard_id: int):
    """
    Adds a shard to the MongoDB cluster. Constructs shard URI from the shard ID.
    
    :param mongos_client: The MongoClient connected to mongos
    :param shard_id: Numeric string like '3' (for shard3)
    """
    global MONGO_SHARDS
    shard_name = f"shard{shard_id}"
    host = f"mongodb-shard{shard_id}-0.mongodb-shard{shard_id}.default.svc.cluster.local:27018"
    shard_uri = f"{shard_name}/{host}"

    try:
        result = mongos_client.admin.command("addShard", shard_uri)
        print(f"✅ Shard added: {shard_uri}")
        pprint(result)
        MONGO_SHARDS += 1
    except Exception as e:
        print(f"❌ Failed to add shard {shard_uri}: {e}")


def reshard_collection(mongos_client: MongoClient, db_name="ycsb_sharded", coll_name="usertable", chunks=40):
    """
    Initiates a reshardCollection operation on the given collection with:
    - forceRedistribution: true
    - numInitialChunks: 40
    - shardKey: { country: "hashed" }

    :param mongos_client: MongoClient connected to mongos
    :param db_name: Name of the database (e.g., "ycsb_sharded")
    :param coll_name: Name of the collection (e.g., "usertable")
    """
    numInitialChunks = chunks
    while (numInitialChunks > 2*MONGO_SHARDS):
        try:
            namespace = f"{db_name}.{coll_name}"
            cmd = {
                "reshardCollection": namespace,
                "key": { "country": "hashed" },
                "numInitialChunks": numInitialChunks,
                "forceRedistribution": True
            }
            print(f"Resharding initiated for {namespace} with {numInitialChunks} chunks...✅")
            result = mongos_client.admin.command(cmd)
            from pprint import pprint
            pprint(result)
            return True

        except Exception as e:
            #print(f"Failed to reshard {namespace}:❌  \n{e}")
            numInitialChunks -= 1
    
    return False

from bson.int64 import Int64

def split_hashed_chunks(mongos_client: MongoClient, db_name="ycsb_sharded", coll_name="usertable", num_chunks=40):
    """
    Splits a collection sharded on a hashed key into `num_chunks` chunks
    using evenly distributed hashed key space split points.

    :param mongos_client: MongoClient connected to mongos
    :param db_name: Name of the database
    :param coll_name: Name of the collection
    :param num_chunks: Number of chunks to split into (default: 40)
    """
    try:
        print(f"🔧 Splitting {db_name}.{coll_name} into {num_chunks} chunks on hashed 'country'...")

        ns = f"{db_name}.{coll_name}"
        min_hash = -2**63
        max_hash = 2**63

        step = (max_hash - min_hash) // num_chunks
        split_points = [Int64(min_hash + i * step) for i in range(1, num_chunks)]

        for i, split_point in enumerate(split_points):
            cmd = {
                "split": ns,
                "middle": { "country": split_point }
            }
            res = mongos_client.admin.command(cmd)
            print(f"✅ Split {i+1}/{num_chunks - 1} at hashed value {split_point}")

        print(f"🎉 Done splitting into {num_chunks} chunks.")

    except Exception as e:
        print(f"❌ Failed during split: {e}")


In [8]:
def scale_k8s_shard_down(apps_v1, shard_id: int, namespace: str = "default"):
    """
    Scales the StatefulSet for the given shard ID to 0 replicas.
    
    :param apps_v1: An initialized AppsV1Api client
    :param shard_id: Integer ID of the shard (e.g., 3 for mongodb-shard3)
    :param namespace: Kubernetes namespace (default: "default")
    """
    global K8S_SHARDS
    shard_name = f"mongodb-shard{shard_id}"
    try:
        apps_v1.patch_namespaced_stateful_set_scale(
            name=shard_name,
            namespace=namespace,
            body={"spec": {"replicas": 0}}
        )
        print(f"🔻 Scaled {shard_name} to 0 replicas.")
        K8S_SHARDS -= 1
    except client.exceptions.ApiException as e:
        print(f"❌ Failed to scale down {shard_name}: {e}")


def scale_k8s_shard_up(apps_v1, shard_id: int, namespace: str = "default"):
    """
    Scales the StatefulSet for the given shard ID to 1 replica.
    
    :param apps_v1: An initialized AppsV1Api client
    :param shard_id: Integer ID of the shard (e.g., 3 for mongodb-shard3)
    :param namespace: Kubernetes namespace (default: "default")
    """
    global K8S_SHARDS
    shard_name = f"mongodb-shard{shard_id}"
    try:
        apps_v1.patch_namespaced_stateful_set_scale(
            name=shard_name,
            namespace=namespace,
            body={"spec": {"replicas": 1}}
        )
        print(f"🔺 Scaled {shard_name} to 1 replica.")
        K8S_SHARDS += 1
    except client.exceptions.ApiException as e:
        print(f"❌ Failed to scale up {shard_name}: {e}")


def print_statefulsets_info(apps_v1: client.AppsV1Api, namespace: str = "default"):
    """
    Prints information about all StatefulSets in the specified namespace.

    :param apps_v1: Initialized AppsV1Api client
    :param namespace: Kubernetes namespace (default: "default")
    """
    try:
        statefulsets = apps_v1.list_namespaced_stateful_set(namespace=namespace)
        if not statefulsets.items:
            print(f"ℹ️ No StatefulSets found in namespace '{namespace}'.")
            return

        print(f"📦 StatefulSets in namespace '{namespace}':\n")
        for sts in statefulsets.items:
            name = sts.metadata.name
            replicas = sts.spec.replicas
            ready_replicas = sts.status.ready_replicas or 0
            selector = sts.spec.selector.match_labels
            print(f" - {name}")
            print(f"    🔢 Replicas: {ready_replicas}/{replicas}")
            print(f"    🏷️ Selector: {selector}\n")

    except client.exceptions.ApiException as e:
        print(f"❌ Kubernetes API error: {e}")
    except Exception as e:
        print(f"❌ Failed to retrieve StatefulSets: {e}")


In [None]:
print_statefulsets_info(apps_V1)

📦 StatefulSets in namespace 'default':

 - mongodb-configsvr
    🔢 Replicas: 1/1
    🏷️ Selector: {'app': 'mongodb-configsvr'}

 - mongodb-shard1
    🔢 Replicas: 1/1
    🏷️ Selector: {'app': 'mongodb-shard1'}

 - mongodb-shard2
    🔢 Replicas: 1/1
    🏷️ Selector: {'app': 'mongodb-shard2'}

 - mongodb-shard3
    🔢 Replicas: 0/0
    🏷️ Selector: {'app': 'mongodb-shard3'}

 - mongodb-shard4
    🔢 Replicas: 0/0
    🏷️ Selector: {'app': 'mongodb-shard4'}



In [None]:
**Always scale down MongoDB before Kubernetes and scale up Kubernetes before MongoDB**

In [None]:
scale_k8s_shard_down(apps_V1, 3)

In [None]:
scale_k8s_shard_up(apps_V1, 2)

In [None]:
add_shard(mongoClient, 3)

In [None]:
remove_shard(mongoClient, 3)

In [20]:
reshard_collection(mongoClient, chunks=40)

Resharding initiated for ycsb_sharded.usertable with 40 chunks...✅
Failed to reshard ycsb_sharded.usertable:❌  
The shard key provided does not have enough cardinality to make the required number of chunks of 40, it can only make 28 chunks, full error: {'ok': 0.0, 'errmsg': 'The shard key provided does not have enough cardinality to make the required number of chunks of 40, it can only make 28 chunks', 'code': 4952606, 'codeName': 'Location4952606', '$clusterTime': {'clusterTime': Timestamp(1747311177, 16), 'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'keyId': 0}}, 'operationTime': Timestamp(1747311177, 16)}
Resharding initiated for ycsb_sharded.usertable with 39 chunks...✅
Failed to reshard ycsb_sharded.usertable:❌  
The shard key provided does not have enough cardinality to make the required number of chunks of 39, it can only make 31 chunks, full error: {'ok': 0.0, 'errmsg': 'The shard key provided does not have enough ca

True

In [19]:
split_hashed_chunks(mongoClient, num_chunks=40)

🔧 Splitting ycsb_sharded.usertable into 40 chunks on hashed 'country'...
✅ Split 1/39 at hashed value -8762203435012037018
✅ Split 2/39 at hashed value -8301034833169298228
✅ Split 3/39 at hashed value -7839866231326559438
✅ Split 4/39 at hashed value -7378697629483820648
✅ Split 5/39 at hashed value -6917529027641081858
✅ Split 6/39 at hashed value -6456360425798343068
✅ Split 7/39 at hashed value -5995191823955604278
✅ Split 8/39 at hashed value -5534023222112865488
✅ Split 9/39 at hashed value -5072854620270126698
✅ Split 10/39 at hashed value -4611686018427387908
✅ Split 11/39 at hashed value -4150517416584649118
✅ Split 12/39 at hashed value -3689348814741910328
✅ Split 13/39 at hashed value -3228180212899171538
✅ Split 14/39 at hashed value -2767011611056432748
✅ Split 15/39 at hashed value -2305843009213693958
✅ Split 16/39 at hashed value -1844674407370955168
✅ Split 17/39 at hashed value -1383505805528216378
✅ Split 18/39 at hashed value -922337203685477588
✅ Split 19/39 at ha

In [16]:
def same_replicas_number():
    global MONGO_SHARDS, K8S_SHARDS
    if (MONGO_SHARDS == K8S_SHARDS):
        print(f"The number of replicas in Kubernetes and MongoDB is the same: {K8S_SHARDS}")
        return True
    else:
        print("The number of replicas is different:")
        print(f"- {K8S_SHARDS} in Kubernetes")
        print(f"- {MONGO_SHARDS} in MongoDB")
        return False

In [18]:
same_replicas_number()

The number of replicas in Kubernetes and MongoDB is the same: 3


True

In [13]:
def scale_cluster_down(mongoClient, apps_V1):
    if (same_replicas_number == False):
        print("❌Can't scale down because replicas number is different!")
        same_replicas_number()
        return False
    else:
        print(f"Starting scaling down from {MONGO_SHARDS} to {MONGO_SHARDS - 1} replicas...\n")
        remove_shard(mongoClient, MONGO_SHARDS)
        print("Resharding the collection...\n")
        reshard_collection(mongoClient)
        remove_shard(mongoClient, MONGO_SHARDS)
        scale_k8s_shard_down(apps_V1, K8S_SHARDS)
        print("✅Scaled down succesfully!")
        same_replicas_number()
        return True

def scale_cluster_up(mongoClient, apps_V1):
    if (same_replicas_number == False):
        print("❌Can't scale up because replicas number is different!")
        same_replicas_number()
        return False
    else:
        print(f"Starting scaling up from {K8S_SHARDS} to {K8S_SHARDS + 1} replicas...\n")
        scale_k8s_shard_up(apps_V1, K8S_SHARDS + 1)
        add_shard(mongoClient, K8S_SHARDS)
        print("Resharding the collection...\n")
        reshard_collection(mongoClient)
        print("✅Scaled up succesfully!")
        same_replicas_number()
        return True
        

In [None]:
scale_cluster_down(mongoClient, apps_V1)

In [21]:
scale_cluster_up(mongoClient, apps_V1)

Starting scaling up from 3 to 4 replicas...

🔺 Scaled mongodb-shard4 to 1 replica.
✅ Shard added: shard4/mongodb-shard4-0.mongodb-shard4.default.svc.cluster.local:27018
{'$clusterTime': {'clusterTime': Timestamp(1747312123, 20),
                  'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00',
                                'keyId': 0}},
 'ok': 1.0,
 'operationTime': Timestamp(1747312123, 20),
 'shardAdded': 'shard4'}
Resharding the collection...

Resharding initiated for ycsb_sharded.usertable with 40 chunks...✅
Failed to reshard ycsb_sharded.usertable:❌  
The shard key provided does not have enough cardinality to make the required number of chunks of 40, it can only make 26 chunks, full error: {'ok': 0.0, 'errmsg': 'The shard key provided does not have enough cardinality to make the required number of chunks of 40, it can only make 26 chu

KeyboardInterrupt: 