In [1]:
import os
from time import sleep

In [2]:
from cassandra import InvalidRequest
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster

In [3]:
def print_resultset_head(resultset, limit=3):
    """Print the first ``limit`` rows of ``resultset`` (3 by default).

    Args:
        resultset: Any iterable of rows, e.g. the value returned by
            ``session.execute``.
        limit: Maximum number of rows to print; must be at least 1.

    Raises:
        ValueError: If ``limit`` is smaller than 1.

    Prints ``None`` when ``resultset`` yields no rows at all.
    """
    if limit < 1:
        raise ValueError("limit must be >= 1")
    shown = 0
    for shown, row in enumerate(resultset, start=1):
        print(row)
        if shown >= limit:
            # Stop early so the remaining rows are never iterated.
            break
    if shown == 0:
        print(None)

In [4]:
def print_resultset_all(resultset):
    """Print every row of ``resultset``, or ``None`` if it yields no rows."""
    row_count = 0
    for row_count, row in enumerate(resultset, start=1):
        print(row)
    # row_count stays 0 only when the loop body never ran.
    if not row_count:
        print(None)

In [5]:
# Credentials are taken from the environment when set, so real secrets never
# have to be written into the notebook; the fallbacks are the admin/admin
# account used by the local Docker playground.
auth_provider = PlainTextAuthProvider(
    os.environ.get("CASSANDRA_USER", "admin"),
    os.environ.get("CASSANDRA_PASSWORD", "admin"),
)
nodes = ["cassandra1"]
port = 9042

In [6]:
# Build the driver handle from the settings defined above, then open a session
# that every later cell reuses.
cluster = Cluster(
    contact_points=nodes,
    port=port,
    auth_provider=auth_provider,
    protocol_version=4,
)
session = cluster.connect()

In [7]:
query = "SELECT * FROM system_schema.keyspaces;"
res = session.execute(query)
print_resultset_all(res)

Row(keyspace_name='system_auth', durable_writes=True, replication=OrderedMapSerializedKey([('class', 'org.apache.cassandra.locator.SimpleStrategy'), ('replication_factor', '1')]))
Row(keyspace_name='system_schema', durable_writes=True, replication=OrderedMapSerializedKey([('class', 'org.apache.cassandra.locator.LocalStrategy')]))
Row(keyspace_name='system_distributed', durable_writes=True, replication=OrderedMapSerializedKey([('class', 'org.apache.cassandra.locator.SimpleStrategy'), ('replication_factor', '3')]))
Row(keyspace_name='system', durable_writes=True, replication=OrderedMapSerializedKey([('class', 'org.apache.cassandra.locator.LocalStrategy')]))
Row(keyspace_name='system_traces', durable_writes=True, replication=OrderedMapSerializedKey([('class', 'org.apache.cassandra.locator.SimpleStrategy'), ('replication_factor', '2')]))


In [8]:
query = """
    CREATE KEYSPACE IF NOT EXISTS data_playground WITH REPLICATION = {
        'class': 'SimpleStrategy',
        'replication_factor': 1
    };
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa56563fd70>

In [9]:
query = """
    SELECT *
    FROM system_schema.keyspaces
    WHERE keyspace_name = 'data_playground';
"""
res = session.execute(query)
print_resultset_all(res)

Row(keyspace_name='data_playground', durable_writes=True, replication=OrderedMapSerializedKey([('class', 'org.apache.cassandra.locator.SimpleStrategy'), ('replication_factor', '1')]))


In [10]:
query = "USE data_playground"
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa56407bf50>

In [11]:
query = """
    SELECT table_name
    FROM system_schema.tables
    WHERE keyspace_name = 'data_playground'
"""
res = session.execute(query)
print_resultset_all(res)

None


In [12]:
# NOTE: partition key  = product_id
#       clustering key = order_id
#       brand_id is a regular column; it is not part of the primary key.
query = """
    CREATE TABLE IF NOT EXISTS purchases(
        event_time      TIMESTAMP,
        order_id        BIGINT,
        product_id      BIGINT,
        quantity        SMALLINT,
        category_id     BIGINT,
        category_code   VARCHAR,
        brand_id        BIGINT,
        price           DECIMAL,
        user_id         BIGINT,
        gender          VARCHAR,
        color           VARCHAR,
        metal           VARCHAR,
        gem             VARCHAR,
        PRIMARY KEY ((product_id), order_id)
    );
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa564195c10>

In [13]:
query = """
    SELECT table_name
    FROM system_schema.tables
    WHERE keyspace_name = 'data_playground'
"""
res = session.execute(query)
print_resultset_all(res)

Row(table_name='purchases')


In [14]:
query = "DESCRIBE TABLE purchases"
res = session.execute(query)
print(res.one())

Row(keyspace_name='data_playground', type='table', name='purchases', create_statement="CREATE TABLE data_playground.purchases (\n    product_id bigint,\n    order_id bigint,\n    brand_id bigint,\n    category_code text,\n    category_id bigint,\n    color text,\n    event_time timestamp,\n    gem text,\n    gender text,\n    metal text,\n    price decimal,\n    quantity smallint,\n    user_id bigint,\n    PRIMARY KEY (product_id, order_id)\n) WITH CLUSTERING ORDER BY (order_id ASC)\n    AND additional_write_policy = '99p'\n    AND bloom_filter_fp_chance = 0.01\n    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n    AND cdc = false\n    AND comment = ''\n    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}\n    AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n    AND memtable = 'default'\n    AND crc_check_chance = 1.0\n    AN

In [15]:
query = """
    SELECT *
    FROM purchases
"""
res = session.execute(query)
print_resultset_all(res)

None


In [16]:
query = """
    INSERT INTO purchases (
        event_time,
        order_id,
        product_id,
        quantity,
        category_id,
        category_code,
        brand_id,
        price,
        user_id,
        gender,
        color,
        metal,
        gem)
    VALUES (
        '2018-12-01 11:40:29 UTC',
        1924719191579951782,
        1842195256808833386,
        1,
        1806829201890738522,
        'jewelry.earring',
        0,
        561.51,
        1515915625207851155,
        NULL,
        'red',
        'gold',
        'diamond')
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddc7380>

In [17]:
query = """
    SELECT *
    FROM purchases
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=1842195256808833386, order_id=1924719191579951782, brand_id=0, category_code='jewelry.earring', category_id=1806829201890738522, color='red', event_time=datetime.datetime(2018, 12, 1, 11, 40, 29), gem='diamond', gender=None, metal='gold', price=Decimal('561.51'), quantity=1, user_id=1515915625207851155)


In [18]:
query = """
    SELECT order_id, product_id
    FROM purchases
"""
res = session.execute(query)
print_resultset_all(res)

Row(order_id=1924719191579951782, product_id=1842195256808833386)


In [19]:
query = """
    BEGIN BATCH
        INSERT INTO purchases (
            event_time,
            order_id,
            product_id,
            quantity,
            category_id,
            category_code,
            brand_id,
            price,
            user_id,
            gender,
            color,
            metal,
            gem)
        VALUES (
            '2018-12-01 17:38:31 UTC',
            1924899396621697920,
            1806829193678291446,
            1,
            1806829201848795479,
            NULL,
            NULL,
            212.14,
            1515915625071969944,
            NULL,
            'yellow',
            'gold',
            NULL);
        INSERT INTO purchases (
            event_time,
            order_id,
            product_id,
            quantity,
            category_id,
            category_code,
            brand_id,
            price,
            user_id,
            gender,
            color,
            metal,
            gem)
        VALUES (
            '2018-12-01 17:38:31 UTC',
            1924819390631697970,
            1806829193678291446,
            1,
            1806829201848795479,
            NULL,
            NULL,
            212.14,
            1515915625071969944,
            NULL,
            'yellow',
            'gold',
            NULL);
        INSERT INTO purchases (
            event_time,
            order_id,
            product_id,
            quantity,
            category_id,
            category_code,
            brand_id,
            price,
            user_id,
            gender,
            color,
            metal,
            gem)
        VALUES (
            '2018-12-02 13:53:42 UTC',
            1925511016616034733,
            1842214461889315556,
            1,
            1806829201915904347,
            'jewelry.pendant',
            1,
            54.66,
            1515915625048493557,
            'f',
            'white',
            'gold',
            'sapphire');
    APPLY BATCH;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa565062d20>

In [20]:
query = """
    BEGIN UNLOGGED BATCH
        INSERT INTO purchases (
            event_time,
            order_id,
            product_id,
            quantity,
            category_id,
            category_code,
            brand_id,
            price,
            user_id,
            gender,
            color,
            metal,
            gem)
        VALUES (
            '2018-12-02 17:44:02 UTC',
            1925626951238681511,
            1835566849434059453,
            1,
            1806829201915904347,
            'jewelry.pendant',
            0,
            88.90,
            1515915625207630915,
            'f',
            'red',
            'gold',
            'diamond');
        INSERT INTO purchases (
            event_time,
            order_id,
            product_id,
            quantity,
            category_id,
            category_code,
            brand_id,
            price,
            user_id,
            gender,
            color,
            metal,
            gem)
        VALUES (
            '2018-12-02 21:30:19 UTC',
            1925740842841014667,
            1873936840742928865,
            1,
            1806829201924292956,
            'jewelry.necklace',
            0,
            417.67,
            1515915625175329378,
            NULL,
            'red',
            'gold',
            'amethyst');
    APPLY BATCH;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddce930>

In [21]:
query = """
    SELECT count(1)
    FROM purchases
"""
res = session.execute(query)
print_resultset_all(res)

Row(count=6)


In [22]:
query = """
    SELECT quantity
    FROM purchases
    WHERE product_id = 1842195256808833386
    AND order_id = 1924719191579951782
"""
res = session.execute(query)
print_resultset_all(res)

Row(quantity=1)


In [23]:
query = """
    UPDATE purchases
    SET quantity = 100
    WHERE product_id = 1842195256808833386
    AND order_id = 1924719191579951782
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa56568a0f0>

In [24]:
query = """
    SELECT quantity
    FROM purchases
    WHERE product_id = 1842195256808833386
    AND order_id = 1924719191579951782
"""
res = session.execute(query)
print_resultset_all(res)

Row(quantity=100)


In [25]:
query = """
    DELETE FROM purchases
    WHERE product_id = 1842195256808833386
    AND order_id = 1924719191579951782
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa565601f10>

In [26]:
query = """
    SELECT count(1)
    FROM purchases
    WHERE product_id = 1842195256808833386
    AND order_id = 1924719191579951782
"""
res = session.execute(query)
print_resultset_all(res)

Row(count=0)


In [27]:
# event_time is not part of the primary key, so the server rejects this range
# filter unless ALLOW FILTERING is added; the error is caught and printed.
query = """
    SELECT count(1)
    FROM purchases
    WHERE event_time >= '2018-12-01'
    AND event_time < '2018-12-02'
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Cannot execute this query as it might involve data filtering and thus may have unpredictable performance. If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING"


In [28]:
query = """
    SELECT count(1)
    FROM purchases
    WHERE product_id >= 1806829193678291446
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Cannot execute this query as it might involve data filtering and thus may have unpredictable performance. If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING"


In [29]:
query = """
    SELECT count(1)
    FROM purchases
    WHERE quantity = 7
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Cannot execute this query as it might involve data filtering and thus may have unpredictable performance. If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING"


In [30]:
query = """
    SELECT count(1)
    FROM purchases
    WHERE product_id >= 1806829193678291446
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Cannot execute this query as it might involve data filtering and thus may have unpredictable performance. If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING"


In [31]:
query = """
    SELECT count(1)
    FROM purchases
    WHERE product_id >= 1806829193678291446
    ALLOW FILTERING
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Row(count=5)


In [32]:
query = """
    SELECT product_id
    FROM purchases
    WHERE product_id >= 1806829193678291446
    ORDER BY order_id
    ALLOW FILTERING
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="ORDER BY is only supported when the partition key is restricted by an EQ or an IN."


In [33]:
query = """
    SELECT product_id, order_id
    FROM purchases
    WHERE product_id = 1806829193678291446
    ORDER BY order_id
    ALLOW FILTERING
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Row(product_id=1806829193678291446, order_id=1924819390631697970)
Row(product_id=1806829193678291446, order_id=1924899396621697920)


In [34]:
query = """
    SELECT product_id, order_id
    FROM purchases
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Row(product_id=1835566849434059453, order_id=1925626951238681511)
Row(product_id=1873936840742928865, order_id=1925740842841014667)
Row(product_id=1842214461889315556, order_id=1925511016616034733)
Row(product_id=1806829193678291446, order_id=1924819390631697970)
Row(product_id=1806829193678291446, order_id=1924899396621697920)


In [35]:
query = """
    SELECT product_id, order_id
    FROM purchases
    LIMIT 3
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Row(product_id=1835566849434059453, order_id=1925626951238681511)
Row(product_id=1873936840742928865, order_id=1925740842841014667)
Row(product_id=1842214461889315556, order_id=1925511016616034733)


In [36]:
# IF NOT EXISTS keeps this cell re-runnable, matching the CREATE KEYSPACE and
# CREATE TABLE cells; without it a second run raises because the index exists.
query = """
    CREATE INDEX IF NOT EXISTS idx_purchases_qty ON purchases (quantity)
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa565061d90>

In [37]:
# Same quantity filter that was rejected earlier, now run after the secondary
# index on quantity has been created.
# NOTE(review): the one-second pause presumably gives the new index time to
# become queryable — confirm that a fixed delay is sufficient.
sleep(1)
query = """
    SELECT count(1)
    FROM purchases
    WHERE quantity = 7
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Row(count=0)


In [38]:
query = """
    DROP INDEX IF EXISTS idx_purchases_qty
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa5640ae630>

In [39]:
query = """
    SELECT count(1)
    FROM purchases
    WHERE quantity = 7
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Cannot execute this query as it might involve data filtering and thus may have unpredictable performance. If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING"


In [40]:
# IF NOT EXISTS keeps this cell re-runnable, matching the CREATE KEYSPACE and
# CREATE TABLE cells; without it a second run raises because the view exists.
# The view keeps the base table's primary key and projects only brand_id.
query = """
    CREATE MATERIALIZED VIEW IF NOT EXISTS vw_brand_prod AS
    SELECT product_id, order_id, brand_id
    FROM purchases
    WHERE product_id IS NOT NULL
    AND order_id IS NOT NULL
    PRIMARY KEY (product_id, order_id);
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54dddab70>

In [41]:
# Read back everything exposed by the materialized view.
# NOTE(review): the recorded output lists fewer rows than the base table held
# at this point — presumably the view was still being populated when the query
# ran; confirm, and consider waiting for the view build before querying.
query = """
    SELECT *
    FROM vw_brand_prod
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Row(product_id=1835566849434059453, order_id=1925626951238681511, brand_id=0)
Row(product_id=1873936840742928865, order_id=1925740842841014667, brand_id=0)


In [42]:
query = """
    DROP MATERIALIZED VIEW IF EXISTS vw_brand_prod
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54dddadb0>

In [43]:
query = """
    DESCRIBE data_playground;
"""
res = session.execute(query)
print(res.one().create_statement)

CREATE KEYSPACE data_playground WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}  AND durable_writes = true;


This is the system info before setting up the Cassandra cluster with multiple nodes:

```shell
$ docker exec -it cassandra1 nodetool status
Datacenter: datacenter1
=======================
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
--  Address     Load        Tokens  Owns (effective)  Host ID                               Rack 
UN  172.18.0.2  197.25 KiB  16      100.0%            461a38c7-7e15-4868-aefb-671cc9494a84  rack1
```
```shell
$ docker exec -it cassandra1 nodetool getseeds
Seed node list does not contain any remote node IPs
```

This is the result of setting up the Cassandra cluster with multiple nodes:

```shell
$ docker exec -it cassandra1 nodetool status
Datacenter: datacenter1
=======================
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
--  Address     Load        Tokens  Owns (effective)  Host ID                               Rack 
UN  172.18.0.3  104.33 KiB  16      100.0%            20538563-2355-4f7e-b8e7-7390172bc268  rack1
UN  172.18.0.2  104.34 KiB  16      100.0%            0817c6a1-a1a5-4993-8366-1e9f8e21e33f  rack1
```
```shell
$ docker exec -it cassandra1 nodetool getseeds
Current list of seed node IPs, excluding the current node's IP: cassandra2/172.18.0.3:7000
```
```shell
$ docker exec -it cassandra2 nodetool getseeds
Current list of seed node IPs, excluding the current node's IP: cassandra1/172.18.0.2:7000
```

These are the nodes where product_ids 1806829193678291446 and 1842214461889315556 are placed:

```shell
$ docker exec -it cassandra1 nodetool getendpoints data_playground purchases "1806829193678291446"
172.18.0.3
```
```shell
$ docker exec -it cassandra1 nodetool getendpoints data_playground purchases "1842214461889315556"
172.18.0.2
```

In [44]:
query = """
    ALTER KEYSPACE data_playground
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 2};
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddd9be0>

In [45]:
query = """
    DESCRIBE data_playground;
"""
res = session.execute(query)
print(res.one().create_statement)

CREATE KEYSPACE data_playground WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '2'}  AND durable_writes = true;


These are the nodes where product_ids 1806829193678291446 and 1842214461889315556 are placed after changing replication_factor to 2:

```shell
$ docker exec -it cassandra1 nodetool getendpoints data_playground purchases "1806829193678291446"
172.18.0.3
172.18.0.2
```
```shell
$ docker exec -it cassandra1 nodetool getendpoints data_playground purchases "1842214461889315556"
172.18.0.3
172.18.0.2
```

In [46]:
query = """
    SELECT * FROM purchases;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=1842214461889315556, order_id=1925511016616034733, brand_id=1, category_code='jewelry.pendant', category_id=1806829201915904347, color='white', event_time=datetime.datetime(2018, 12, 2, 13, 53, 42), gem='sapphire', gender='f', metal='gold', price=Decimal('54.66'), quantity=1, user_id=1515915625048493557)
Row(product_id=1806829193678291446, order_id=1924819390631697970, brand_id=None, category_code=None, category_id=1806829201848795479, color='yellow', event_time=datetime.datetime(2018, 12, 1, 17, 38, 31), gem=None, gender=None, metal='gold', price=Decimal('212.14'), quantity=1, user_id=1515915625071969944)
Row(product_id=1806829193678291446, order_id=1924899396621697920, brand_id=None, category_code=None, category_id=1806829201848795479, color='yellow', event_time=datetime.datetime(2018, 12, 1, 17, 38, 31), gem=None, gender=None, metal='gold', price=Decimal('212.14'), quantity=1, user_id=1515915625071969944)


A repair can be run with `nodetool repair`:
```shell
$ docker exec -it cassandra1 nodetool repair
[2024-07-20 16:39:05,976] Starting repair command #1 (9468e7f0-46b6-11ef-8263-abbfc32045bc), repairing keyspace system_traces with repair options (parallelism: parallel, primary range: false, incremental: true, job threads: 1, ColumnFamilies: [], dataCenters: [], hosts: [], previewKind: NONE, # of ranges: 32, pull repair: false, force repair: false, optimise streams: false, ignore unreplicated keyspaces: false, repairPaxos: true, paxosOnly: false)
[2024-07-20 16:39:06,135] Repair session 947284e0-46b6-11ef-8263-abbfc32045bc for range [(-3259949072998122821,-1862889021449116627], (6984897915908523417,7779276293256371987], (269671177986920246,734279726764040745], (5544629686005940179,6189271740742473349], (-9048444484756136331,-8554822204106217575], (-1819680843095539581,-748180226755506769], (-5663777822722142737,-5195622831181496837], (9074249936990983245,-9048444484756136331], (734279726764040745,1087343110599271040], (3608955927822566162,3978387491983345878], (8056398532248556230,9074249936990983245], (-8554822204106217575,-8070299032240022255], (-5195622831181496837,-4420257817041550763], (4700253038514778421,5221907497663937850], (-6743150630169744394,-6291664731416435126], (-4104325720489284577,-3452893573963015265], (-243865725752073330,269671177986920246], (-1862889021449116627,-1819680843095539581], (-8070299032240022255,-7479066322880638035], (-6291664731416435126,-5663777822722142737], (2163098991824850579,2512914027587627873], (-7479066322880638035,-6743150630169744394], (-748180226755506769,-243865725752073330], (-4420257817041550763,-4104325720489284577], (7779276293256371987,8056398532248556230], (1087343110599271040,2163098991824850579], (5221907497663937850,5544629686005940179], (6189271740742473349,6984897915908523417], (3978387491983345878,4700253038514778421], (-3452893573963015265,-3259949072998122821], (2899014684535744220,3608955927822566162], (2512914027587627873,2899014684535744220]] finished (progress: 57%)
[2024-07-20 16:39:06,157] Repair completed successfully
[2024-07-20 16:39:06,162] Repair command #1 finished in 0 seconds
[2024-07-20 16:39:06,168] condition satisfied queried for parent session status and discovered repair completed.
[2024-07-20 16:39:06,169] Repair completed successfully
[2024-07-20 16:39:06,172] Replication factor is 1. No repair is needed for keyspace 'system_auth'
[2024-07-20 16:39:06,177] Starting repair command #2 (9488a4f0-46b6-11ef-8263-abbfc32045bc), repairing keyspace data_playground with repair options (parallelism: parallel, primary range: false, incremental: true, job threads: 1, ColumnFamilies: [], dataCenters: [], hosts: [], previewKind: NONE, # of ranges: 32, pull repair: false, force repair: false, optimise streams: false, ignore unreplicated keyspaces: false, repairPaxos: true, paxosOnly: false)
[2024-07-20 16:39:06,380] Repair session 948b6410-46b6-11ef-8263-abbfc32045bc for range [(-3259949072998122821,-1862889021449116627], (6984897915908523417,7779276293256371987], (269671177986920246,734279726764040745], (5544629686005940179,6189271740742473349], (-9048444484756136331,-8554822204106217575], (-1819680843095539581,-748180226755506769], (-5663777822722142737,-5195622831181496837], (9074249936990983245,-9048444484756136331], (734279726764040745,1087343110599271040], (3608955927822566162,3978387491983345878], (8056398532248556230,9074249936990983245], (-8554822204106217575,-8070299032240022255], (-5195622831181496837,-4420257817041550763], (4700253038514778421,5221907497663937850], (-6743150630169744394,-6291664731416435126], (-4104325720489284577,-3452893573963015265], (-243865725752073330,269671177986920246], (-1862889021449116627,-1819680843095539581], (-8070299032240022255,-7479066322880638035], (-6291664731416435126,-5663777822722142737], (2163098991824850579,2512914027587627873], (-7479066322880638035,-6743150630169744394], (-748180226755506769,-243865725752073330], (-4420257817041550763,-4104325720489284577], (7779276293256371987,8056398532248556230], (1087343110599271040,2163098991824850579], (5221907497663937850,5544629686005940179], (6189271740742473349,6984897915908523417], (3978387491983345878,4700253038514778421], (-3452893573963015265,-3259949072998122821], (2899014684535744220,3608955927822566162], (2512914027587627873,2899014684535744220]] finished (progress: 57%)
[2024-07-20 16:39:06,402] Repair completed successfully
[2024-07-20 16:39:06,407] Repair command #2 finished in 0 seconds
[2024-07-20 16:39:06,409] condition satisfied queried for parent session status and discovered repair completed.
[2024-07-20 16:39:06,409] Repair completed successfully
```

A backup can be done through snapshots using nodetool:

```shell
$ docker exec -it cassandra1 nodetool snapshot data_playground
Requested creating snapshot(s) for [data_playground] with snapshot name [1721498691881] and options {skipFlush=false}
Snapshot directory: 1721498691881
```

To restore the table, you first need to run the schema.cql file:

```shell
$ docker exec cassandra1 cqlsh -u admin -p admin -f /var/lib/cassandra/data/data_playground/purchases-35d29e5046c211ef93faadd4c437d7ed/snapshots/1721498691881/schema.cql cassandra1

Warning: Using a password on the command line interface can be insecure.
Recommendation: use the credentials file to securely provide the password.


```

And then refresh the data with sstableloader:

```shell
$ docker exec -it cassandra1 sstableloader -d cassandra1 -k data_playground /var/lib/cassandra/data/data_playground/purchases-35d29e5046c211ef93faadd4c437d7ed/snapshots/1721498691881    
Established connection to initial hosts
Opening sstables and calculating sections to stream
Streaming relevant part of /var/lib/cassandra/data/data_playground/purchases-35d29e5046c211ef93faadd4c437d7ed/snapshots/1721498691881/nb-1-big-Data.db  to [/172.18.0.2:7000, /172.18.0.3:7000]
progress: [/172.18.0.2:7000]0:1/7 3  % total: 3% 0.073KiB/s (avg: 0.073KiB/s)
progress: [/172.18.0.2:7000]0:2/7 4  % total: 4% 10.885KiB/s (avg: 0.082KiB/s)
progress: [/172.18.0.2:7000]0:3/7 97 % total: 97% 10.065MiB/s (avg: 1.889KiB/s)
progress: [/172.18.0.2:7000]0:4/7 98 % total: 98% 89.077KiB/s (avg: 1.904KiB/s)
progress: [/172.18.0.2:7000]0:5/7 98 % total: 98% 13.896KiB/s (avg: 1.909KiB/s)
progress: [/172.18.0.2:7000]0:6/7 99 % [/172.18.0.3:7000]0:0/7 0  % total: 49% 118.205KiB/s (avg: 1.932KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:0/7 0  % total: 50% 8.339KiB/s (avg: 1.935KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:1/7 3  % total: 51% 39.762KiB/s (avg: 2.005KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:2/7 4  % total: 52% 28.718KiB/s (avg: 2.012KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:3/7 97 % total: 98% 2.033MiB/s (avg: 3.810KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:4/7 98 % total: 99% 40.365KiB/s (avg: 3.824KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:5/7 98 % total: 99% 10.129KiB/s (avg: 3.827KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:6/7 99 % total: 99% 58.138KiB/s (avg: 3.849KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:7/7 100% total: 100% 8.742KiB/s (avg: 3.851KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:7/7 100% total: 100% 0.000KiB/s (avg: 3.790KiB/s)
progress: [/172.18.0.2:7000]0:7/7 100% [/172.18.0.3:7000]0:7/7 100% total: 100% 0.000KiB/s (avg: 3.781KiB/s)

Summary statistics: 
   Connections per host    : 1         
   Total files transferred : 7         
   Total bytes transferred : 11.104KiB 
   Total duration          : 3098 ms   
   Average transfer rate   : 3.583KiB/s
   Peak transfer rate      : 3.851KiB/s

```

In [47]:
query = """
    ALTER TABLE purchases ADD flag_physical_store BOOLEAN;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa5640ad3d0>

In [48]:
query = """
    SELECT flag_physical_store FROM purchases;
"""
res = session.execute(query)
print_resultset_all(res)

Row(flag_physical_store=None)
Row(flag_physical_store=None)
Row(flag_physical_store=None)


In [49]:
query = """
    ALTER TABLE purchases DROP flag_physical_store;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddc5850>

In [50]:
query = """
    SELECT flag_physical_store FROM purchases;
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Undefined column name flag_physical_store in table data_playground.purchases"


In [51]:
query = """
    DROP TYPE IF EXISTS category_hierarchy;
"""
session.execute(query)
query = """
    CREATE TYPE IF NOT EXISTS category_hierarchy (root BIGINT, mid BIGINT, leaf BIGINT);
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54dddbe90>

In [52]:
query = """
    CREATE TABLE IF NOT EXISTS waldo(
        product_id          BIGINT,
        prod_hierarchy      CATEGORY_HIERARCHY,
        PRIMARY KEY (product_id)
    );
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddd8a10>

In [53]:
query = """
    INSERT INTO waldo (product_id, prod_hierarchy)
    VALUES (3, {root: 1, mid: 2, leaf: 3});
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddd9550>

In [54]:
query = """
    SELECT * FROM waldo;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=3, prod_hierarchy=category_hierarchy(root=1, mid=2, leaf=3))


In [55]:
query = """
    SELECT product_id, prod_hierarchy.root FROM waldo;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=3, prod_hierarchy_root=1)


In [56]:
# Assigns a new user-defined-type literal that names only the `root` field.
# NOTE(review): this presumably replaces the whole value, leaving `mid` and
# `leaf` null; `SET prod_hierarchy.root = 0` would be the single-field form —
# confirm which behavior is intended.
query = """
    UPDATE waldo
    SET prod_hierarchy = {root : 0}
    WHERE product_id = 3
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa5656b6ae0>

In [57]:
query = """
    SELECT product_id, prod_hierarchy.root FROM waldo;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=3, prod_hierarchy_root=0)


In [58]:
query = """
    DROP TYPE IF EXISTS CATEGORY_HIERARCHY
"""
try:
    res = session.execute(query)
    print_resultset_all(res)
except InvalidRequest as e:
    print(e)

Error from server: code=2200 [Invalid query] message="Cannot drop user type 'data_playground.category_hierarchy' as it is still used by tables waldo"


In [59]:
query = """
    DROP TABLE IF EXISTS waldo
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa56403b7d0>

In [60]:
query = """
    DROP TYPE IF EXISTS CATEGORY_HIERARCHY
"""
try:
    session.execute(query)
except InvalidRequest as e:
    print(e)

In [61]:
query = """
    CREATE TABLE IF NOT EXISTS sherley(
        product_id          BIGINT,
        linked_products     SET<BIGINT>,
        PRIMARY KEY (product_id)
    );
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa564165370>

In [62]:
query = """
    INSERT INTO sherley (product_id, linked_products)
    VALUES (1, {2, 3, 4, 5});
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa56407b260>

In [63]:
query = """
    SELECT * FROM sherley;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=1, linked_products=SortedSet([2, 3, 4, 5]))


In [64]:
query = """
    UPDATE sherley
    SET linked_products = linked_products + {99}
    WHERE product_id = 1;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddc73b0>

In [65]:
query = """
    SELECT * FROM sherley;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=1, linked_products=SortedSet([2, 3, 4, 5, 99]))


In [66]:
# Removes the element 1 from the set. The set printed before this cell does not
# contain 1, and the SELECT that follows shows the set unchanged.
query = """
    UPDATE sherley
    SET linked_products = linked_products - {1}
    WHERE product_id = 1;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa54ddc4fb0>

In [67]:
query = """
    SELECT * FROM sherley;
"""
res = session.execute(query)
print_resultset_all(res)

Row(product_id=1, linked_products=SortedSet([2, 3, 4, 5, 99]))


In [68]:
query = """
    SELECT linked_products FROM sherley;
"""
res = session.execute(query)
print_resultset_all(res)

Row(linked_products=SortedSet([2, 3, 4, 5, 99]))


In [69]:
query = """
    BEGIN BATCH
        DELETE FROM purchases
        WHERE product_id = 1806829193678291446
        AND order_id = 1924899396621697920;
        DELETE FROM purchases
        WHERE product_id = 1842214461889315556
        AND order_id = 1925511016616034733;
    APPLY BATCH;
"""
res = session.execute(query)
print_resultset_all(res)

None


In [70]:
query = """
    SELECT count(1)
    FROM purchases
"""
res = session.execute(query)
print_resultset_all(res)

Row(count=2)


In [71]:
query = """
    DROP TABLE IF EXISTS purchases;
"""
session.execute(query)

<cassandra.cluster.ResultSet at 0x7fa565082e70>

In [72]:
# Final cleanup: drop the playground keyspace, then close the driver.
query = """
    DROP KEYSPACE IF EXISTS data_playground;
"""
drop_result = session.execute(query)
# Nothing runs after this cell, so release the driver's connections and
# background threads instead of leaving them open until the kernel exits.
cluster.shutdown()
# Keep the execute() result as the cell's displayed value.
drop_result

<cassandra.cluster.ResultSet at 0x7fa56400ed50>