In [21]:
from cassandra.cluster import Cluster
import random

# Logical node name -> (host, port) used when opening a connection to that node.
# All three entries share the loopback host and differ only in port.
nodes = dict(
    cassandra1=('127.0.0.1', 9042),
    cassandra2=('127.0.0.1', 9043),
    cassandra3=('127.0.0.1', 9044),
)

def random_node():
    """Return the name (a key of ``nodes``) of one randomly chosen node."""
    node_names = list(nodes)
    return random.choice(node_names)

def connect_to_cassandra(node):
    """Open and return a session against the node registered under ``node``.

    ``node`` must be a key of ``nodes``; its entry supplies the single contact
    point (element 0) and the port (element 1) handed to ``Cluster``.
    A new ``Cluster`` is created on every call.
    """
    endpoint = nodes[node]
    host = endpoint[0]
    port = endpoint[1]
    cluster = Cluster([host], port=port)
    return cluster.connect()

def create_keyspace():
    """Create the ``statements`` keyspace, its table and its secondary indexes.

    Connects to a randomly chosen node and runs, in order: the keyspace DDL,
    ``USE statements``, the ``property_statements`` table DDL, and one index
    each on ``observation_id`` and ``property_name``.

    Every DDL statement uses ``IF NOT EXISTS`` so that running this function
    again against an already-initialised cluster does not fail on the indexes.
    The cluster opened for the DDL is shut down before returning, whether or
    not a statement raised.
    """
    session = connect_to_cassandra(random_node())

    ### Create keyspace
    create_keyspace_cql = """
create keyspace if not exists statements with
    replication = {'class': 'NetworkTopologyStrategy', 'DC1': 3}
AND
    durable_writes = true;
"""

    ### Create tables
    create_property_statements_table = """
create table if not exists property_statements (
    id varchar,
    observation_id varchar,
    created_at timestamp,
    updated_at timestamp,
    property_name varchar,
    property_type varchar,
    property_value_varchar varchar,
    property_value_blob blob,
    property_value_int bigint,
    property_value_float double,
    property_value_bool boolean,
    property_value_datetime timestamp,
    property_value_uuid uuid,
    primary key (id)
) WITH compaction = {
    'class' : 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
    'only_purge_repaired_tombstones' : 'true'
};
"""

    ## Create observation_id index
    create_property_statements_observation_id_index = """
create index if not exists property_statements_observation_id_index on
property_statements (observation_id);
"""

    ## Create property_name index
    create_property_statements_property_name_index = """
create index if not exists property_statements_property_name_index on property_statements (property_name);
"""

    try:
        session.execute(create_keyspace_cql)
        # The table and index DDL use unqualified names, so select the keyspace first.
        session.execute("USE statements")
        session.execute(create_property_statements_table)
        session.execute(create_property_statements_observation_id_index)
        session.execute(create_property_statements_property_name_index)
    finally:
        # connect_to_cassandra() builds a fresh Cluster per call; release it here
        # because nothing else holds a reference to this session.
        session.cluster.shutdown()

def nuke_keyspace():
    """Drop the ``statements`` keyspace if it exists.

    Connects to a randomly chosen node, issues ``drop keyspace if exists``,
    and shuts the cluster connection down again before returning, whether or
    not the statement raised.
    """
    session = connect_to_cassandra(random_node())
    try:
        session.execute("drop keyspace if exists statements;")
    finally:
        # connect_to_cassandra() builds a fresh Cluster per call; release it here
        # because nothing else holds a reference to this session.
        session.cluster.shutdown()

# Suffixes of the typed ``property_value_*`` columns declared on the
# ``property_statements`` table; the only values allowed in a column name.
_PROPERTY_VALUE_TYPES = frozenset({'varchar', 'blob', 'int', 'float', 'bool', 'datetime', 'uuid'})


def insert_property_statement(id: str, observation_id: str, created_at: str, property_name: str, property_type: str, property_value: object):
    """Build (but do not execute) an INSERT for one row of ``property_statements``.

    Args:
        id: Value for the ``id`` column.
        observation_id: Value for the ``observation_id`` column.
        created_at: Value written to both ``created_at`` and ``updated_at``.
        property_name: Value for the ``property_name`` column.
        property_type: Selects the target column ``property_value_<property_type>``
            and is also stored in the ``property_type`` column.
        property_value: Value for the selected ``property_value_*`` column.

    Returns:
        A ``(cql, params)`` tuple: the CQL text with seven ``%s`` placeholders
        and the matching 7-tuple of parameters.

    Raises:
        ValueError: If ``property_type`` is not one of the known column suffixes.
    """
    # property_type becomes part of an identifier in the CQL text, where it
    # cannot be bound as a parameter, so it must be checked against a whitelist.
    if property_type not in _PROPERTY_VALUE_TYPES:
        raise ValueError(
            f"unsupported property_type {property_type!r}; "
            f"expected one of {sorted(_PROPERTY_VALUE_TYPES)}"
        )

    return (f"""
        INSERT INTO property_statements (id, observation_id, created_at, updated_at, property_name, property_type, property_value_{property_type})
        VALUES (%s, %s, %s, %s, %s, %s, %s)
    """,
    (id, observation_id, created_at, created_at, property_name, property_type, property_value))

# Create the keyspace

In [24]:
# Runs the schema setup defined above: keyspace, table, then the two secondary indexes.
create_keyspace()

# Nuke the keyspace

In [23]:
# Destructive: drops the whole "statements" keyspace. The insert cell below issues
# "USE statements", so run create_keyspace() again before it if this cell was executed.
nuke_keyspace()

# Insert some random data

In [26]:
import uuid
from datetime import datetime, timezone
from cassandra.cluster import ConsistencyLevel
from cassandra.query import SimpleStatement

# Number of synthetic property statements to insert.
NUM_STATEMENTS = 1000

# property name -> (suffix of the property_value_* column, sample value).
# Built once, outside the loop, and used as the single source of truth for
# the list of names that the loop samples from.
property_types_and_values = {
    'first_name': ('varchar', 'John'),
    'last_name': ('varchar', 'Doe'),
    'email': ('varchar', 'john@someemail.com'),
    'phone_number': ('varchar', '123-456-7890'),
    'address': ('varchar', '123 Main St'),
    'city': ('varchar', 'Springfield'),
    'state': ('varchar', 'IL'),
    'zip_code': ('varchar', '62701'),
    'country': ('varchar', 'USA'),
    'age': ('int', 30),
    'date_of_birth': ('datetime', '1978-11-22T00:00:00.000+0000'),
    'is_active': ('bool', True),
    'trust_score': ('float', 0.95)
}
property_names = list(property_types_and_values)

# The session is left open on purpose: it is a notebook-level variable that
# later cells may keep using.
session = connect_to_cassandra(random_node())
session.execute("USE statements;")

for i in range(NUM_STATEMENTS):
    current_datetime = datetime.now(timezone.utc)
    # '%f' always yields six digits; dropping the last three leaves milliseconds.
    formatted_datetime = current_datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + '+0000'

    property_name = random.choice(property_names)
    property_type, property_value = property_types_and_values[property_name]

    # Fresh random ids for both the row key and the observation it belongs to.
    cql, params = insert_property_statement(
        str(uuid.uuid4()),
        str(uuid.uuid4()),
        formatted_datetime,
        property_name,
        property_type,
        property_value,
    )

    # The CQL text varies with property_type, so a statement is built per row.
    statement = SimpleStatement(
        query_string=cql,
        consistency_level=ConsistencyLevel.LOCAL_QUORUM,
        is_idempotent=True
    )

    session.execute(statement, params)