In [18]:
from cassandra.cluster import Cluster

cluster = Cluster(['127.0.0.1'], port=9042)
session = cluster.connect()


In [23]:

create_keyspace = """
create keyspace statements with
    replication = {'class': 'NetworkTopologyStrategy', 'DC1': 3}
AND
    durable_writes = true;
"""
session.execute(create_keyspace)


AlreadyExists: Keyspace 'statements' already exists

In [27]:
session.execute("USE statements")

create_property_statements_table = """
create table property_statements (
    id varchar,
    observation_id varchar,
    created_at timestamp,
    updated_at timestamp,
    property_name varchar,
    property_type varchar,
    property_value_varchar varchar,
    property_value_blob blob,
    property_value_int bigint,
    property_value_float double,
    property_value_bool boolean,
    property_value_datetime timestamp,
    property_value_uuid uuid,
    primary key (id)
);
"""
session.execute(create_property_statements_table)

<cassandra.cluster.ResultSet at 0x1124a25d0>

In [34]:
session.execute("USE statements")

create_observation_statements_table = """
create table observation_statements (
    id varchar,
    created_at timestamp,
    updated_at timestamp,
    statement_type varchar,
    statement_id varchar,
    primary key (id)
);
"""
session.execute(create_observation_statements_table)

<cassandra.cluster.ResultSet at 0x112a0d190>

In [35]:
session.execute("USE statements")

create_observation_statements_statement_id_index = """
create index observation_statements_statement_id_index on observation_statements (statement_id);
"""

session.execute(create_observation_statements_statement_id_index)

<cassandra.cluster.ResultSet at 0x10d43df40>

In [36]:
session.execute("USE statements")

create_observation_statements_statement_type_index = """
create index observation_statements_statement_type_index on observation_statements (statement_type);
"""

session.execute(create_observation_statements_statement_type_index)

<cassandra.cluster.ResultSet at 0x112a0d760>

In [29]:
session.execute("USE statements")

create_property_statements_observation_id_index = """
create index property_statements_observation_id_index on property_statements (observation_id);
"""

session.execute(create_property_statements_observation_id_index)

<cassandra.cluster.ResultSet at 0x1129973e0>

In [30]:
session.execute("USE statements")

create_property_statements_property_name_index = """
create index property_statements_property_name_index on property_statements (property_name);
"""

session.execute(create_property_statements_property_name_index)

<cassandra.cluster.ResultSet at 0x112a3b050>

In [32]:
def insert_observation_statement(id: str, created_at: str, statement_type: str, statement_id: str):
    
    return (f"""
        INSERT INTO observation_statements (id, created_at, updated_at, statement_type, statement_id)
        VALUES (%s, %s, %s, %s, %s)
    """,
    (id, created_at, created_at, statement_type, statement_id))

def insert_property_statement(id: str, observation_id: str, created_at: str, property_name: str, property_type:str, property_value: any):
    
    return (f"""
        INSERT INTO property_statements (id, observation_id, created_at, updated_at, property_name, property_type, property_value_{property_type})
        VALUES (%s, %s, %s, %s, %s, %s, %s)
    """,
    (id, observation_id, created_at, created_at, property_name, property_type, property_value))

In [33]:
import uuid
from datetime import datetime, timezone
import random

session.execute("USE statements")

for i in range(10000):
    current_datetime = datetime.now(timezone.utc)
    formatted_datetime = current_datetime.strftime('%Y-%m-%dT%H:%M:%S.') + current_datetime.strftime('%f')[:3] + '+0000'

    property_names = ['first_name', 'last_name', 'email', 'phone_number', 'address', 'city', 'state', 'zip_code', 'country', 'age', 'date_of_birth', 'is_active', 'trust_score']

    property_types_and_values = {
        'first_name': ('varchar', 'John'),
        'last_name': ('varchar', 'Doe'),
        'email': ('varchar', 'john@someemail.com'),
        'phone_number': ('varchar', '123-456-7890'),
        'address': ('varchar', '123 Main St'),
        'city': ('varchar', 'Springfield'),
        'state': ('varchar', 'IL'),
        'zip_code': ('varchar', '62701'),
        'country': ('varchar', 'USA'),
        'age': ('int', 30),
        'date_of_birth': ('datetime', '1978-11-22T00:00:00.000+0000'),
        'is_active': ('bool', True),
        'trust_score': ('float', 0.95)
    }

    property_name = random.choice(property_names)

    property_type, property_value = property_types_and_values[property_name]

    session.execute(insert_property_statement(f"{uuid.uuid4()}", f"{uuid.uuid4()}", formatted_datetime, property_name, property_type, property_value))