# Example Cassandra Queries through Python

First of all, let's connect to the DB.

(Before running the next cell, make sure your Cassandra container is running.)

In [1]:
from cassandra import InvalidRequest
from cassandra.cluster import Cluster

# Contact point of the Cassandra node. Presumably this is the container's
# Docker-assigned IP -- adjust it if your container got a different address.
cluster = Cluster(contact_points=['172.17.0.2'])
# No keyspace is passed to connect(); one is selected later with set_keyspace().
session = cluster.connect()

Create a new keyspace (unless it already exists):

In [2]:
# IF NOT EXISTS keeps this cell safe to re-run against an existing keyspace.
# The keyspace is created with SimpleStrategy and a replication factor of 1.
create_keyspace_cql = """
CREATE KEYSPACE IF NOT EXISTS adss
WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' }
"""
session.execute(create_keyspace_cql)

<cassandra.cluster.ResultSet at 0x7fa404fd3eb0>

Activate it:

In [3]:
# Make `adss` the session's current keyspace so that the table names used in
# later statements do not have to be qualified with it.
session.set_keyspace(keyspace='adss')

Create your first table:

In [4]:
# `number` is the only primary-key column of the table; `topic` and `classroom`
# are regular text columns. IF NOT EXISTS keeps the cell safe to re-run.
create_table_cql = """
CREATE TABLE IF NOT EXISTS session (number int PRIMARY KEY, topic text, classroom text);
"""
session.execute(create_table_cql)

<cassandra.cluster.ResultSet at 0x7fa42d94d7b0>

Describe all tables in the DB (note how we have to convert the result to a list):

In [5]:
describe_tables_cql = """
DESCRIBE TABLES;
"""
result = session.execute(describe_tables_cql)
# Materialise the result into a list so that print() shows the individual rows.
table_rows = list(result)
print(table_rows)

[Row(keyspace_name='adss', type='table', name='courses'), Row(keyspace_name='adss', type='table', name='degree'), Row(keyspace_name='adss', type='table', name='session')]


Describe one table:

In [6]:
# Describe a single table, addressed by its keyspace-qualified name.
describe_table_cql = """
DESCRIBE adss.session;
"""
result = session.execute(describe_table_cql)
print([row for row in result])

[Row(keyspace_name='adss', type='table', name='session', create_statement="CREATE TABLE adss.session (\n    number int PRIMARY KEY,\n    classroom text,\n    topic text\n) WITH additional_write_policy = '99p'\n    AND bloom_filter_fp_chance = 0.01\n    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n    AND cdc = false\n    AND comment = ''\n    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}\n    AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n    AND memtable = 'default'\n    AND crc_check_chance = 1.0\n    AND default_time_to_live = 0\n    AND extensions = {}\n    AND gc_grace_seconds = 864000\n    AND max_index_interval = 2048\n    AND memtable_flush_period_in_ms = 0\n    AND min_index_interval = 128\n    AND read_repair = 'BLOCKING'\n    AND speculative_retry = '99p';")]


Add two rows into our new table `session`:

In [7]:
# Two rows: the first sets every column, the second leaves `classroom` unset.
insert_statements = (
    """
INSERT INTO session (number, classroom, topic) VALUES (20, '3.02', 'Column Family Databases');
""",
    """
INSERT INTO session (number, topic) VALUES (21, 'Column Family Databases');
""",
)

# Run the statements in order; `result` ends up bound to the last one's result.
for insert_cql in insert_statements:
    result = session.execute(insert_cql)

*Try adding one row with only the topic, no number or classroom*:

In [10]:
############### YOUR CODE HERE
# `number` is the table's primary key, so the server rejects an INSERT that
# omits it. That rejection is the point of this exercise, so the error is
# caught and printed instead of being left to abort a "Run All" of the notebook.
topic_only_insert_cql = '''
    INSERT INTO session (topic) VALUES ('Column Family Databases')
    '''

try:
    result = session.execute(topic_only_insert_cql)
except InvalidRequest as exc:
    # Show the server's message in the same "InvalidRequest: ..." form a
    # traceback would end with.
    print("InvalidRequest:", exc)

InvalidRequest: Error from server: code=2200 [Invalid query] message="Some partition key parts are missing: number"

Print all table contents:

In [11]:
# Read back every row of the table, without any restriction.
select_all_cql = """
SELECT * FROM session;
"""
result = session.execute(select_all_cql)
all_rows = list(result)
print(all_rows)

[Row(number=20, classroom='3.02', topic='Column Family Databases'), Row(number=21, classroom=None, topic='Column Family Databases')]


Find the sessions that cover the topic of Column Family Databases:

In [12]:
# `topic` is not part of the primary key, so the restriction on it is only
# accepted because the statement explicitly opts in with ALLOW FILTERING.
filtered_cql = """
SELECT number FROM session WHERE topic = 'Column Family Databases' ALLOW FILTERING;
"""
result = session.execute(filtered_cql)
print([row for row in result])

[Row(number=20), Row(number=21)]


Now, let's try the same query without `ALLOW FILTERING`:

In [13]:
# Same query as the previous cell, but without ALLOW FILTERING. The server is
# expected to refuse it; the refusal is what this cell demonstrates, so the
# error is caught and printed instead of being raised out of the cell.
unfiltered_cql = """
SELECT number FROM session WHERE topic = 'Column Family Databases';
"""

try:
    result = session.execute(unfiltered_cql)
except InvalidRequest as exc:
    # Show the server's message in the same "InvalidRequest: ..." form a
    # traceback would end with.
    print("InvalidRequest:", exc)
else:
    # Only reached if the server accepted the query after all.
    print(list(result))

InvalidRequest: Error from server: code=2200 [Invalid query] message="Cannot execute this query as it might involve data filtering and thus may have unpredictable performance. If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING"