# Let's do simple operations on Cassandra. We are going to use Amazon Keyspaces for this demo.

In [18]:
## I am using this plugin to connect using SSL - https://github.com/aws-samples/amazon-keyspaces-examples/tree/main/python/datastax-v3/connection-sigv4

In [2]:
! pip install cassandra-driver

Collecting cassandra-driver
  Downloading cassandra_driver-3.25.0-cp36-cp36m-manylinux1_x86_64.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 25.3 MB/s eta 0:00:01
Collecting geomet<0.3,>=0.1
  Downloading geomet-0.2.1.post1-py3-none-any.whl (18 kB)
Installing collected packages: geomet, cassandra-driver
Successfully installed cassandra-driver-3.25.0 geomet-0.2.1.post1
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m


In [8]:
!pip install /home/ec2-user/SageMaker/utils/datastax-v3/connection-sigv4

Processing /home/ec2-user/SageMaker/utils/datastax-v3/connection-sigv4
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Collecting cassandra-sigv4
  Using cached cassandra_sigv4-4.0.2-py2.py3-none-any.whl (9.8 kB)
Building wheels for collected packages: sigv4-sample
  Building wheel for sigv4-sample (setup.py) ... [?25ldone
[?25h  Created wheel for sigv4-sample: filename=sigv4_sample-1.0-py3-none-any.whl size=4032 sha256=2fcf071bb0fc86ff60aeccdee3320868d89c435632395e32ff72574dcd22928b
  Stored in directory: /home/ec2-user/.cache/pip/wheels/1d/eb/da/4c682c3570a87e40e9ac8cd38c0c3bcf47abaeab0c79da0d56
Successfully built sigv4-sample
Inst

In [9]:
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
#
try:
    import os
    import boto3
    import ssl
    import sys
    from boto3 import Session
    from cassandra_sigv4.auth import AuthProvider, Authenticator, SigV4AuthProvider
    from ssl import SSLContext, PROTOCOL_TLSv1_2, CERT_REQUIRED
    from cassandra.cluster import Cluster
    from cassandra import ConsistencyLevel
    from cassandra.query import SimpleStatement
except ImportError:
    raise RuntimeError('Required packages Failed To install please run "python Setup.py install" command or install '
                       'using pip')


In [19]:
# Build a TLS 1.2 context that verifies the server against the certificate
# stored at resources/sf-class2-root.crt, resolved relative to the current
# working directory.
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
cert_path = os.path.join(os.getcwd(), 'resources/sf-class2-root.crt')
ssl_context.load_verify_locations(cert_path)
ssl_context.verify_mode = CERT_REQUIRED

# this will automatically pull the credentials from either the
# ~/.aws/credentials file
# ~/.aws/config
# or from the boto environment variables.
boto_session = boto3.Session()


# verify that the session is set correctly
credentials = boto_session.get_credentials()

if not credentials or not credentials.access_key:
    sys.exit("No access key found, please setup credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) according to https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html#cli-configure-quickstart-precedence\n")


region = boto_session.region_name

if not region:
    sys.exit("You do not have a region set.  Set environment variable AWS_REGION or provide a configuration see https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html#cli-configure-quickstart-precedence\n")


# Authenticate with SigV4 using the boto session's credentials, and derive the
# service endpoint from the session's region.
auth_provider = SigV4AuthProvider(boto_session)
contact_point = "cassandra.{}.amazonaws.com".format(region)

cluster = Cluster([contact_point],
                  ssl_context=ssl_context,
                  auth_provider=auth_provider,
                  port=9142)

session = cluster.connect()
print("CONNECTION TO KEYSPACES SUCCESSFUL2")

rows = session.execute('select * from system_schema.keyspaces')
print("PRINTING SCHEMA INFORMATION2")
# Iterate the result set itself rather than `rows.current_rows`: the latter
# only holds the page that has already been fetched, while iterating the
# result set lets the driver fetch any further pages transparently.
for r in rows:
    print("Found Keyspace: {}".format(r.keyspace_name))




CONNECTION TO KEYSPACES SUCCESSFUL2
PRINTING SCHEMA INFORMATION2
Found Keyspace: system_schema
Found Keyspace: system_schema_mcs
Found Keyspace: system
Found Keyspace: my_demo_keyspace


### Let's create a new keyspace

In [21]:
# IF NOT EXISTS makes this cell safe to re-run: without it, a second run only
# "succeeds" by printing the swallowed already-exists error. This matches the
# CREATE TABLE IF NOT EXISTS statement used for the table later on.
try:
    session.execute(
        'CREATE KEYSPACE IF NOT EXISTS "test" '
        "WITH REPLICATION = {'class': 'SingleRegionStrategy'}"
    )
except Exception as e:
    # Best-effort for the demo: show the server's message and keep going.
    print(e)

# Set the keyspace to use. Note that the connection was already established above.

In [22]:
# Make 'test' the session's current keyspace; any failure is reported rather
# than raised so the notebook keeps running.
try:
    session.set_keyspace('test')
except Exception as err:
    print(err)

### Begin with creating a Music Library of albums. Each album has a lot of information we could add to the music library table. We will  start with album name, artist name, year. 

### But ...Stop

### We are working with Apache Cassandra, a NoSQL database. We can't model our data and create our table without more information.

### Think about which queries you will be performing on this data.

#### We want to be able to get every album that was released in a particular year. 
`select * from music_library WHERE YEAR=1970`

*To do that:* <ol><li> We need to be able to do a WHERE on YEAR. <li>YEAR will become my partition key,<li>artist name will be my clustering column to make each Primary Key unique. <li>**Remember there are no duplicates in Apache Cassandra.**</ol>

**Table Name:** music_library<br>
**column 1:** Album Name<br>
**column 2:** Artist Name<br>
**column 3:** Year <br>
PRIMARY KEY(year, artist name)


### Now to translate this information into a Create Table Statement. 
More information on Data Types can be found here: https://datastax.github.io/python-driver/<br>


In [26]:
# DDL for the music library: `year` is the partition key and `artist_name`
# the clustering column, as laid out in the PRIMARY KEY clause.
query = (
    "CREATE TABLE IF NOT EXISTS music_library "
    "(year int, artist_name text, album_name text, "
    "PRIMARY KEY (year, artist_name))"
)

In [27]:
# Run the statement currently held in `query`; report any error instead of
# stopping the notebook.
try:
    session.execute(query)
except Exception as err:
    print(err)

In [30]:
# Try a row count. The error, if any, is printed rather than raised so the
# server's response can be inspected.
query = "select count(*) from music_library"
try:
    count = session.execute(query)
except Exception as err:
    print(err)
    


Error from server: code=2200 [Invalid query] message="countRows is not yet supported."


In [32]:
# Parameterised INSERT, executed with the session's default consistency level.
query = (
    "INSERT INTO music_library (year , artist_name, album_name)"
    " VALUES (%s,%s,%s)"
)

# Each row is attempted independently so that a failure on one is printed
# without skipping the other.
for album_row in (
    (1970, "The Beatles", "Let it be"),
    (1965, "The Beatles", "Rubber Soul"),
):
    try:
        session.execute(query, album_row)
    except Exception as err:
        print(err)
    

Error from server: code=2200 [Invalid query] message="Consistency level LOCAL_ONE is not supported for this operation. Supported consistency levels are: LOCAL_QUORUM"
Error from server: code=2200 [Invalid query] message="Consistency level LOCAL_ONE is not supported for this operation. Supported consistency levels are: LOCAL_QUORUM"



#### Amazon Keyspaces currently supports only LOCAL_QUORUM for writes, so we need to specify it explicitly because the driver uses LOCAL_ONE by default. Here we set the consistency level on the statement itself.








In [47]:
# https://docs.datastax.com/en/developer/python-driver/3.25/getting_started/#setting-a-consistency-level
# Already imported in the beginning
# from cassandra import ConsistencyLevel
# from cassandra.query import SimpleStatement

In [46]:
# Same INSERT, but wrapped in a SimpleStatement so the consistency level can
# be set to LOCAL_QUORUM for this statement.
query = SimpleStatement(
    "INSERT INTO music_library (year , artist_name, album_name) VALUES (%s,%s,%s)",
    consistency_level=ConsistencyLevel.LOCAL_QUORUM,
)

# Each row is attempted independently so that a failure on one is printed
# without skipping the other.
for album_row in (
    (1970, "The Beatles", "Let it be"),
    (1965, "The Beatles", "Rubber Soul"),
):
    try:
        session.execute(query, album_row)
    except Exception as err:
        print(err)
    


### Let's check the data we inserted

In [37]:
# Read back every row of the table. Despite its name, `count` holds the
# result set returned by the driver, not a number.
query = "select * from music_library"
try:
    count = session.execute(query)
except Exception as err:
    print(err)
    

In [41]:
# `count` is the result set assigned by an earlier select cell; .all() is the
# cell's last expression, so the notebook displays the rows it returns.
count.all()

[Row(year=1970, artist_name='The Beatles', album_name='Let it be'),
 Row(year=1965, artist_name='The Beatles', album_name='Rubber Soul')]

In [44]:
# Filter on the partition key (year), which is the query the table was
# modelled for.
query = "select * from music_library WHERE YEAR=1970"
try:
    count = session.execute(query)
except Exception as err:
    print(err)
# Kept as the last top-level expression so the notebook displays the rows.
count.all()

[Row(year=1970, artist_name='The Beatles', album_name='Let it be')]