# Mid Term 1 - Part 2

### Import packages
#### Importing all the necessary packages

In [65]:
import pandas as pd
from cassandra.cluster import Cluster

### Create a session connection to Cassandra cluster

In [66]:
# Build the driver's Cluster handle. No contact points are passed, so the
# driver's defaults apply (presumably a node on the local machine -- confirm
# against your environment).
clstr = Cluster()
# Open the session that the following cells use to send CQL statements.
session = clstr.connect()

### Use session to 'talk' to cassandra

#### Drop keyspace if already existing 

In [67]:
# Start from a clean slate: remove the 'songs' keyspace if an earlier run left it behind.
drop_keyspace_cql = "DROP KEYSPACE IF EXISTS songs"
session.execute(drop_keyspace_cql)

<cassandra.cluster.ResultSet at 0x7f3a2c249b40>

#### Checking whether the keyspace has been dropped or not

In [68]:
# List the keyspaces known to the cluster; 'songs' should not appear after the drop.
keyspace_listing = session.execute("desc keyspaces")
for keyspace_entry in keyspace_listing:
    # The first field of each result row is what gets printed (the keyspace name
    # in the captured output).
    print(f"{keyspace_entry[0]}")

bd23
system
system_auth
system_distributed
system_schema
system_traces
system_views
system_virtual_schema
w04
w04python


### Create a keyspace named 'songs'

#### Discussion about rationale for creating a keyspace:
#### We create a keyspace (the equivalent of a schema in an RDBMS) named 'songs' with a replication factor of 2, so that the data is replicated on 2 different nodes.

In [69]:
# Create the 'songs' keyspace (no-op if it already exists) using SimpleStrategy
# with a replication factor of 2.
create_keyspace_cql = (
    "CREATE KEYSPACE IF NOT EXISTS songs "
    "WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':2}"
)
session.execute(create_keyspace_cql)

<cassandra.cluster.ResultSet at 0x7f3a2c393d00>

#### Check whether the keyspace has been created or not

In [70]:
# List the keyspaces again; 'songs' should now be part of the output.
keyspace_listing = session.execute("desc keyspaces")
for keyspace_entry in keyspace_listing:
    # Print the first field of each result row (the keyspace name in the captured output).
    print(f"{keyspace_entry[0]}")

bd23
songs
system
system_auth
system_distributed
system_schema
system_traces
system_views
system_virtual_schema
w04
w04python


### Create a songs_info table for data storage

#### Discussion about rationale for creating keys:
#### We create the columns of the songs_info table as song_id, title, duration and genre, as asked in the question, with song_id as the primary key because the question requires it to be a unique attribute.

In [71]:
# Define the songs_info table inside the 'songs' keyspace (skipped when it
# already exists). song_id is the sole primary-key column; title and genre are
# text, duration is an integer.
create_table_cql = """
CREATE TABLE IF NOT EXISTS songs.songs_info ( 
    song_id INT, 
    title TEXT, 
    duration INT, 
    genre TEXT, 
    PRIMARY KEY(song_id)
);
"""
session.execute(create_table_cql)

<cassandra.cluster.ResultSet at 0x7f3a2c30b490>

### Load the data created in the data.csv file 

In [72]:
# Read the song records from data.csv into a DataFrame, then echo the frame
# to check whether the data is present.
csv_path = 'data.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,song_id,title,duration,genre
0,1001,'Song A',10,'pop'
1,1002,'Song B',20,'rock'
2,1003,'Song C',30,'classical'
3,1004,'Song D',40,'country'
4,1005,'Song E',50,'pop'
5,1006,'Song F',55,'pop'
6,1007,'Song G',10,'rock'
7,1008,'Song H',20,'classical'
8,1009,'Song I',30,'country'
9,1010,'Song J',30,'rock'


In [73]:
# Walk the DataFrame row by row and print each record's fields, to verify
# what was read from the CSV before anything is written to Cassandra.
for row_label, song in df.iterrows():
    print(
        f"song_id = {song.song_id}, title ={song.title}, "
        f"duration = {song.duration}, genre = {song.genre}"
    )

song_id = 1001, title ='Song A', duration = 10, genre = 'pop'
song_id = 1002, title ='Song B', duration = 20, genre = 'rock'
song_id = 1003, title ='Song C', duration = 30, genre = 'classical'
song_id = 1004, title ='Song D', duration = 40, genre = 'country'
song_id = 1005, title ='Song E', duration = 50, genre = 'pop'
song_id = 1006, title ='Song F', duration = 55, genre = 'pop'
song_id = 1007, title ='Song G', duration = 10, genre = 'rock'
song_id = 1008, title ='Song H', duration = 20, genre = 'classical'
song_id = 1009, title ='Song I', duration = 30, genre = 'country'
song_id = 1010, title ='Song J', duration = 30, genre = 'rock'


### Inserting the data loaded into the table from the dataframe

In [74]:
# Insert every DataFrame row into songs.songs_info, echoing each statement
# before it is executed.
#
# Fields are read by column label rather than by integer position: using
# integer keys positionally on a label-indexed Series is deprecated in pandas.
#
# The statement is built once per row and the same text is both printed and
# executed, so the echoed query can never drift from the one actually sent.
#
# NOTE: values are interpolated into the CQL text as-is. This relies on the
# text columns already carrying their own single quotes, as the echoed
# statements in this notebook show (e.g. 'Song A'). For data that is not
# fully trusted, switch to a prepared statement with bound parameters.
for row_label, song in df.iterrows():
    insert_cql = f"""
        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES ({song['song_id']}, {song['title']}, {song['duration']}, {song['genre']});
        """
    print(insert_cql)  # printing the query which will be executed
    session.execute(insert_cql)  # inserting the data through the session


        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1001, 'Song A', 10, 'pop');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1002, 'Song B', 20, 'rock');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1003, 'Song C', 30, 'classical');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1004, 'Song D', 40, 'country');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1005, 'Song E', 50, 'pop');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1006, 'Song F', 55, 'pop');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1007, 'Song G', 10, 'rock');
        

        INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1008, 'So

### Check whether the songs data is loaded into the table or not

In [75]:
# Read back every row to confirm the load succeeded.
# The columns are selected directly rather than wrapped in parentheses (which
# returns them as a single tuple-valued column), so each field can be read by
# name from the result row instead of through row[0][i] double indexing.
# Row order is whatever the cluster returns; it is not sorted by song_id.
rows = session.execute("select song_id,title,duration,genre from songs.songs_info")
for row in rows:
    print(f"song_id={row.song_id}, title={row.title}, duration={row.duration}, genre={row.genre}")


song_id=1009, title=Song I, duration=30, genre=country
song_id=1006, title=Song F, duration=55, genre=pop
song_id=1008, title=Song H, duration=20, genre=classical
song_id=1004, title=Song D, duration=40, genre=country
song_id=1007, title=Song G, duration=10, genre=rock
song_id=1005, title=Song E, duration=50, genre=pop
song_id=1001, title=Song A, duration=10, genre=pop
song_id=1003, title=Song C, duration=30, genre=classical
song_id=1002, title=Song B, duration=20, genre=rock
song_id=1010, title=Song J, duration=30, genre=rock


### Select rows with where clause to see data based on the condition

In [76]:
# Fetch only the songs whose duration is 10.
# duration is not part of the primary key, so the query carries ALLOW FILTERING.
# The columns are selected directly rather than wrapped in parentheses (which
# returns them as a single tuple-valued column), so fields are read by name.
rows = session.execute("select song_id,title,duration,genre from songs.songs_info where duration = 10 ALLOW FILTERING")
for row in rows:
    print(f"{row.song_id}, {row.title}, {row.duration}, {row.genre}")

1007, Song G, 10, rock
1001, Song A, 10, pop


### Insert a value directly into the existing table

In [77]:
# Add one more song (id 1011, 'Song K') with a literal INSERT statement.
insert_song_k_cql = """
INSERT INTO songs.songs_info (song_id,title,duration,genre)     
        VALUES (1011, 'Song K', 10, 'rock');
"""
session.execute(insert_song_k_cql)

<cassandra.cluster.ResultSet at 0x7f3a2c276ef0>

### Check the table whether the value got inserted or not

In [78]:
# List the whole table again; the newly inserted song 1011 should be present.
# The columns are selected directly rather than wrapped in parentheses (which
# returns them as a single tuple-valued column), so fields are read by name
# instead of through row[0][i] double indexing.
rows = session.execute("select song_id,title,duration,genre from songs.songs_info")
for row in rows:
    print(f"{row.song_id}, {row.title}, {row.duration}, {row.genre}")

1009, Song I, 30, country
1006, Song F, 55, pop
1008, Song H, 20, classical
1011, Song K, 10, rock
1004, Song D, 40, country
1007, Song G, 10, rock
1005, Song E, 50, pop
1001, Song A, 10, pop
1003, Song C, 30, classical
1002, Song B, 20, rock
1010, Song J, 30, rock


### Update a row in the existing table

In [79]:
# Rename the song with song_id 1006 to 'Updated Song F'; the WHERE clause
# selects the row by its primary key.
rename_song_cql = """
update songs.songs_info set title = 'Updated Song F' where song_id = 1006;
"""
session.execute(rename_song_cql)

<cassandra.cluster.ResultSet at 0x7f3a2c3915a0>

### Check whether the row got updated or not

In [80]:
# List the whole table again; song 1006 should now carry the title 'Updated Song F'.
# The columns are selected directly rather than wrapped in parentheses (which
# returns them as a single tuple-valued column), so fields are read by name
# instead of through row[0][i] double indexing.
rows = session.execute("select song_id,title,duration,genre from songs.songs_info")
for row in rows:
    print(f"{row.song_id}, {row.title}, {row.duration}, {row.genre}")

1009, Song I, 30, country
1006, Updated Song F, 55, pop
1008, Song H, 20, classical
1011, Song K, 10, rock
1004, Song D, 40, country
1007, Song G, 10, rock
1005, Song E, 50, pop
1001, Song A, 10, pop
1003, Song C, 30, classical
1002, Song B, 20, rock
1010, Song J, 30, rock


### Delete a row from the existing table

In [81]:
# Remove the song with song_id 1008; the WHERE clause selects the row by its
# primary key.
delete_song_cql = """
delete from songs.songs_info where song_id = 1008;
"""
session.execute(delete_song_cql)

<cassandra.cluster.ResultSet at 0x7f3a2c3ca440>

### Check whether the row got deleted or not

In [82]:
# List the whole table again; the row with song_id 1008 should be gone.
# The columns are selected directly rather than wrapped in parentheses (which
# returns them as a single tuple-valued column), so fields are read by name
# instead of through row[0][i] double indexing.
rows = session.execute("select song_id,title,duration,genre from songs.songs_info")
for row in rows:
    print(f"{row.song_id}, {row.title}, {row.duration}, {row.genre}")

1009, Song I, 30, country
1006, Updated Song F, 55, pop
1011, Song K, 10, rock
1004, Song D, 40, country
1007, Song G, 10, rock
1005, Song E, 50, pop
1001, Song A, 10, pop
1003, Song C, 30, classical
1002, Song B, 20, rock
1010, Song J, 30, rock


### Clean the things up : Drop Keyspace and exit

In [83]:
# Clean things up: remove the demo keyspace and release the driver connection.
# IF EXISTS keeps this cell re-runnable (matching the drop at the start of the
# notebook) instead of raising when the keyspace is already gone.
session.execute("""
DROP KEYSPACE IF EXISTS songs;
""")

# Explicitly close the driver's sessions and connections rather than relying
# on the bare `exit` below to release them.
clstr.shutdown()

# exit cql
# NOTE(review): as a plain Python expression the bare name `exit` does nothing;
# it is kept only to preserve the original cell's behaviour -- confirm whether
# it is still wanted.
exit

<cassandra.cluster.ResultSet at 0x7f3a2c391000>