# Apache Cassandra Project

## Creating queries to answer the following three questions of the data

### 1. Get the artist, song title and song's length in the music app history that was heard during  sessionId = 338, and itemInSession  = 4


### 2. Get only the following: name of artist, song (sorted by itemInSession) and user (first and last name) for userid = 10, sessionid = 182
    

### 3. Get every user name (first and last) in my music app history who listened to the song 'All Hands Against His Own'

#### Checking out the `event_datafile.csv` file

In [49]:
# Import Python packages
import pandas as pd
import cassandra
import csv

In [5]:
pd.read_csv('event_datafile.csv').head()

Unnamed: 0,artist,firstName,gender,itemInSession,lastName,length,level,location,sessionId,song,userId
0,Barry Tuckwell/Academy of St Martin-in-the-Fie...,Mohammad,M,0,Rodriguez,277.15873,paid,"Sacramento--Roseville--Arden-Arcade, CA",961,Horn Concerto No. 4 in E flat K495: II. Romanc...,88
1,Jimi Hendrix,Mohammad,M,1,Rodriguez,239.82975,paid,"Sacramento--Roseville--Arden-Arcade, CA",961,Woodstock Inprovisation,88
2,Building 429,Mohammad,M,2,Rodriguez,300.61669,paid,"Sacramento--Roseville--Arden-Arcade, CA",961,Majesty (LP Version),88
3,The B-52's,Gianna,F,0,Jones,321.54077,free,"New York-Newark-Jersey City, NY-NJ-PA",107,Love Shack,38
4,Die Mooskirchner,Gianna,F,1,Jones,169.29914,free,"New York-Newark-Jersey City, NY-NJ-PA",107,Frisch und g'sund,38


#### Create sparkify keyspace

In [6]:
from cassandra.cluster import Cluster

# Create cluster and session
cluster = Cluster(['127.0.0.1'])
session = cluster.connect()

In [7]:
# Drop keyspace udacity if it already exists
session.execute('DROP KEYSPACE IF EXISTS sparkify')

# Create keyspace udacity
session.execute("""CREATE KEYSPACE sparkify
                WITH REPLICATION = 
                {'class': 'SimpleStrategy', 'replication_factor': 1}
                """)

# Set keyspace to udacity
session.set_keyspace('sparkify')

#### Query 1 Get the artist, song title and song's length in the music app history that was heard during  sessionId = 338, and itemInSession  = 4

In [8]:
session.execute("DROP TABLE IF EXISTS song_playlist_item_in_session")
session.execute("""
CREATE TABLE IF NOT EXISTS song_playlist_item_in_session 
(session_id int, 
item_in_session int, 
artist text, 
song_title text, 
song_length decimal, 
PRIMARY KEY (session_id, item_in_session))
""")

<cassandra.cluster.ResultSet at 0x7f795b0af208>

In [9]:
file = 'event_datafile_new.csv'

with open(file, 'r', encoding='utf8') as file:
    csvreader = csv.reader(file)
    next(csvreader) # skip the header
    
    for line in csvreader:
        session.execute("""
        INSERT INTO song_playlist_item_in_session 
        (session_id, item_in_session, artist, song_title, song_length)
        VALUES (%s, %s, %s, %s, %s)
        """,
        (int(line[8]), int(line[3]), line[0], line[9], float(line[5]))
                       )

In [10]:
rows = session.execute("""
SELECT artist, song_title, song_length 
FROM song_playlist_item_in_session 
WHERE session_id = 338 AND item_in_session = 4
"""
)

for row in rows:
    print(row.artist, row.song_title, row.song_length)

Faithless Music Matters (Mark Knight Dub) 495.3073


#### Query 2 Give me only the following: name of artist, song (sorted by itemInSession) and user (first and last name) for userid = 10, sessionid = 182

In [11]:
session.execute("DROP TABLE IF EXISTS song_playlist_session")
session.execute("""
CREATE TABLE IF NOT EXISTS song_playlist_session 
(user_id int, session_id int, item_in_session int, artist text, song text, first_name text, last_name text,
PRIMARY KEY ((user_id, session_id), item_in_session))
"""
)

<cassandra.cluster.ResultSet at 0x7f795b0af470>

In [12]:
file = 'event_datafile_new.csv'

with open(file, 'r', encoding='utf8') as file:
    csvreader = csv.reader(file)
    next(csvreader)
    
    for line in csvreader:
        try:
            session.execute("""
            INSERT INTO song_playlist_session 
            (user_id, session_id, item_in_session, artist, song, first_name, last_name) 
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            """ , 
            (int(line[10]), int(line[8]), int(line[3]), str(line[0]), str(line[9]), str(line[1]), str(line[4]))
            )
        except Exception as e:
            print('Error inserting row')
            print(e)

In [13]:
rows = session.execute("""
SELECT artist, song, first_name, last_name 
FROM song_playlist_session 
WHERE user_id = 10 AND session_id = 182
"""
)

for row in rows:
    print(row.artist, row.song, row.last_name, row.first_name)

Down To The Bone Keep On Keepin' On Cruz Sylvie
Three Drives Greece 2000 Cruz Sylvie
Sebastien Tellier Kilometer Cruz Sylvie
Lonnie Gordon Catch You Baby (Steve Pitron & Max Sanna Radio Edit) Cruz Sylvie


#### Query 3 Give me every user name (first and last) in my music app history who listened to the song 'All Hands Against His Own'

In [14]:
session.execute('DROP TABLE IF EXISTS song_playlist_song')
session.execute("""
CREATE TABLE IF NOT EXISTS song_playlist_song 
(song text, user_id int, first_name text, last_name text, 
PRIMARY KEY (song, user_id))
"""
)

<cassandra.cluster.ResultSet at 0x7f795b11b470>

In [15]:
file = 'event_datafile_new.csv'

with open(file, 'r', encoding='utf8') as file:
    csvreader = csv.reader(file)
    next(csvreader)
    
    for line in csvreader:
        session.execute("""
        INSERT INTO song_playlist_song 
        (song, user_id, first_name, last_name) 
        VALUES (%s, %s, %s, %s)
        """ , 
        (str(line[9]), int(line[10]), str(line[1]), str(line[4]))
        )

In [16]:
rows = session.execute("""
SELECT first_name, last_name 
FROM song_playlist_song 
WHERE song = 'All Hands Against His Own'
"""
)

for row in rows:
    print(row.first_name, row.last_name)

Jacqueline Lynch
Tegan Levine
Sara Johnson


### Drop tables before closing out the sessions

In [17]:
session.execute("DROP TABLE IF EXISTS song_playlist_item_in_session")
session.execute("DROP TABLE IF EXISTS song_playlist_session")
session.execute("DROP TABLE IF EXISTS song_playlist_song")

<cassandra.cluster.ResultSet at 0x7f795b06dbe0>

### Close the session and cluster connection

In [18]:
session.shutdown()
cluster.shutdown()