# W04 Cassandra Python Tutorial

### Import packages

In [47]:
import pandas as pd
from cassandra.cluster import Cluster

### Load data

In [48]:
# Read the student roster from the CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='./w04.csv')
# A bare trailing expression makes Jupyter render the frame as a table.
df

Unnamed: 0,student_id,f_name,l_name,email,gpa
0,'00001','Tim','Smith','smith515@usf.edu',4.0
1,'00002','John','Jones','jjones@somewhere.com',3.0
2,'00003','Jane','Williams','jane@somewhere.com',3.5
3,'00004','Betty','Johnson','bjohn@somewhere.com',2.2
4,'00005','Jim','Bean','jbean@somewhere.com',3.1
5,'00006','Frank','Gambali','frankg@somewhere.com',2.7
6,'00007','Judy','Garland','judy@somewhere.com',3.9


### Create a session connected to the Cassandra cluster

In [49]:
# Build a Cluster handle using the driver's defaults (no contact points are
# passed — presumably this targets a node on the local machine; confirm).
clstr = Cluster()
# connect() returns the Session through which every later cell issues its CQL.
session = clstr.connect()

### Use the session to 'talk' to Cassandra

In [50]:
# Create the tutorial keyspace; IF NOT EXISTS keeps the cell safe to re-run.
session.execute(
    "CREATE KEYSPACE IF NOT EXISTS w04python "
    "WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':1}"
)

<cassandra.cluster.ResultSet at 0x7fd75824b7f0>

In [51]:
# Ask the server for the list of keyspaces. Nothing is displayed here: the
# result is only bound to `rows`, which the following cell reassigns before
# it is ever read.
rows = session.execute("desc keyspaces")

In [52]:
# List every keyspace reported by the cluster, one name per line.
rows = session.execute("desc keyspaces")
for keyspace_row in rows:
    # The first column of each result row is the keyspace name.
    print(keyspace_row[0])

cassandratutorial
system
system_auth
system_distributed
system_schema
system_traces
system_views
system_virtual_schema
w04
w04python


In [53]:
# DDL for the student table: one row per student, keyed by student_id.
create_student_table_cql = """
CREATE TABLE IF NOT EXISTS w04python.student ( 
    student_id TEXT, 
    f_name TEXT, 
    l_name TEXT, 
    email TEXT, 
    gpa FLOAT, 
    PRIMARY KEY(student_id)
);
"""
# IF NOT EXISTS makes this a no-op when the table is already present.
session.execute(create_student_table_cql)

<cassandra.cluster.ResultSet at 0x7fd75825d090>

In [54]:
# Show only the first three records as a quick reminder of the column layout.
df.head(3)

Unnamed: 0,student_id,f_name,l_name,email,gpa
0,'00001','Tim','Smith','smith515@usf.edu',4.0
1,'00002','John','Jones','jjones@somewhere.com',3.0
2,'00003','Jane','Williams','jane@somewhere.com',3.5


In [55]:
# Print every record field by field.
# Fields are read with .iloc (explicit positional access): plain integer keys
# such as row[0] on a string-labelled Series are deprecated since pandas 2.1
# and are slated to be treated as label lookups, which would fail here.
for index, row in df.iterrows():
    print(
        f"student_id = {row.iloc[0]}, f_name = {row.iloc[1]}, l_name = {row.iloc[2]}, "
        f"email = {row.iloc[3]}, gpa = {row.iloc[4]}"
    )

student_id = '00001', f_name =  'Tim', l_name =  'Smith', email =  'smith515@usf.edu', gpa = 4.0
student_id = '00002', f_name =  'John', l_name =  'Jones', email =  'jjones@somewhere.com', gpa = 3.0
student_id = '00003', f_name =  'Jane', l_name =  'Williams', email =  'jane@somewhere.com', gpa = 3.5
student_id = '00004', f_name =  'Betty', l_name =  'Johnson', email =  'bjohn@somewhere.com', gpa = 2.2
student_id = '00005', f_name =  'Jim', l_name =  'Bean', email =  'jbean@somewhere.com', gpa = 3.1
student_id = '00006', f_name =  'Frank', l_name =  'Gambali', email =  'frankg@somewhere.com', gpa = 2.7
student_id = '00007', f_name =  'Judy', l_name =  'Garland', email =  'judy@somewhere.com', gpa = 3.9


In [56]:
# Prepare the INSERT once. Values are bound by the driver instead of being
# spliced into the CQL text, so a quote or semicolon inside the CSV data can
# neither break the statement nor inject extra CQL.
insert_student = session.prepare("""
    INSERT INTO w04python.student (student_id, f_name, l_name, email, gpa)
    VALUES (?, ?, ?, ?, ?)
""")


def _cql_text(value):
    """Return the plain text held by a CSV field written as a quoted literal.

    The text fields in ``df`` carry surrounding whitespace and their own
    single quotes (e.g. ``" 'Tim'"``). The interpolated statement relied on
    those quotes to form CQL string literals; with bound parameters they must
    be removed so the stored value stays ``Tim``. A doubled quote inside the
    literal (CQL's escape for a single quote) is collapsed to one quote.
    Values that are not wrapped in quotes are returned stripped but otherwise
    unchanged.
    """
    text = str(value).strip()
    if len(text) >= 2 and text[0] == "'" and text[-1] == "'":
        text = text[1:-1].replace("''", "'")
    return text


for index, row in df.iterrows():
    # Positional access via .iloc: the first four fields are text, the fifth is the GPA.
    student_id, f_name, l_name, email = (_cql_text(row.iloc[pos]) for pos in range(4))
    # float() hands the driver a plain Python float for the FLOAT column.
    session.execute(insert_student, (student_id, f_name, l_name, email, float(row.iloc[4])))



In [61]:
# Fetch every student. The columns are selected directly rather than wrapped
# in parentheses: "(a, b, ...)" is a tuple literal, which makes each result
# row a single tuple-valued column and forces row[0][i] indexing.
rows = session.execute("select student_id, f_name, l_name, email, gpa from w04python.student")
for row in rows:
    # The driver's default row factory yields named tuples, so fields are read by name.
    # gpa is stored as a 32-bit FLOAT, so values such as 2.2 print with rounding noise.
    print(f"student_id={row.student_id}, f_name={row.f_name}, l_name={row.l_name}, email={row.email}, gpa={row.gpa}")


student_id=00004, f_name=Betty, l_name=Johnson, email=bjohn@somewhere.com, gpa=2.200000047683716
student_id=00006, f_name=Frank, l_name=Gambali, email=frankg@somewhere.com, gpa=2.700000047683716
student_id=00002, f_name=John, l_name=Jones, email=jjones@somewhere.com, gpa=3.0
student_id=00007, f_name=Judy, l_name=Garland, email=judy@somewhere.com, gpa=3.9000000953674316
student_id=00001, f_name=Tim, l_name=Smith, email=smith515@usf.edu, gpa=4.0
student_id=00003, f_name=Jane, l_name=Williams, email=jane@somewhere.com, gpa=3.5
student_id=00005, f_name=Jim, l_name=Bean, email=jbean@somewhere.com, gpa=3.0999999046325684


In [63]:
# Fetch students whose GPA is above 3.7. gpa is not part of the primary key,
# so the restriction needs ALLOW FILTERING (a scan of the table).
# The columns are selected directly rather than wrapped in parentheses:
# "(a, b, ...)" is a tuple literal, which makes each result row a single
# tuple-valued column and forces row[0][i] indexing.
rows = session.execute("select student_id, f_name, l_name, email, gpa from w04python.student where gpa > 3.7 ALLOW FILTERING")
for row in rows:
    # The driver's default row factory yields named tuples, so fields are read by name.
    print(f"student_id={row.student_id}, f_name={row.f_name}, l_name={row.l_name}, email={row.email}, gpa={row.gpa}")

student_id=00007, f_name=Judy, l_name=Garland, email=judy@somewhere.com, gpa=3.9000000953674316
student_id=00001, f_name=Tim, l_name=Smith, email=smith515@usf.edu, gpa=4.0


## Challenge

Let's say we want to split students into four categories by GPA: honors (gpa >= 3.7), good (3.0 <= gpa < 3.7), at_risk (2.5 <= gpa < 3.0), and failing (gpa < 2.5).

Can you come up with a way to search for students belonging to one of these categories WITHOUT using `ALLOW FILTERING`?
