# MID TERM 

##### Author: Priyam
##### U88016402

### Adding a new functionality to chat bot

### Import packages

In [48]:
import pandas as pd
from cassandra.cluster import Cluster

### Create a session connection to Cassandra cluster

In [49]:
clstr = Cluster()
session = clstr.connect()

### Use session to 'talk' to cassandra

#### Checking if we already have keyspace chat and dropping it if it is available

In [66]:
session.execute("DROP KEYSPACE IF EXISTS chat")

<cassandra.cluster.ResultSet at 0x7f48ede66ad0>

In [51]:
rows = session.execute("desc keyspaces")
for row in rows:
    print(f"{row[0]}")

m14
system
system_auth
system_distributed
system_schema
system_traces
system_views
system_virtual_schema
w04python


In [67]:
session.execute("""
    CREATE KEYSPACE IF NOT EXISTS chat WITH REPLICATION = {
        'class':'SimpleStrategy', 
        'replication_factor':3
    }
""")

<cassandra.cluster.ResultSet at 0x7f48dee744d0>

In [68]:
rows = session.execute("desc keyspaces")
for row in rows:
    print(f"{row[0]}")

chat
m14
system
system_auth
system_distributed
system_schema
system_traces
system_views
system_virtual_schema
w04python


Create a user table for our task

In [69]:
session.execute("""
CREATE TABLE IF NOT EXISTS chat.user ( 
    user_id INT, 
    date TEXT, 
    type TEXT, 
    file INT, 
    PRIMARY KEY(user_id, type)
);
""")

<cassandra.cluster.ResultSet at 0x7f48defd2550>

We are taking user _id and type of file as our primary keys as these will help in easier read and write of the data at later stages

### Load data

In [70]:
df = pd.read_csv('data.csv')
df

Unnamed: 0,user_id,date,type,file
0,1,'08-05-2001','image',100112
1,2,'08-06-2022','image',100117
2,3,'07-06-2010','video',100119
3,4,'09-07-2023','file',100113
4,5,'01-08-1996','video',100115
5,6,'01-07-1996','file',100485
6,7,'06-09-2023','video,100985
7,8,'08-06-2022','image',100785
8,9,'02-05-1996','file',100452
9,10,'06-07-2022','file',100852


#### CHecking if our data is iterable

In [71]:
for index, row in df.iterrows():
    print(f"user_id = {row.user_id}, date ={row.date}, type = {row.type}, file = {row.file}")

user_id = 1, date ='08-05-2001', type = 'image', file = 100112
user_id = 2, date ='08-06-2022', type = 'image', file = 100117
user_id = 3, date ='07-06-2010', type = 'video', file = 100119
user_id = 4, date ='09-07-2023', type = 'file', file = 100113
user_id = 5, date ='01-08-1996', type = 'video', file = 100115
user_id = 6, date ='01-07-1996', type = 'file', file = 100485
user_id = 7, date ='06-09-2023', type = 'video, file = 100985
user_id = 8, date ='08-06-2022', type = 'image', file = 100785
user_id = 9, date ='02-05-1996', type = 'file', file = 100452
user_id = 10, date ='06-07-2022', type = 'file', file = 100852


#### Inserting the data into our Table 'user'

In [72]:
for index, row in df.iterrows():
    # Properly format and escape the string values
    formatted_date = str(row['date']).replace("'", "''")  # replace single quote with two single quotes for SQL
    formatted_type = str(row['type']).replace("'", "''")



    # Use the formatted strings in the SQL statement
    sql_statement = f"""
        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES ({row['user_id']}, '{formatted_date}', '{formatted_type}', {row['file']});
        """

    print(sql_statement)  # For debugging
    session.execute(sql_statement)


        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (1, '''08-05-2001''', '''image''', 100112);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (2, '''08-06-2022''', '''image''', 100117);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (3, '''07-06-2010''', '''video''', 100119);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (4, '''09-07-2023''', '''file''', 100113);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (5, '''01-08-1996''', '''video''', 100115);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (6, '''01-07-1996''', '''file''', 100485);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (7, '''06-09-2023''', '''video', 100985);
        

        INSERT INTO chat.user (user_id, date, type, file)     
        VALUES (

#### Running a Select query to observe our table data

In [73]:
rows = session.execute("select (user_id, date, type, file) from chat.user")
for row in rows:
    print(f"user_id={row[0][0]}, date={row[0][1]}, type={row[0][2]}, file={row[0][3]}")


user_id=5, date='01-08-1996', type='video', file=100115
user_id=10, date='06-07-2022', type='file', file=100852
user_id=1, date='08-05-2001', type='image', file=100112
user_id=8, date='08-06-2022', type='image', file=100785
user_id=2, date='08-06-2022', type='image', file=100117
user_id=4, date='09-07-2023', type='file', file=100113
user_id=7, date='06-09-2023', type='video, file=100985
user_id=6, date='01-07-1996', type='file', file=100485
user_id=9, date='02-05-1996', type='file', file=100452
user_id=3, date='07-06-2010', type='video', file=100119


#### Inserting data into our table

In [74]:
rows = f"""INSERT INTO chat.user (user_id, date, type, file)   
        VALUES (12, '10-05-2006', 'image', 100524)"""
session.execute(rows)

<cassandra.cluster.ResultSet at 0x7f48deeeb890>

#### Checking if data has been inserted or not

In [75]:
rows = session.execute("select (user_id, date, type, file) from chat.user")
for row in rows:
    print(f"user_id={row[0][0]}, date={row[0][1]}, type={row[0][2]}, file={row[0][3]}")


user_id=5, date='01-08-1996', type='video', file=100115
user_id=10, date='06-07-2022', type='file', file=100852
user_id=1, date='08-05-2001', type='image', file=100112
user_id=8, date='08-06-2022', type='image', file=100785
user_id=2, date='08-06-2022', type='image', file=100117
user_id=4, date='09-07-2023', type='file', file=100113
user_id=7, date='06-09-2023', type='video, file=100985
user_id=6, date='01-07-1996', type='file', file=100485
user_id=9, date='02-05-1996', type='file', file=100452
user_id=12, date=10-05-2006, type=image, file=100524
user_id=3, date='07-06-2010', type='video', file=100119


We can see here that our data has been added database.

### To search for and return an attachment

##### We will create index since it better way without using Allow Filtering. Allow Filtering does a full table scan which is less preferred.

In [76]:
# Create an index on type in the user table
session.execute("""
CREATE INDEX IF NOT EXISTS type
ON chat.user (type);
""")

<cassandra.cluster.ResultSet at 0x7f48dee74bd0>

In [77]:
rows = session.execute("SELECT (user_id, date, type, file) FROM chat.user WHERE type = 'image'")
for row in rows:
    print(f"{row[0][0]}, {row[0][1]}, {row[0][2]}, {row[0][3]}")
    

12, 10-05-2006, image, 100524


#### We got all the attachments with type 'image'. 

### Deletion a row from our table

In [78]:
rows= (("DELETE FROM chat.user  WHERE user_id =12"))
session.execute(rows)


<cassandra.cluster.ResultSet at 0x7f48dee704d0>

In [79]:
rows = session.execute("select (user_id, date, type, file) from chat.user")
for row in rows:
    print(f"user_id={row[0][0]}, date={row[0][1]}, type={row[0][2]}, file={row[0][3]}")

user_id=5, date='01-08-1996', type='video', file=100115
user_id=10, date='06-07-2022', type='file', file=100852
user_id=1, date='08-05-2001', type='image', file=100112
user_id=8, date='08-06-2022', type='image', file=100785
user_id=2, date='08-06-2022', type='image', file=100117
user_id=4, date='09-07-2023', type='file', file=100113
user_id=7, date='06-09-2023', type='video, file=100985
user_id=6, date='01-07-1996', type='file', file=100485
user_id=9, date='02-05-1996', type='file', file=100452
user_id=3, date='07-06-2010', type='video', file=100119


#### We deleted the user_id '12', so let's search for it if is still available or not

In [80]:
rows = session.execute("SELECT (user_id, date, type, file) FROM chat.user WHERE user_id = 12 allow filtering")
for row in rows:
    print(f"{row[0][0]}, {row[0][1]}, {row[0][2]}, {row[0][3]}")

#### We don't have it anymore as it has been deleted