In [None]:
# Database drivers: Cassandra (cassandra-driver) and MongoDB (pymongo)
from cassandra.cluster import Cluster
from pymongo import MongoClient
import pandas as pd
import uuid
import psutil
import time

In [None]:

# --- MongoDB -----------------------------------------------------------------
# Client for the local server, then the 'Twitter' database and the
# 'Preprocessed_Data' collection that the benchmark cells write to.
mongo_client = MongoClient('mongodb://localhost:27017/')
mongo_db = mongo_client['Twitter']
mongo_collection = mongo_db['Preprocessed_Data']

print('MONGO OBJ CREATED')

# --- Cassandra ---------------------------------------------------------------
# Schema statements, executed in order: the keyspace first, then the table.
# Both use IF NOT EXISTS, so re-running this cell does not fail on an
# already-created schema.
create_keyspace_cql = """
    CREATE KEYSPACE IF NOT EXISTS Twitter
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}
"""
create_table_cql = """
    CREATE TABLE IF NOT EXISTS Twitter.Preprocessed_Dataset (
        id uuid PRIMARY KEY,
        Date text,
        sentiment_score text,
        sentiment text
    )
"""

cassandra_cluster = Cluster(['localhost'])
# No default keyspace is set on the session; every statement in this notebook
# uses the fully qualified Twitter.<table> name instead.
cassandra_session = cassandra_cluster.connect()
for schema_statement in (create_keyspace_cql, create_table_cql):
    cassandra_session.execute(schema_statement)
print('Cassandra OBJ CREATED')

In [None]:

# Load the data set that is written to both databases.
df = pd.read_csv('sentiment_results.csv')

# Baseline system readings, taken before any benchmark work starts.
# They are compared with a second set of readings in the results cell.
start_cpu = psutil.cpu_percent()
start_mem = psutil.virtual_memory().percent
start_cores = psutil.cpu_count(logical=False)

# Measure insertion time for MongoDB: a single bulk insert with one document
# per DataFrame row. The DataFrame-to-records conversion is part of the timed
# region.
start = time.time()
records = df.to_dict('records')
mongo_collection.insert_many(records)
end = time.time()
mongo_insert_time = end - start
print('DATAFRAME ADDED TO MONGO')




In [None]:
# Measure insertion time for Cassandra: one synchronous INSERT per DataFrame
# row, each keyed by a freshly generated random UUID.
insert_cql = """
        INSERT INTO Twitter.Preprocessed_Dataset (id, Date, sentiment_score, sentiment )
        VALUES (%s, %s, %s, %s)
    """

start = time.time()

for record in df.itertuples():
    # sentiment_score is stored as text in the table, hence the str() conversion.
    values = (uuid.uuid4(), record.Date, str(record.sentiment_score), record.sentiment)
    cassandra_session.execute(insert_cql, values, timeout=600)

end = time.time()
cassandra_insert_time = end - start
print('DATAFRAME ADDED TO CASSANDRA')



In [None]:
# Measure update time for MongoDB: the empty filter matches every document in
# the collection, and each one gets sentiment_score set to 50.
match_all = {}
set_score = {'$set': {'sentiment_score': 50}}

start = time.time()
mongo_collection.update_many(match_all, set_score)
end = time.time()
mongo_update_time = end - start
print("MONGODB UPDATED")



In [None]:
# Measure update time for Cassandra
#
# Row ids are random uuid4 values generated when the rows are inserted, so a
# hard-coded UUID does not match any stored row. Because a CQL UPDATE is an
# upsert, updating an unknown id silently creates a new, partial row instead of
# modifying existing data. The ids actually present in the table are therefore
# read first (outside the timed region), and every row is updated so the
# workload mirrors the MongoDB update_many({}) benchmark, which touches all
# documents.
existing_ids = [
    stored_row.id
    for stored_row in cassandra_session.execute(
        "SELECT id FROM Twitter.Preprocessed_Dataset"
    )
]

start = time.time()
for row_uuid in existing_ids:
    # sentiment_score is a text column, hence the quoted '50'.
    cassandra_session.execute("""
    UPDATE Twitter.Preprocessed_Dataset SET sentiment_score = '50' where id = %s;
""", (row_uuid,))
end = time.time()
cassandra_update_time = end - start
print("CASSANDRA UPDATED")



In [None]:
# Measure creation time for MongoDB
#
# create_collection() raises CollectionInvalid when the collection already
# exists, which made this cell fail on every run after the first. Drop any
# leftover scratch collection beforehand (outside the timed region) so the cell
# is re-runnable and always times a real creation.
# NOTE(review): the collection name looks like a typo for 'new_collection';
# it is kept unchanged here so existing databases are not affected.
mongo_db.drop_collection('new_cgoullection')
start = time.time()
mongo_db.create_collection('new_cgoullection')
end = time.time()
mongo_create_time = end - start

# Measure creation time for Cassandra
#
# With IF NOT EXISTS alone, a re-run would time a no-op because the table is
# already there. Drop the scratch table first (outside the timed region) so an
# actual CREATE TABLE is measured each time.
cassandra_session.execute("DROP TABLE IF EXISTS Twitter.new_table")
start = time.time()
cassandra_session.execute("""
    CREATE TABLE IF NOT EXISTS Twitter.new_table (
        id uuid PRIMARY KEY,
        name text,
        age int,
        city text
    )
""")
end = time.time()
cassandra_create_time = end - start


In [None]:

# Take the closing system readings and compare them with the baseline values
# (start_cpu, start_mem, start_cores) captured before the benchmarks ran.
end_cpu = psutil.cpu_percent()
# Use .percent so the reading has the same unit as start_mem; subtracting a
# percentage from .used (a byte count) produced a meaningless value in the
# 'Memory Utilization (%)' column.
end_mem = psutil.virtual_memory().percent

print("Calculation Done")

end_cores = psutil.cpu_count(logical=False)


# Create a DataFrame with the measured values.
# The CPU, memory and core deltas are measured once across the whole run, not
# per database, so the same value is reported on both rows. Only the timing
# columns are database-specific.
df = pd.DataFrame({
    'Database': ['MongoDB', 'Cassandra'],
    'CPU Usage (%)': [end_cpu - start_cpu] * 2,
    'Memory Utilization (%)': [end_mem - start_mem] * 2,
    # Difference between two physical-core counts taken at different times.
    'Core Utilization': [end_cores - start_cores] * 2,
    'Insertion Time (s)': [mongo_insert_time, cassandra_insert_time],
    'Update Time (s)': [mongo_update_time, cassandra_update_time],
    'Creation Time (s)': [mongo_create_time, cassandra_create_time]
})

# Save the DataFrame as a CSV file
df.to_csv('database_comparison.csv', index=False)