# Querying
Record times to execute queries in different databases of variable sizes

### Libraries

In [12]:
import pandas as pd
import mysql.connector
import os
import time
import pymongo
import datetime
import redis
from redisgraph import Graph
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from insert import ins

In [3]:
%load_ext dotenv
%dotenv

### Some Constants

In [4]:
MYSQL = "MySQL"
MONGO_DB = 'MongoDB'
REDIS = 'Redis'

In [5]:
HUNDRED = 'hundred'
FIVE_HUNDRED = 'five_hundred'
THOUSAND = 'thousand'
TEN_THOUSAND = 'ten_thousand'
LAKH = 'lakh'
FIVE_MILLION = 'five_mil'

### Query

In [6]:
def sql_query(query, cursor):
    begin = time.time()
    cursor.execute(query)
    cursor.fetchall()
    end = time.time()
    return end - begin

def mongo_query(db, col, query):
    begin = time.time()
    db[col].aggregate(query)
    end = time.time()
    return end - begin

def redis_query(graph, query):
    begin = time.time()
    graph.query(query)
    end = time.time()
    return end - begin

In [7]:
def exp(size, insert=True):
    if insert:
        ins(size)
    obs = {}
    # MySQL
    obs[MYSQL] = {}
    try:
        conn = mysql.connector.connect(
            host='localhost',
            database='social',
            user=os.getenv('MYSQL_USER'),
            password=os.getenv('MYSQL_PASS'),
            auth_plugin='mysql_native_password'
        )
        cursor = conn.cursor()
        # (1)
        query = """
            SELECT name, surname 
            FROM user JOIN comment USING (user_id);
        """
        obs[MYSQL][1] = sql_query(query, cursor)
        # (2)
        query = """
            SELECT * FROM user
            JOIN comment USING (user_id)
            WHERE text LIKE "%today%";
        """
        obs[MYSQL][2] = sql_query(query, cursor)
        # (3)
        query = """
            SELECT
                name,
                surname,
                created_at
            FROM user JOIN comment USING (user_id)
            WHERE
                created_at >= CURRENT_DATE AND
                created_at < DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY);
        """
        obs[MYSQL][3] = sql_query(query, cursor)
        conn.close()
    except mysql.connector.Error as error:
        print(f"Failed to connect to MySQL: {error}")
    # MongoDB
    obs[MONGO_DB] = {}
    client = pymongo.MongoClient('mongodb://localhost:27017/')
    db = client['test']
    # (1)
    query = [
        {
            '$lookup': {
                'from': 'comment',
                'localField': '_id',
                'foreignField': 'user_id',
                'as': 'comments',
            },
        },
        {
            '$match': {
                'comments': { '$not': { '$size': 0 } }
            }
        }
    ]
    obs[MONGO_DB][1] = mongo_query(db, 'user', query)
    # (2)
    query = [
        {
            '$match': {
                'text': { '$regex': 'today' }
            },
        },
        {
            '$lookup': {
                'from': 'user',
                'localField': 'user_id',
                'foreignField': '_id',
                'as': 'user',
            }
        }
    ]
    obs[MONGO_DB][2] = mongo_query(db, 'comment', query)
    # (3)
    query = [
        {
            '$match': {
                'created_at': { '$regex': f'^{datetime.date.today()}' }
            },
        },
        {
            '$lookup': {
                'from': 'user',
                'localField': 'user_id',
                'foreignField': '_id',
                'as': 'author',
            }
        }
    ]
    obs[MONGO_DB][3] = mongo_query(db, 'comment', query)
    # Redis
    SOCIAL = 'social'
    obs[REDIS] = {}
    client = redis.Redis(
        host='localhost',
        port=6379,
    )
    graph = Graph(SOCIAL, client)
    # (1)
    query = """
        MATCH r = (u:user)-[:wrote]->(c:comment)
        RETURN r
    """
    obs[REDIS][1] = redis_query(graph, query)
    # (2)
    query = """
        MATCH r = (u:user)-[:wrote]->(c:comment)
        WHERE c.text CONTAINS 'today'
        RETURN r
    """
    obs[REDIS][2] = redis_query(graph, query)
    # (3)
    query = f"""
        MATCH r = (u:user)-[:wrote]->(c:comment)
        WHERE c.created_at STARTS WITH '%s'
        RETURN r
    """ % (f'{datetime.date.today()}')
    obs[REDIS][3] = redis_query(graph, query)
    print(obs)
    return obs

## Experiment

In [10]:
result = {}

SIZES = [
    HUNDRED,
    FIVE_HUNDRED,
    THOUSAND,
    TEN_THOUSAND
]

for size in SIZES:
    print()
    print(size)
    result[size] = exp(size)


hundred

MySQL
----------
Deleting old records
Inserting users
##########
Inserting comments
##########

MongoDB
----------
Deleting old records
Inserting users
####################
Inserting comments
####################

Redis
----------
Inserting users
##########
Cached execution 0.0
internal execution time 0.259223
Inserting comments
##########
Cached execution 0.0
internal execution time 0.1977
Creating relations
##########
{'MySQL': {1: 0.00413060188293457, 2: 0.07970905303955078, 3: 0.0010268688201904297}, 'MongoDB': {1: 0.03306746482849121, 2: 0.0017173290252685547, 3: 0.0010867118835449219}, 'Redis': {1: 0.006537675857543945, 2: 0.0007801055908203125, 3: 0.00055694580078125}}

five_hundred

MySQL
----------
Deleting old records
Inserting users
##########
Inserting comments
##########

MongoDB
----------
Deleting old records
Inserting users
####################
Inserting comments
####################

Redis
----------
Inserting users
##########
Cached execution 0.0
internal ex

In [11]:
print(result)

{'hundred': {'MySQL': {1: 0.00413060188293457, 2: 0.07970905303955078, 3: 0.0010268688201904297}, 'MongoDB': {1: 0.03306746482849121, 2: 0.0017173290252685547, 3: 0.0010867118835449219}, 'Redis': {1: 0.006537675857543945, 2: 0.0007801055908203125, 3: 0.00055694580078125}}, 'five_hundred': {'MySQL': {1: 0.015064716339111328, 2: 0.539982795715332, 3: 0.002133607864379883}, 'MongoDB': {1: 0.06674647331237793, 2: 0.0008528232574462891, 3: 0.0005080699920654297}, 'Redis': {1: 0.012083768844604492, 2: 0.0007612705230712891, 3: 0.00040459632873535156}}, 'thousand': {'MySQL': {1: 0.02951955795288086, 2: 0.03324723243713379, 3: 0.0032198429107666016}, 'MongoDB': {1: 0.09136295318603516, 2: 0.0013675689697265625, 3: 0.0010421276092529297}, 'Redis': {1: 0.06814050674438477, 2: 0.0008876323699951172, 3: 0.0007178783416748047}}, 'ten_thousand': {'MySQL': {1: 0.08075118064880371, 2: 0.00867915153503418, 3: 0.0052492618560791016}, 'MongoDB': {1: 0.5689220428466797, 2: 0.007559537887573242, 3: 0.00594

In [None]:
def plot(heights, width=0.2):
    fig = plt.subplots(figsize=(12, 8))
    