In [1]:
import csv
import json
import os
import pprint
import sys
import time
from datetime import datetime

import matplotlib as mpl
import numpy as np
import pandas as pd
import psycopg2
import yaml
from sqlalchemy import create_engine, text as sql_text

sys.path.append('benchmarking/')

import util
util.hello_world()

Hello World!


In [2]:
# Connection URL of the benchmark database.  It can be overridden through the
# AIRBNB_DB_URL environment variable so that real credentials never have to be
# stored in the notebook; the fallback is the original local-development URL.
db_url = os.environ.get(
    'AIRBNB_DB_URL',
    'postgresql+psycopg2://postgres:postgres@localhost:5432/airbnb')

# The search path is pinned to the "public" schema and every connection runs
# at SERIALIZABLE isolation.
db_eng = create_engine(db_url,
                       connect_args={'options': '-csearch_path={}'.format('public')},
                       isolation_level='SERIALIZABLE')
# For debugging, echo=True or echo_pool="debug" can be added to the call above.

print("Successfully created db engine.")

Successfully created db engine.


In [3]:
# Re-import util so that edits made to benchmarking/util.py after the first
# import are picked up without restarting the kernel.
import importlib
importlib.reload(util)

<module 'util' from 'c:\\Users\\treea\\Downloads\\ECS 116\\Assignment 2\\benchmarking\\util.py'>

In [4]:
# File that the benchmark cells below read their perf data from and write it to.
json_file_name = 'update_datetimes_query.json'
# initialize json file with an empty dict
# NOTE(review): presumably this replaces results saved by an earlier run of the
# notebook -- confirm against util.write_perf_data before re-running.
util.write_perf_data({}, json_file_name)

In [5]:
def full_value_summary3c(db_eng, dict, count, json_file_name, group):
    """Benchmark each update query under four index configurations.

    For every ``query_name -> queries`` entry of ``dict`` the statistics
    returned by ``util.get_run_time_stats_single_query`` are collected, in
    this order, under the following keys:

    * ``"__"`` -- no index, runs ``queries[0]``
    * ``"__datetime_in_reviews__"`` -- datetime index on reviews,
      runs ``queries[1]``
    * ``"__neigh_in_listings__"`` -- neighbourhood index on listings,
      runs ``queries[0]``
    * ``"__datetime_in_reviews__neigh_in_listings__"`` -- both indexes,
      runs ``queries[1]``

    Args:
        db_eng: Database engine handed on to the ``util`` helpers.
        dict: Mapping of query name to a sequence holding at least two
            queries.  (The name shadows the builtin; it is kept so existing
            callers keep working.)
        count: Forwarded to ``util.get_run_time_stats_single_query``
            (presumably the number of repetitions -- confirm in util).
        json_file_name: Perf-data file that is loaded before the runs and
            written once after all queries have finished.
        group: Truthy to index ``neighbourhood_group`` on listings, falsy
            to index ``neighbourhood``.

    Returns:
        The perf summary loaded from ``json_file_name`` with one added (or
        replaced) entry per query name.

    Every index added here is dropped again even when a benchmark run
    raises, so a failed run cannot leave indexes behind that would distort
    later "no index" measurements.  In that case the exception propagates
    and nothing is written to ``json_file_name``.
    """
    # Decide once which listings column gets indexed.
    neigh_column = "neighbourhood_group" if group else "neighbourhood"

    perf_summary = util.fetch_perf_data(json_file_name)
    for query_name, queries in dict.items():
        perf_dict = {}

        # no indexes
        perf_dict["__"] = util.get_run_time_stats_single_query(db_eng, count, queries[0])

        # w/ datetime index
        with _TemporaryIndex(db_eng, "datetime", "reviews"):
            perf_dict["__datetime_in_reviews__"] = util.get_run_time_stats_single_query(db_eng, count, queries[1])

        # w/ neigh index; it stays in place for the combined measurement
        with _TemporaryIndex(db_eng, neigh_column, "listings"):
            perf_dict["__neigh_in_listings__"] = util.get_run_time_stats_single_query(db_eng, count, queries[0])

            # w/ datetime, neigh indexes
            with _TemporaryIndex(db_eng, "datetime", "reviews"):
                perf_dict["__datetime_in_reviews__neigh_in_listings__"] = util.get_run_time_stats_single_query(db_eng, count, queries[1])

        perf_summary[query_name] = perf_dict

    util.write_perf_data(perf_summary, json_file_name)
    return perf_summary


class _TemporaryIndex:
    """Context manager: add an index on entry, drop it again on exit."""

    def __init__(self, db_eng, column, table):
        self._db_eng = db_eng
        self._column = column
        self._table = table

    def __enter__(self):
        util.add_drop_index(self._db_eng, "add", self._column, self._table)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        util.add_drop_index(self._db_eng, "drop", self._column, self._table)
        # Returning False lets an exception from the with-body propagate.
        return False

In [6]:
# Bedford-Stuyvesant
neigh = "Bedford-Stuyvesant"
group = False  # a neighbourhood, so the *_neigh_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
bedstuy_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_add(neigh),
        util.build_query_update_datetimes_neigh_minus(neigh),
    ],
}

# Passed as `count` to full_value_summary3c by this and the following cells.
count = 50

In [7]:
# Run the Bedford-Stuyvesant benchmark; the returned summary is shown as the cell output.
full_value_summary3c(db_eng, bedstuy_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}}}

In [8]:
# Bronx
neigh = "Bronx"
group = True  # a neighbourhood group, so the *_neigh_group_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
bronx_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_group_add(neigh),
        util.build_query_update_datetimes_neigh_group_minus(neigh),
    ],
}
full_value_summary3c(db_eng, bronx_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},


In [9]:
# Fort Hamilton
neigh = "Fort Hamilton"
group = False  # a neighbourhood, so the *_neigh_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
fh_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_add(neigh),
        util.build_query_update_datetimes_neigh_minus(neigh),
    ],
}

full_value_summary3c(db_eng, fh_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},


In [10]:
# Long Island City
neigh = "Long Island City"
group = False  # a neighbourhood, so the *_neigh_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
lic_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_add(neigh),
        util.build_query_update_datetimes_neigh_minus(neigh),
    ],
}

full_value_summary3c(db_eng, lic_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},


In [11]:
# Manhattan
neigh = "Manhattan"
group = True  # a neighbourhood group, so the *_neigh_group_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
man_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_group_add(neigh),
        util.build_query_update_datetimes_neigh_group_minus(neigh),
    ],
}
full_value_summary3c(db_eng, man_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},


In [12]:
# New Springville
neigh = "New Springville"
group = False  # a neighbourhood, so the *_neigh_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
ns_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_add(neigh),
        util.build_query_update_datetimes_neigh_minus(neigh),
    ],
}

full_value_summary3c(db_eng, ns_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},


In [13]:
# Queens
neigh = "Queens"
group = True  # a neighbourhood group, so the *_neigh_group_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
queens_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_group_add(neigh),
        util.build_query_update_datetimes_neigh_group_minus(neigh),
    ],
}
full_value_summary3c(db_eng, queens_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},


In [14]:
# Staten Island
neigh = "Staten Island"
group = True  # a neighbourhood group, so the *_neigh_group_* query builders are used
q_name = f'update_datetimes_query_{neigh}'
staten_dict = {
    q_name: [
        util.build_query_update_datetimes_neigh_group_add(neigh),
        util.build_query_update_datetimes_neigh_group_minus(neigh),
    ],
}
full_value_summary3c(db_eng, staten_dict, count, json_file_name, group)

{'update_datetimes_query_Bedford-Stuyvesant': {'__': {'avg': 17.2498,
   'min': 8.8695,
   'max': 28.6117,
   'std': 2.3311,
   'count': 50,
   'timestamp': '2024-05-22-19:46:50'},
  '__datetime_in_reviews__': {'avg': 23.6584,
   'min': 20.9626,
   'max': 28.1196,
   'std': 1.8425,
   'count': 50,
   'timestamp': '2024-05-22-20:06:29'},
  '__neigh_in_listings__': {'avg': 22.9421,
   'min': 21.7651,
   'max': 30.6314,
   'std': 1.3607,
   'count': 50,
   'timestamp': '2024-05-22-20:25:37'},
  '__datetime_in_reviews__neigh_in_listings__': {'avg': 36.7612,
   'min': 27.3157,
   'max': 60.1002,
   'std': 8.0561,
   'count': 50,
   'timestamp': '2024-05-22-20:56:17'}},
 'update_datetimes_query_Bronx': {'__': {'avg': 15.6538,
   'min': 13.8352,
   'max': 22.5352,
   'std': 2.1529,
   'count': 50,
   'timestamp': '2024-05-22-21:13:39'},
  '__datetime_in_reviews__': {'avg': 15.9867,
   'min': 14.6668,
   'max': 21.8123,
   'std': 1.3257,
   'count': 50,
   'timestamp': '2024-05-22-21:27:01'},
