In [1]:
import csv
import importlib
import json
import os
import pprint
import sys
import time
from datetime import datetime

import matplotlib as mpl
import numpy as np
import pandas as pd
import psycopg2
import yaml
from sqlalchemy import create_engine, text as sql_text

# Make the project-local helper module importable. The entry is a relative
# path, so it is resolved against the directory the kernel was started in.
sys.path.append('benchmarking/')

import util
# Smoke test: shows that the helper module was found and imported.
util.hello_world()

Hello World!


In [2]:
# The connection URL (which embeds the credentials) can be supplied through
# the AIRBNB_DB_URL environment variable so that real credentials never have
# to be written into the notebook. When the variable is unset, the original
# local development database is used.
db_url = os.environ.get(
    'AIRBNB_DB_URL',
    'postgresql+psycopg2://postgres:postgres@localhost:5432/airbnb',
)

# search_path is pinned to the public schema for every connection, and all
# transactions run at SERIALIZABLE isolation.
# While debugging, add echo=True (log SQL statements) or echo_pool="debug"
# (log connection-pool activity) to the create_engine() call.
db_eng = create_engine(
    db_url,
    connect_args={'options': '-csearch_path=public'},
    isolation_level='SERIALIZABLE',
)

# Note: create_engine() does not open a connection yet; connectivity problems
# only surface when the first query is executed.
print("Successfully created db engine.")

Successfully created db engine.


In [3]:
# Pick up edits made to benchmarking/util.py without restarting the kernel.
# importlib is imported together with the other modules in the first cell.
importlib.reload(util)

<module 'util' from 'c:\\Users\\treea\\Downloads\\ECS 116\\Assignment 2\\benchmarking\\util.py'>

In [4]:
# File name under which the benchmark cells below store their timing results.
json_file_name = 'text_search_query.json'
# initialize json file
# NOTE(review): an empty dict is written under that name, so re-running this
# cell presumably discards results collected earlier; confirm against
# util.write_perf_data before re-executing.
util.write_perf_data({}, json_file_name)

In [5]:
def full_value_summary3b(db_eng, dict, count, json_file_name):
    """Time every query pair with and without the ``datetime`` index on ``reviews``.

    The summary starts from whatever ``util.fetch_perf_data(json_file_name)``
    returns, so results of earlier calls are kept. For each entry of ``dict``
    four timings are collected with ``util.get_run_time_stats_single_query``
    and stored under these keys:

    * ``"__"``: first query, before the datetime index is added
    * ``"__comments_tsv_in_reviews__"``: second query, before the index is added
    * ``"__datetime_in_reviews__"``: first query, after the index is added
    * ``"__datetime_in_reviews__comments_tsv_in_reviews__"``: second query,
      after the index is added

    The datetime index is added through ``util.add_drop_index`` and is always
    dropped again, even when a timing run raises. A leftover index would
    otherwise distort the "before" timings of every later query.

    Args:
        db_eng: Database engine handed through to the ``util`` helpers.
        dict: Mapping of query name to a sequence whose element 0 is the first
            query and element 1 the second query. The parameter name shadows
            the builtin; it is kept so existing callers keep working.
        count: Forwarded unchanged to ``util.get_run_time_stats_single_query``.
        json_file_name: Name of the results file to read from and write to.

    Returns:
        The merged summary: previously stored results plus one entry per query
        name in ``dict``.

    Raises:
        IndexError: If a value in ``dict`` has fewer than two queries. This is
            raised before any benchmark for that entry is run.
    """
    perf_summary = util.fetch_perf_data(json_file_name)

    for query_name, queries in dict.items():
        # Resolve both queries up front so a malformed entry fails before any
        # (potentially long) benchmark is started.
        first_query, second_query = queries[0], queries[1]
        perf_dict = {}

        # Baseline runs: the datetime index has not been added yet.
        perf_dict["__"] = util.get_run_time_stats_single_query(
            db_eng, count, first_query)
        perf_dict["__comments_tsv_in_reviews__"] = util.get_run_time_stats_single_query(
            db_eng, count, second_query)

        # Runs with the datetime index in place.
        util.add_drop_index(db_eng, "add", "datetime", "reviews")
        try:
            perf_dict["__datetime_in_reviews__"] = util.get_run_time_stats_single_query(
                db_eng, count, first_query)
            perf_dict["__datetime_in_reviews__comments_tsv_in_reviews__"] = (
                util.get_run_time_stats_single_query(db_eng, count, second_query))
        finally:
            # Always restore the index-free state for the next baseline run.
            util.add_drop_index(db_eng, "drop", "datetime", "reviews")

        perf_summary[query_name] = perf_dict

    util.write_perf_data(perf_summary, json_file_name)
    return perf_summary

In [6]:
# apartment
# Years to benchmark; the later keyword cells iterate over the same list.
yrs = [2009, 2010, 2011, 2012, 2013, 2014, 2017, 2019, 2023]

# One entry per year, covering Jan 1 - Dec 31 for the keyword 'apartment':
# [query from build_query_text_search_without_index,
#  query from build_query_text_search_with_index]
apartment_dict = {}
for year in yrs:
    first_day, last_day = f'{year}-01-01', f'{year}-12-31'
    q_name = f'apartment_{year}'
    apartment_dict[q_name] = [
        util.build_query_text_search_without_index(first_day, last_day, 'apartment'),
        util.build_query_text_search_with_index(first_day, last_day, 'apartment'),
    ]

# Passed as `count` to full_value_summary3b by every benchmark cell below.
count = 50

In [7]:
# Benchmark the 'apartment' queries. The call returns the merged summary
# (stored results plus this run), which is displayed as the cell output.
full_value_summary3b(db_eng, apartment_dict, count, json_file_name)

{'apartment_2009': {'__': {'avg': 4.6067,
   'min': 3.0625,
   'max': 4.8836,
   'std': 0.247,
   'count': 50,
   'timestamp': '2024-05-22-02:03:29'},
  '__comments_tsv_in_reviews__': {'avg': 0.5096,
   'min': 0.4826,
   'max': 0.5502,
   'std': 0.015,
   'count': 50,
   'timestamp': '2024-05-22-02:03:59'},
  '__datetime_in_reviews__': {'avg': 0.003,
   'min': 0.0,
   'max': 0.0169,
   'std': 0.0057,
   'count': 50,
   'timestamp': '2024-05-22-02:04:00'},
  '__datetime_in_reviews__comments_tsv_in_reviews__': {'avg': 0.021,
   'min': 0.0041,
   'max': 0.0673,
   'std': 0.0146,
   'count': 50,
   'timestamp': '2024-05-22-02:04:01'}},
 'apartment_2010': {'__': {'avg': 4.8058,
   'min': 4.4979,
   'max': 5.0269,
   'std': 0.0989,
   'count': 50,
   'timestamp': '2024-05-22-02:07:57'},
  '__comments_tsv_in_reviews__': {'avg': 0.5126,
   'min': 0.4976,
   'max': 0.5513,
   'std': 0.0131,
   'count': 50,
   'timestamp': '2024-05-22-02:08:27'},
  '__datetime_in_reviews__': {'avg': 0.0086,
   '

In [8]:
# awesome
# Same layout as the other keyword cells, for the keyword 'awesome'; relies on
# `yrs` defined in an earlier cell. Each value is
# [query from build_query_text_search_without_index,
#  query from build_query_text_search_with_index].
awesome_dict = {}
for year in yrs:
    first_day, last_day = f'{year}-01-01', f'{year}-12-31'
    q_name = f'awesome_{year}'
    awesome_dict[q_name] = [
        util.build_query_text_search_without_index(first_day, last_day, 'awesome'),
        util.build_query_text_search_with_index(first_day, last_day, 'awesome'),
    ]

In [9]:
# Benchmark the 'awesome' queries. The displayed summary also contains the
# results already stored in the file, because the function merges new
# timings into the previously saved data before returning it.
full_value_summary3b(db_eng, awesome_dict, count, json_file_name)

{'apartment_2009': {'__': {'avg': 4.6067,
   'min': 3.0625,
   'max': 4.8836,
   'std': 0.247,
   'count': 50,
   'timestamp': '2024-05-22-02:03:29'},
  '__comments_tsv_in_reviews__': {'avg': 0.5096,
   'min': 0.4826,
   'max': 0.5502,
   'std': 0.015,
   'count': 50,
   'timestamp': '2024-05-22-02:03:59'},
  '__datetime_in_reviews__': {'avg': 0.003,
   'min': 0.0,
   'max': 0.0169,
   'std': 0.0057,
   'count': 50,
   'timestamp': '2024-05-22-02:04:00'},
  '__datetime_in_reviews__comments_tsv_in_reviews__': {'avg': 0.021,
   'min': 0.0041,
   'max': 0.0673,
   'std': 0.0146,
   'count': 50,
   'timestamp': '2024-05-22-02:04:01'}},
 'apartment_2010': {'__': {'avg': 4.8058,
   'min': 4.4979,
   'max': 5.0269,
   'std': 0.0989,
   'count': 50,
   'timestamp': '2024-05-22-02:07:57'},
  '__comments_tsv_in_reviews__': {'avg': 0.5126,
   'min': 0.4976,
   'max': 0.5513,
   'std': 0.0131,
   'count': 50,
   'timestamp': '2024-05-22-02:08:27'},
  '__datetime_in_reviews__': {'avg': 0.0086,
   '

In [10]:
# horrible
# Same layout as the other keyword cells, for the keyword 'horrible'; relies
# on `yrs` defined in an earlier cell. Each value is
# [query from build_query_text_search_without_index,
#  query from build_query_text_search_with_index].
horrible_dict = {}
for year in yrs:
    first_day, last_day = f'{year}-01-01', f'{year}-12-31'
    q_name = f'horrible_{year}'
    horrible_dict[q_name] = [
        util.build_query_text_search_without_index(first_day, last_day, 'horrible'),
        util.build_query_text_search_with_index(first_day, last_day, 'horrible'),
    ]

In [11]:
# Benchmark the 'horrible' queries. The displayed summary also contains the
# results already stored in the file, because the function merges new
# timings into the previously saved data before returning it.
full_value_summary3b(db_eng, horrible_dict, count, json_file_name)

{'apartment_2009': {'__': {'avg': 4.6067,
   'min': 3.0625,
   'max': 4.8836,
   'std': 0.247,
   'count': 50,
   'timestamp': '2024-05-22-02:03:29'},
  '__comments_tsv_in_reviews__': {'avg': 0.5096,
   'min': 0.4826,
   'max': 0.5502,
   'std': 0.015,
   'count': 50,
   'timestamp': '2024-05-22-02:03:59'},
  '__datetime_in_reviews__': {'avg': 0.003,
   'min': 0.0,
   'max': 0.0169,
   'std': 0.0057,
   'count': 50,
   'timestamp': '2024-05-22-02:04:00'},
  '__datetime_in_reviews__comments_tsv_in_reviews__': {'avg': 0.021,
   'min': 0.0041,
   'max': 0.0673,
   'std': 0.0146,
   'count': 50,
   'timestamp': '2024-05-22-02:04:01'}},
 'apartment_2010': {'__': {'avg': 4.8058,
   'min': 4.4979,
   'max': 5.0269,
   'std': 0.0989,
   'count': 50,
   'timestamp': '2024-05-22-02:07:57'},
  '__comments_tsv_in_reviews__': {'avg': 0.5126,
   'min': 0.4976,
   'max': 0.5513,
   'std': 0.0131,
   'count': 50,
   'timestamp': '2024-05-22-02:08:27'},
  '__datetime_in_reviews__': {'avg': 0.0086,
   '