## <span style=color:blue>Patterns used in Programming Assignment 2 (version mostly avoiding the util.py file)  </span>

In [1]:
import csv
import json
import os
import pprint
import sys
import time
from datetime import datetime
# see https://stackoverflow.com/questions/415511/how-do-i-get-the-current-time-in-python
#   for some basics about datetime

import matplotlib as mpl
import numpy as np
import pandas as pd
import yaml

# sqlalchemy 2.0 documentation: https://www.sqlalchemy.org/
import psycopg2
from sqlalchemy import create_engine, text as sql_text
from sqlalchemy.engine import URL

# the following is deprecated, it seems, so using the sqlalchemy
# from pyscopg2 import sqlio

# the file in benchmarking/util.py should hold utilities useful for your benchmarking exercise
# In this notebook we have commented out all mentions of util, so that you can run
#    this notebook before setting up your benchmarking/util.py file
sys.path.append('benchmarking/')
import util
# to invoke a function "foo()" inside util.py, use "util.foo()"

In [2]:
# Sanity check that benchmarking/util.py (imported above as `util`) loaded correctly
util.hello_world()

Hello World!


### <span style=color:blue>For this exercise you will use four .csv files from AirBnB.</span>

<span style=color:blue>You can find the files at https://drive.google.com/drive/folders/14gWh0ck3vzWxyakaWHHH38AgWY7UC-IQ?usp=sharing </span> 

### <span style=color:blue>Setting up Postgres connection.  Note database name is "airbnb" </span>

### <span style=color:blue>Note: this should be modified so that the user name/password are not included in the program. </span>

<span style=color:blue>E.g., see https://docs.sqlalchemy.org/en/20/core/engines.html for how to construct the URLs that the create_engine command uses.  Also, one should store the user/password into environment variables and read them in to populate the URL.  </span>

<span style=color:blue>E.g., see https://stackoverflow.com/questions/4906977/how-can-i-access-environment-variables-in-python for how to work with environment variables on a Mac.</span>

In [3]:
# following https://www.geeksforgeeks.org/connecting-postgresql-with-sqlalchemy-in-python/

# Connection settings are read from the environment so that no user name or
# password is stored in the notebook.  The variable names are the ones libpq
# itself uses (PGUSER, PGPASSWORD, PGHOST, PGPORT); export them before starting
# Jupyter.  If PGPASSWORD is unset, no password is placed in the URL.
# URL.create() escapes special characters in the password, which a hand-built
# URL string would not.
db_url = URL.create(
    drivername='postgresql+psycopg2',
    username=os.environ.get('PGUSER', 'postgres'),
    password=os.environ.get('PGPASSWORD'),
    host=os.environ.get('PGHOST', 'localhost'),
    port=int(os.environ.get('PGPORT', '5432')),
    database='airbnb',
)

db_eng = create_engine(db_url,
                       connect_args={'options': '-csearch_path={}'.format('public')},
                       isolation_level = 'SERIALIZABLE')
#    , echo=True)
#    , echo_pool="debug")

print("Successfully created db engine.")

# connect_args is used to set search_path to the schema 'public' in the airbnb database

# isolation_level SERIALIZABLE makes transactions happen in sequence, which is good 
#      for the benchmarking we will be doing

# for general info on sqlalchemy connections,
#    see: https://docs.sqlalchemy.org/en/20/core/connections.html

# echo from https://docs.sqlalchemy.org/en/20/core/engines.html

Successfully created db engine.


### <span style=color:blue>Here is a pattern for using db_eng for queries</span>

In [4]:
q1 = """ 
SELECT *
FROM reviews 
WHERE date >= '2015-01-01' 
  AND date <= '2015-12-31' 
"""
q2 = """ 
SELECT count(*)
FROM reviews 
WHERE date >= '2015-01-01' 
  AND date <= '2015-12-31' 
"""

# You can use conn.execute, which populates a cursor, in this case "result1" or "result2"
# Alternatively, you can use pd.read_sql, which populates a dataframe
#
# Rows are pulled out of the cursors INSIDE the `with` block: once the block
# exits the connection has been released, so reading from a cursor afterwards
# depends on driver-side buffering rather than on a live connection.
with db_eng.connect() as conn:
    result1 = conn.execute(sql_text(q1))   # sql_text is sqlalchemy.text (see the import cell)
    result1_sample = result1.fetchmany(3)  # result is large, so only look at a few rows
    df1 = pd.read_sql(q1, con=conn)

    result2 = conn.execute(sql_text(q2))
    result2_rows = result2.all()           # result is small, so can fetch all of it
    df2 = pd.read_sql(q2, con=conn)
    # conn.close() is automatically added to the end of this block

print()
print(type(result1))
print()
print(type(df1))
print()
pprint.pp(result1_sample, width=120)
print()
pprint.pp(df1.head(3))
print()
print(result2_rows)
print()
pprint.pp(df2.head(10))


<class 'sqlalchemy.engine.cursor.CursorResult'>

<class 'pandas.core.frame.DataFrame'>

[('711635', '29420421', datetime.date(2015, 4, 7), '17785676', 'Isabelle', "We spent the last Easter holidays in Barry's apartment. We had a great time and felt very homy. This east village location is fantastic. I really recommend this place. ", datetime.datetime(2015, 4, 7, 12, 0)),
 ('651375', '36107844', datetime.date(2015, 6, 25), '9371676', 'David', 'Great location. Spacious and comfortable, gracious and responsive host.', datetime.datetime(2015, 6, 25, 12, 0)),
 ('712136', '32735980', datetime.date(2015, 5, 20), '3353725', 'Yevgeniy', "Marianne's house is very nice. I recommend for everyone her place for staying!", datetime.datetime(2015, 5, 20, 12, 0))]

  listing_id        id        date reviewer_id reviewer_name  \
0     711635  29420421  2015-04-07    17785676      Isabelle   
1     651375  36107844  2015-06-25     9371676         David   
2     712136  32735980  2015-05-20     3353725  

### <span style=color:blue>Example of pattern for creating parameterized functions for creating (parameterized) queries</span>

<span style=color:blue>As part of Programming Assignment 2, you will create several query-building functions
and put them into your util.py file.</span>

In [5]:
def build_query_reviews_count(date1, date2):
    """Build the text of a query counting reviews in a date range.

    date1 -- lower bound (inclusive) for reviews.date, as a string such as '2015-01-01'
    date2 -- upper bound (inclusive) for reviews.date, as a string

    Both strings are spliced verbatim between single quotes; the returned
    query text starts and ends with a newline.
    """
    pieces = (
        "\nSELECT count(*)\nFROM reviews\nWHERE date >= '",
        date1,
        "'\n  AND date <= '",
        date2,
        "';\n",
    )
    return "".join(pieces)

print(build_query_reviews_count('2015-01-01', '2015-12-31'))


SELECT count(*)
FROM reviews
WHERE date >= '2015-01-01'
  AND date <= '2015-12-31';



<span style=color:blue>We now show a query that will be used below to illustrate various things. You should build a function, perhaps called "build_query_listings_join_reviews", that takes two parameters (a start date and an end date) and builds this kind of query. </span> 

In [6]:
# def build_query_listings_join_reviews(date1, date2):
#     q24 = """
#     SELECT DISTINCT l.id, l.name
#     FROM listings l, reviews r
#     WHERE l.id = r.listing_id
#       AND r.date >= '"""
#     q25 = """'
#       AND r.date <= '"""
#     q26 = """'
#     ORDER BY l.id;
#     """
#     return q24 + date1 + q25 + date2 + q26

# print(build_query_listings_join_reviews('2015-01-01', '2015-12-31'))

In [7]:
q_listings_join_reviews_2015 = """
SELECT DISTINCT l.id, l.name
FROM listings l, reviews r 
WHERE l.id = r.listing_id
  AND r.date >= '2015-01-01'
  AND r.date <= '2015-12-31'
ORDER BY l.id;
"""

# The rest of this cell needs build_query_listings_join_reviews() to be defined
#    in your util.py file; comment it out until that function exists.

# q_dict maps a query name to the query text, one entry per benchmark year.
# For reference, the number of entries in the reviews table per year:
#    2013:   7,317
#    2015:  28,465
#    2019: 126,469
#    2023: 228,831
q_dict = {}
for year in (2013, 2015, 2019, 2023):
    q_dict[f'listings_join_reviews_{year}'] = util.build_query_listings_join_reviews(
        f'{year}-01-01', f'{year}-12-31')

# show each generated query, followed by a blank line
for query_text in q_dict.values():
    print(query_text)
    print()


    SELECT DISTINCT l.id, l.name
    FROM listings l, reviews r
    WHERE l.id = r.listing_id
      AND r.datetime >= '2013-01-01'
      AND r.datetime <= '2013-12-31'
    ORDER BY l.id;
    


    SELECT DISTINCT l.id, l.name
    FROM listings l, reviews r
    WHERE l.id = r.listing_id
      AND r.datetime >= '2015-01-01'
      AND r.datetime <= '2015-12-31'
    ORDER BY l.id;
    


    SELECT DISTINCT l.id, l.name
    FROM listings l, reviews r
    WHERE l.id = r.listing_id
      AND r.datetime >= '2019-01-01'
      AND r.datetime <= '2019-12-31'
    ORDER BY l.id;
    


    SELECT DISTINCT l.id, l.name
    FROM listings l, reviews r
    WHERE l.id = r.listing_id
      AND r.datetime >= '2023-01-01'
      AND r.datetime <= '2023-12-31'
    ORDER BY l.id;
    



### <span style=color:blue>Here is a pattern for computing the run-time of something, e.g., a query or an update.</span>

<span style=color:blue>You should also put this into your util.py file.</span>

In [8]:
def time_diff(time1, time2):
    """Return the time elapsed from time1 to time2, in seconds.

    time1, time2 -- values whose difference (time2 - time1) provides
                    total_seconds(), e.g. two datetime objects
    The result is negative when time2 is earlier than time1.
    """
    elapsed = time2 - time1
    return elapsed.total_seconds()

# Try it out: the sleep stands in for the query or update being timed,
#   so the printed value should be a little over 0.5 seconds
time1 = datetime.now()
time.sleep(0.5)
time2 = datetime.now()

elapsed_seconds = time_diff(time1, time2)
print(elapsed_seconds)
    

0.502935


### <span style=color:blue>Here is an example of running a query multiple times, and keeping track of run times</span>

<span style=color:blue>As part of Programming Assignment 2, you should create a general-purpose function for doing this,
and put it into your util.py file.</span>

<span style=color:blue>In the illustration below we read the output of the query into a dataframe, which ensures that the entire output is computed and exported by PostgreSQL.  If we read the output into a cursor, then PostgreSQL might use a "lazy" approach, and not compute the full query output until we scroll through the cursor. </span>

In [9]:
# def get_run_time_stats_single_query(db_eng, count, query):
#     time_list = []        
#     for i in range(0,count): 
#         time_start = datetime.now()

#         with db_eng.connect() as conn:
#             df = pd.read_sql(query, con=conn)

#         time_end = datetime.now()
#         diff = time_diff(time_start, time_end)
#         time_list.append(diff)
            
#     pprint.pp(time_list)
#     print(round(sum(time_list)/len(time_list), 4), \
#         round(min(time_list), 4), \
#         round(max(time_list), 4), \
#         round(np.std(time_list), 4))

# get_run_time_stats_single_query(db_eng, 20, q_listings_join_reviews_2015)

In [10]:
# test_result = util.get_run_time_stats_single_query(db_eng, 20, q_listings_join_reviews_2015)
# test_result

In [11]:
# Time `count` executions of q_listings_join_reviews_2015 (defined above)
#   and report avg / min / max / std of the run times, in seconds.

count = 20

time_list = []
for _ in range(count):
    # Each run opens its own connection inside the timed region, so the
    #   measurement includes obtaining the connection as well as the query.
    time_start = datetime.now()
    with db_eng.connect() as conn:
        df = pd.read_sql(q_listings_join_reviews_2015, con=conn)
    time_end = datetime.now()

    time_list.append(time_diff(time_start, time_end))

pprint.pp(time_list)

avg_time = sum(time_list) / len(time_list)
print(round(avg_time, 4),
      round(min(time_list), 4),
      round(max(time_list), 4),
      round(np.std(time_list), 4))

[0.77639,
 0.487714,
 0.535308,
 0.564647,
 0.917466,
 0.532523,
 0.549879,
 0.504005,
 0.530573,
 0.481914,
 0.549427,
 0.448329,
 0.537915,
 0.511184,
 0.516364,
 0.630872,
 0.488022,
 0.571781,
 0.543777,
 0.631617]
0.5655 0.4483 0.9175 0.1058


### <span style=color:blue>Here is a pattern for adding/dropping indexes. </span>

<span style=color:blue>As part of Programming Assignment 2 you should create a general-purpose parameterized function that can be used to add or drop an index with a given name, focused on a given table and on a given column of that table.  After testing that the function behaves as you expect, you should put that function into the file util.py. </span>

<span style=color:blue>For this function, I used the name add_drop_index() with four arguments:  db_eng, add/drop, column to index, table.  I assume a systematic naming of the indexes, having the form &lt;col_name&gt;_in_&lt;table_name&gt;</span>

<span style=color:blue>(The "show_indexes" queries are mainly for testing that the add/drop index functions are working correctly.)</span>

In [12]:
# def add_drop_index(db_eng, add_or_drop, col, table):
#     q_create_index = f'''
#     BEGIN TRANSACTION;
#     CREATE INDEX IF NOT EXISTS {col}_in_{table}
#     ON {table}({col});
#     END TRANSACTION;
#     '''

#     q_drop_index = f'''
#     BEGIN TRANSACTION;
#     DROP INDEX IF EXISTS {col}_in_{table};
#     END TRANSACTION;
#     '''

#     q_show_indexes = f'''
#     select *
#     from pg_indexes
#     where tablename = '{table}';
#     '''

#     with db_eng.connect() as conn:
#         if add_or_drop == 'add':
#             conn.execute(sql_text(q_create_index))
#         elif add_or_drop == 'drop':
#             conn.execute(sql_text(q_drop_index))
#         result = conn.execute(sql_text(q_show_indexes))
#         # print()
#         # print(f'The set of indexes on {table} is: ')
#         # print(result.all())
#         return result.all()

# test_result = add_drop_index(db_eng, 'add', 'date', 'reviews')
# test_result

In [5]:
# SQL text for creating / dropping the index date_in_reviews on reviews(date)
q_create_date_in_reviews = '''
BEGIN TRANSACTION;
CREATE INDEX IF NOT EXISTS date_in_reviews
ON reviews(date);
END TRANSACTION;
'''

q_drop_date_in_reviews = '''
BEGIN TRANSACTION;
DROP INDEX IF EXISTS date_in_reviews;
END TRANSACTION;
'''

# SQL text for listing every index currently defined on the reviews table
q_show_indexes_for_reviews = '''
select *
from pg_indexes
where tablename = 'reviews';
'''

# Uncomment one of the two execute() lines below to create or drop the index;
#     as written, this cell only lists the indexes.
# NOTE(review): the original comment said that using a code block ensures the
#     index change is committed on completion.  The statements carry their own
#     BEGIN/END TRANSACTION, which is presumably what commits them -- confirm
#     against the SQLAlchemy 2.0 transaction documentation.
with db_eng.connect() as conn:
    # conn.execute(sql_text(q_create_date_in_reviews))
    # conn.execute(sql_text(q_drop_date_in_reviews))
    result_reviews = conn.execute(sql_text(q_show_indexes_for_reviews))
    reviews_index_rows = result_reviews.all()
    print('\nThe set of indexes on reviews is: ')
    print(reviews_index_rows)



The set of indexes on reviews is: 
[('public', 'reviews', 'datetime_in_reviews', None, 'CREATE INDEX datetime_in_reviews ON public.reviews USING btree (datetime)'), ('public', 'reviews', 'comments_tsv_in_reviews', None, 'CREATE INDEX comments_tsv_in_reviews ON public.reviews USING gin (comments_tsv)')]


: 

### <span style=color:blue>Now there is an index on the date column of reviews.  Rerun the preceding cell to see if the performance on the query q_listings_join_reviews_2015 has changed </span>

### <span style=color:blue>The performance results will be held in a file 'perf_data/perf_summary.json' in your base directory. The format of this json file is described here. </span>

<span style=color:blue> Also, this cell shows functions for fetching the previous performance data (stored as json in  "perf_data/perf_summary.json"), and then writing it out again (after you have added more data).  This will allow you to run numerous tests at different times, but keep all of the results in one place.</span>


In [15]:
# the key for each entry of perf_dict will be the name of a query or update
# the value for each entry of perf_dict will be a "perf_dict" with keys that 
#     list all indexes that were in force at the time of the test run.  E.g.:
# 
#        { '__' : ...,                                     -- i.e., no indexes in force
#          '__id_in_listings__' : ...,                     -- indexes in force: { id_in_listings }  
#          '__date_in_reviews__' : ...,                    -- indexes in force: { date_in_reviews }
#          '__date_in_reviews__id_in_listings__' : ... }   -- indexes in force: { date_in_reviews, id_in_listings }

# the value for each entry of the inner dict will have be a "performance profile" (perf_prof):
#       having shape {avg: ..., min: ..., max: ..., std: ...}
# (please see below for an example)


# fetches filename (which should be a json file) and returns a 
#       dict corresponding to the contents of filename
def fetch_perf_data(filename):
    """Load and return the JSON content of perf_data/<filename>.

    filename -- name of a JSON file inside the perf_data/ directory
                (relative to the current working directory)

    Returns whatever json.load() produces for the file (a dict for the
    performance-summary files used in this notebook).
    Raises FileNotFoundError if the file does not exist and
    json.JSONDecodeError if it does not contain valid JSON.
    """
    # `with` closes the file handle even if json.load() raises
    with open('perf_data/' + filename) as f:
        return json.load(f)

# writes the dictionary in dict as a json file into filename
def write_perf_data(dict, filename):
    """Write `dict` as JSON to perf_data/<filename>, replacing any existing file.

    dict     -- JSON-serializable object to store (the parameter keeps its
                original name, which shadows the builtin inside this function)
    filename -- name of the target file inside the perf_data/ directory
                (relative to the current working directory)

    The target directory is created if it does not exist yet, so the very
    first call works without any manual setup.
    Raises TypeError if `dict` contains values json cannot serialize.
    """
    path = 'perf_data/' + filename
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as fp:
        json.dump(dict, fp)

# testing:
# test = { 'foo': 'goo', 'foo1' : {'hoo': 'boo', 'zoo': 'loo'}}
# write_perf_data(test, 'test.json')
# dict = fetch_perf_data('test.json')
# pprint.pp(dict, indent=4)

{'foo': 'goo', 'foo1': {'hoo': 'boo', 'zoo': 'loo'}}


<span style=color:blue>Run the next code once to initialize the file 'perf_data/perf_summary.json'; then comment it out!</span>

In [16]:
# initialize the performance data perf_summary.json file to {}
# write_perf_data({}, 'perf_summary.json')

# sanity check
# perf_summary = fetch_perf_data('perf_summary.json')
# pprint.pp(perf_summary, indent=4)

{}


### <span style=color:blue>Here is an illustration of how you can perform one test (with specified indexes) on one query</span>

#### <span style=color:blue>CAUTION: the next cell is using two functions that I have set up in my benchmarking/util.py file, so it will not run for you until you set up these functions.  </span>

<span style=color:blue>As part of the programming exercise, you should create one or more parameterized functions that will enable you to invoke this kind of test numerous times, on a selected query/update and a set of selected indexes.</span>

<span style=color:blue>To provide a small illustration of the family of performance values that you will be obtaining I have run the following cell four times on the same query, but using different combinations of indexes.  Can you explain why there are different running times for different combinations of indexes?  Also, do you get roughly the same numbers as I do -- why or why not?  Do you get the same numbers if you run the test for a given set of indexes twice -- why or why not?</span>


In [21]:
# all_indexes lists every index that takes part in the testing, as
#   ['col','table'] pairs: an index on column 'col' of table 'table'.
# There are 3 indexes for now; the list will grow with further explorations
#   (the full Programming Assignment 2 works with 4 to 6 indexes).
# (in an ideal world, we would keep a copy of this on disk, probably in your computer's file system,
#   and read it in when we want to use it and/or add to it.)
all_indexes = [
    ['date', 'reviews'],
    ['date', 'calendar'],
    ['id', 'listings'],
]

# results of earlier test runs; this run's numbers are merged into it below
perf_summary = fetch_perf_data('perf_summary.json')

# Name under which this query's results are filed: the different runs for a
#   query are held in perf_summary[<<query_name>>]
query_name = 'listings_join_reviews_2015'

# same query as above
# q = q_dict[query_name]
q_listings_join_reviews_2015 = """
SELECT DISTINCT l.id, l.name
FROM listings l, reviews r 
WHERE l.id = r.listing_id
  AND r.date >= '2015-01-01'
  AND r.date <= '2015-12-31'
ORDER BY l.id;
"""

# spec: the column-table pairs whose indexes should be in place for this test.
# This cell has been run once for each of the 4 specs listed here.
spec = [
    ['id', 'listings'],
    ['date', 'reviews'],
]
# spec = [['date','reviews']]
# spec = [['id','listings']]
# spec = []

# how many times the query is executed
count = 50

print('Processing spec: ', str(spec), '\n')

# Step 1: drop every known index that this spec does not ask for
for index in all_indexes:
    if index in spec:
        continue
    column, table = index[0], index[1]
    mod_index = util.add_drop_index(db_eng, 'drop', column, table)
    print('\nAfter doing the drop for', str(index), 'the indexes on table "' + table + '" are: ')
    print(mod_index)

# Step 2: add every index that the spec asks for
for index in spec:
    column, table = index[0], index[1]
    mod_index = util.add_drop_index(db_eng, 'add', column, table)
    print('\nAfter doing the add for', str(index), 'the indexes on table "' + table + '" are: ')
    print(mod_index)

# Step 3: run the query `count` times, timing each run.  Each run opens its
#   own connection inside the timed region.
time_list = []
for _ in range(count):
    time_start = datetime.now()
    with db_eng.connect() as conn:
        df = pd.read_sql(q_listings_join_reviews_2015, con=conn)
    time_end = datetime.now()
    time_list.append(time_diff(time_start, time_end))

# Step 4: summarize the run times (seconds, rounded to 4 places)
perf_profile = {
    'avg': round(sum(time_list)/len(time_list), 4),
    'min': round(min(time_list), 4),
    'max': round(max(time_list), 4),
    'std': round(np.std(time_list), 4),
}

print('\nThe list of running times is as follows:')
pprint.pp(time_list)

print('\nThe statistics on the list of running times are as follows:')
pprint.pp(perf_profile)

# util.build_index_description_key() creates a listing of strings corresponding
#    to the entries in spec, and concatenates them in the ordering given by all_indexes
#    For example, the description_key associated with having indexes date_in_reviews and id_in_listings
#        would be '__date_in_reviews__id_in_listings__'
#        (You probably want to use a uniform ordering of index names when you create these description_keys)
key_value = util.build_index_description_key(all_indexes, spec)
print('\nThe new value for"' + key_value + '"will be', str(perf_profile))


# we may have run some other tests with the query named by query_name and
#   we don't want to overwrite those.  So we need to get the full contents
#   of perf_summary[query_name] and then
#   write (or overwrite) the value for the current list of indexes used

if query_name in perf_summary:
    perf_dict = perf_summary[query_name]
    print("\nBefore modifying perf_dict, the value of perf_summary[query_name] (if it existed) was: ")
    pprint.pp(perf_dict)
else:
    perf_dict = {}
    print("\nBefore modifying perf_dict, the value of perf_summary[query_name] had empty value")
print()
perf_dict[key_value] = perf_profile
# Store under query_name (not a hard-coded literal) so that the entry read
#   above and the entry written here are always the same one, even when
#   query_name is changed to benchmark a different query.
perf_summary[query_name] = perf_dict

print("\nAfter modifying perf_dict, the value of perf_summary[query_name] is: ")
pprint.pp(perf_summary[query_name])
print()

print('\nThe full value of perf_summary is:')
pprint.pp(perf_summary)

write_perf_data(perf_summary, 'perf_summary.json')


Processing spec:  [['id', 'listings'], ['date', 'reviews']] 


After doing the drop for ['date', 'calendar'] the indexes on table "calendar" are: 
[]

After doing the add for ['id', 'listings'] the indexes on table "listings" are: 
[('public', 'listings', 'id_in_listings', None, 'CREATE INDEX id_in_listings ON public.listings USING btree (id)')]

After doing the add for ['date', 'reviews'] the indexes on table "reviews" are: 
[('public', 'reviews', 'datetime_in_reviews', None, 'CREATE INDEX datetime_in_reviews ON public.reviews USING btree (datetime)'), ('public', 'reviews', 'date_in_reviews', None, 'CREATE INDEX date_in_reviews ON public.reviews USING btree (date)')]

The list of running times is as follows:
[0.298813,
 0.230117,
 0.232912,
 0.285788,
 0.281809,
 0.315503,
 0.222839,
 0.211974,
 0.237548,
 0.194338,
 0.253574,
 0.203956,
 0.276008,
 0.368474,
 0.305511,
 0.228403,
 0.231432,
 0.266187,
 0.232999,
 0.269418,
 0.238149,
 0.199377,
 0.22333,
 0.3832,
 0.253064,
 0.220545

In [22]:
def full_value_summary(db_eng, query, query_name, spec, all_indexes, count):
    """Benchmark one query under one index configuration and record the result.

    db_eng      -- engine handed to the util helpers
    query       -- query text passed to util.get_run_time_stats_single_query
    query_name  -- key under which results for this query are kept in the summary
    spec        -- [column, table] pairs whose indexes are added before the run
    all_indexes -- every known [column, table] pair; those not in spec are dropped
    count       -- passed to util.get_run_time_stats_single_query
                   (presumably the number of executions -- see util.py)

    Reads perf_data/perf_summary.json, stores the new measurement under
    [query_name][<index description key>] (replacing an earlier measurement
    for the same key, keeping all others), writes the file back, and returns
    the updated summary dict.
    """
    perf_summary = fetch_perf_data('perf_summary.json')

    # bring the database into the requested index configuration:
    #   first remove the indexes that are not wanted, then create the wanted ones
    for index in (candidate for candidate in all_indexes if candidate not in spec):
        util.add_drop_index(db_eng, 'drop', index[0], index[1])
    for index in spec:
        util.add_drop_index(db_eng, 'add', index[0], index[1])

    # measure, and work out the key describing the indexes in force
    perf_profile = util.get_run_time_stats_single_query(db_eng, count, query)
    key_value = util.build_index_description_key(all_indexes, spec)

    # merge into the results already recorded for this query (if any)
    perf_summary.setdefault(query_name, {})[key_value] = perf_profile

    write_perf_data(perf_summary, 'perf_summary.json')
    return perf_summary


# Example run: benchmark the 2015 listings/reviews join with indexes on
#   listings(id) and reviews(date) in place
query_name = 'listings_join_reviews_2015'
query = q_listings_join_reviews_2015
all_indexes = [['date','reviews'], ['date','calendar'], ['id','listings']]
spec = [['id','listings'], ['date','reviews']] 
count = 50

print('\nThe full value of perf_summary is:')
test_result = full_value_summary(
    db_eng=db_eng,
    query=query,
    query_name=query_name,
    spec=spec,
    all_indexes=all_indexes,
    count=count,
)
pprint.pp(test_result)


The full value of perf_summary is:
{'listings_join_reviews_2015': {'__date_in_reviews__id_in_listings__': {'avg': 0.2552,
                                                                        'min': 0.1948,
                                                                        'max': 0.418,
                                                                        'std': 0.0467,
                                                                        'count': 50,
                                                                        'timestamp': '2024-05-16-13:42:54'},
                                '__date_in_reviews__': {'avg': 0.2301,
                                                        'min': 0.1906,
                                                        'max': 0.4007,
                                                        'std': 0.0337},
                                '__id_in_listings__': {'avg': 0.5281,
                                                       'min': 0.4416

In [23]:
# # __date_in_reviews__id_in_listings__
# spec = [['id','listings'], ['date','reviews']]
# all_indexes = [['date','reviews'], ['date','calendar'], ['id','listings']] 
# # key_value = util.build_index_description_key(all_indexes, spec)
# # print('\nThe new value for"' + key_value + '"will be', str(perf_profile))

# def build_index_description_key(all_indexes, spec):
#     key_value = "__"
#     for index in all_indexes:
#         if index in spec:
#             key_value = key_value + f"{index[0]}_in_{index[1]}__"
#     return key_value

# key_value = build_index_description_key(all_indexes, spec)
# print(key_value)