### James explores some queries

In [1]:
import itertools
import pprint as pp
import time

import pandas as pd

from snowexsql.api import PointMeasurements

In [3]:
# One PointMeasurements instance is enough to read the catalogue attributes
# (instruments, observers, site names) used to pick query parameters below.
og_pt_measurements = PointMeasurements()

instrument_types, observers, site_names = (
    og_pt_measurements.all_instruments,
    og_pt_measurements.all_observers,
    og_pt_measurements.all_site_names,
)

# Uncomment to inspect the available site names:
# pp.pp(site_names)

### The new version of the database

In [4]:
def calculate_point_query_time(api, instrument, limit, site_name=None, observers=None):
    """
    Time one ``from_filter()`` point query and summarise it as a one-row DataFrame.

    inputs:
        - api: 'old_api' or 'new_api'; selects which measurements class is queried.
          Any other value prints a warning and yields an "Invalid API" row.
        - instrument: instrument string forwarded to ``from_filter()``.
        - limit: limit size forwarded to ``from_filter()``.
        - site_name: optional site filter; omitted from the query when None.
        - observers: optional observer filter; only applied together with
          site_name. Passing observers without site_name runs no query and
          yields a "No results for these query params" row.

    returns:
        - A one-row dataframe with the columns api_version, instrument, limit,
          site_name, observers, elapsed_time and df_length.
          elapsed_time is the query duration in seconds, or -1 when no query
          was timed (invalid api, unsupported filter combination, failed query).
          df_length is ``len()`` of the query result, or a status string
          describing why there is no result.
    """

    def summary_row(elapsed_time, df_length):
        # Single place that defines the result schema, so every exit path
        # returns the same columns.
        return pd.DataFrame([{
            "api_version": api,
            "instrument": instrument,
            "limit": limit,
            "site_name": site_name,
            "observers": observers,
            "elapsed_time": elapsed_time,
            "df_length": df_length
        }])

    if api == 'old_api':
        clz = PointMeasurements
    elif api == 'new_api':
        # NOTE(review): PointsMeasurementsOriginal is not defined in the visible
        # part of this file, and an "Original" class mapped to 'new_api' looks
        # inverted -- confirm the intended class before benchmarking 'new_api'.
        clz = PointsMeasurementsOriginal
    else:
        print("!!!No results; need to specify old_api or new_api!!!")
        return summary_row(-1, "Invalid API")

    # Filtering by observers alone has never been supported by this helper;
    # keep reporting it as an un-run query rather than silently changing what
    # the benchmark measures.
    if site_name is None and observers is not None:
        return summary_row(-1, "No results for these query params")

    # Only forward the filters that were actually supplied.
    filters = {"instrument": instrument}
    if site_name is not None:
        filters["site_name"] = site_name
    if observers is not None:
        filters["observers"] = observers
    filters["limit"] = limit

    try:
        # perf_counter is monotonic and high resolution, which suits interval
        # timing better than wall-clock time.time().
        initiate_time = time.perf_counter()
        df_query = clz.from_filter(**filters)
        finished_time = time.perf_counter()
    except Exception as e:
        # Broad catch is deliberate: one failing parameter combination must not
        # abort a whole benchmark sweep. The error itself is printed so that
        # genuine bugs are not mistaken for missing data.
        print(f"""!!! Failed Query for...
                  instrument={instrument}, 
                  site={site_name}, 
                  observers={observers},
                  *** Data does not exist ***
                  error={e!r}""")
        return summary_row(-1, "Query Failed")

    if df_query is None:
        return summary_row(-1, "No results for these query params")

    return summary_row(finished_time - initiate_time, len(df_query))




In [None]:
# Benchmark sweep: time every combination of the parameter lists below and
# collect the one-row summaries returned by calculate_point_query_time().
query_times = []

apis = ['old_api']
instruments = ['pulseEkko pro 1 GHz GPR', 'magnaprobe']
limits = [1000, 10000, 1000000]
# NOTE: these two assignments rebind the site_names / observers names that an
# earlier cell filled from the PointMeasurements catalogue attributes.
site_names = ['farmers-creamers', 'Grand Mesa', None]
observers = ['Ryan Webb', 'Randall Bonnell', None]

# itertools.product iterates with the right-most list varying fastest, i.e.
# the same order as the equivalent nested for-loops.
for api, instrument, limit, site_name, observer in itertools.product(
    apis, instruments, limits, site_names, observers
):
    query = calculate_point_query_time(
        api=api,
        instrument=instrument,
        limit=limit,
        site_name=site_name,
        observers=observer,
    )
    query_times.append(query)

# Each summary frame has a single row labelled 0; ignore_index gives the
# combined frame unique row labels instead of a column of zeros.
all_queries = pd.concat(query_times, ignore_index=True)
# Rows with elapsed_time == -1 mark queries that were not run or failed.
clean_queries = all_queries[all_queries['elapsed_time'] > 0]
clean_queries

In [12]:
# Save the successfully timed queries. The filtered frame is named
# clean_queries; clean_results is never defined in this notebook.
clean_queries.to_csv('./misc_data/query_times.csv')