# Universal SQL Data Loader

## Get Superlinked

<div class="alert alert-block alert-warning">
Set <code>YOUR_GITHUB_TOKEN</code> in the URL below.<br>To get your token follow the instructions in the <a href="../README.md">README.md</a>.
</div>

In [None]:
%pip install "https://us-central1-data-359211.cloudfunctions.net/github-proxy/superlinked-0.1.0-py3-none-any.whl?token=YOUR_GITHUB_TOKEN"

Collecting superlinked==0.1.0
  Downloading https://us-central1-data-359211.cloudfunctions.net/github-proxy/superlinked-0.1.0-py3-none-any.whl?token=YOUR_GITHUB_TOKEN (57 kB)
     ---------------------------------------- 0.0/57.8 kB ? eta -:--:--
     ---------------------------------------- 57.8/57.8 kB 3.0 MB/s eta 0:00:00
Note: you may need to restart the kernel to use updated packages.


DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


## Imports and constants

In [None]:
import pandas as pd
from sqlalchemy import create_engine, text
from sshtunnel import SSHTunnelForwarder
import concurrent.futures
import threading
import queue
from datetime import datetime, timezone
import pytz
import time
import tkinter as tk
from tkinter import ttk

from superlinked.framework.common.dag.recency_node import PeriodTimeParam
from superlinked.framework.common.schema.schema import schema, IdField, String, Timestamp, Integer, Map, SchemaObject
from superlinked.framework.common.parser.dataframe_parser import DataFrameParser
from superlinked.framework.dsl.executor.in_memory.in_memory_executor import InMemoryExecutor, InMemoryApp
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.query.param import Param
from superlinked.framework.dsl.query.query import Query
from superlinked.framework.dsl.query.result import Result
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.dsl.space.text_similarity_space import TextSimilaritySpace
from superlinked.framework.dsl.space.recency_space import RecencySpace

# Configuration Variables
# Placeholder PostgreSQL connection settings; replace before running.
DB_CONFIG = dict(
    dbname='your_dbname',
    user='your_user',
    password='your_password',
    host='your_host',
    port='5432',
)

# Bastion host settings, passed to the loader together with USE_SSH_TUNNEL.
SSH_TUNNEL_CONFIG = dict(
    ssh_host='bastion_host',
    ssh_port=22,
    ssh_username='your_ssh_username',
    ssh_password='your_ssh_password',  # or use 'ssh_pkey' for key-based authentication
)

USE_SSH_TUNNEL = False  # Set to True if SSH tunnel is required

# Embedding model name and query defaults
MODEL = "sentence-transformers/paraphrase-MiniLM-L3-v2"
YEAR_IN_DAYS = 365
TOP_N = 10

# Progress counters shared between worker threads; guard updates with the lock
progress_lock = threading.Lock()
processed_rows = total_rows = 0

## Method Definitions

In [None]:
def get_ordered_result_tuples(result: "Result", top_n: int) -> list[tuple[int, int]]:
    """Return ``(rank, id)`` pairs for the first ``top_n`` entities of a result.

    Args:
        result: Query result exposing an ``entities`` sequence; each entity's
            ``id_.object_id`` must be convertible with ``int()``.
        top_n: Maximum number of entities to take from the front of
            ``result.entities``.

    Returns:
        A list of ``(rank, object_id)`` tuples, with ranks starting at 1 in
        the order the entities appear in ``result.entities``.

    Raises:
        ValueError: If an ``object_id`` cannot be converted to ``int``.
    """
    return [
        (rank, int(entity.id_.object_id))
        for rank, entity in enumerate(result.entities[:top_n], start=1)
    ]

def get_items_by_id_list(id_list_tuple: list[tuple[int, int]], df: pd.DataFrame) -> pd.DataFrame:
    """Select rows of ``df`` by id and tag each with its rank.

    Args:
        id_list_tuple: ``(rank, id)`` pairs; element 0 is the rank and
            element 1 is the id to look up.
        df: Frame that is either indexed by ``"id"`` or has an ``"id"``
            column (in which case it is re-indexed on a new frame).

    Returns:
        A new frame indexed by id, with rows in the order of
        ``id_list_tuple`` and an added ``"order"`` column holding the ranks.
        The caller's ``df`` is not modified.

    Raises:
        KeyError: If ``df`` has no ``"id"`` index/column or an id is missing.
    """
    if df.index.name != "id":
        df = df.set_index("id")

    wanted_ids = [id_tuple[1] for id_tuple in id_list_tuple]
    ranks = [id_tuple[0] for id_tuple in id_list_tuple]

    # Explicit copy: the "order" column must be written to an independent
    # frame, never to a selection that pandas may tie back to the caller's df.
    result_df = df.loc[wanted_ids].copy()
    # Printed before "order" is added; the notebook relies on this output.
    print(result_df)
    result_df["order"] = ranks
    return result_df

def parse_results(result: Result, df: pd.DataFrame, top_n: int = TOP_N) -> pd.DataFrame:
    """Return the rows of ``df`` matching the first ``top_n`` entities of ``result``.

    The entity ids are ranked by ``get_ordered_result_tuples`` and the
    corresponding rows are fetched by ``get_items_by_id_list``.
    """
    return get_items_by_id_list(
        id_list_tuple=get_ordered_result_tuples(result=result, top_n=top_n),
        df=df,
    )

def map_postgres_type_to_field(pg_type, is_primary_key=False):
    """Pick the schema field class for a PostgreSQL column type name.

    Primary-key columns always map to ``IdField``. A type name that is not
    listed below falls back to ``String``.
    """
    if is_primary_key:
        return IdField

    # Each row pairs a field class with the PostgreSQL type names mapped to it.
    field_groups = (
        (String, ('character varying', 'varchar', 'char', 'text', 'uuid')),
        (Integer, ('int', 'integer', 'bigint', 'smallint')),
        # Real, fixed-point and boolean columns are assumed to be handled as
        # integers for now; adjust if needed.
        (Integer, ('real', 'double precision', 'numeric', 'decimal', 'boolean')),
        (Timestamp, ('timestamp', 'timestamp with time zone', 'timestamp without time zone', 'date')),
        (Map, ('json', 'jsonb', 'hstore')),
    )
    lookup = {
        type_name: field_cls
        for field_cls, type_names in field_groups
        for type_name in type_names
    }

    # Default to String if type not found
    return lookup.get(pg_type, String)

def to_camel_case(snake_str):
    """Join the underscore-separated parts of ``snake_str``, each title-cased.

    Example: ``"events_event"`` becomes ``"EventsEvent"``.
    """
    return ''.join(map(str.title, snake_str.split('_')))

def create_class(table_name, table_schema):
    """Build a ``@schema``-decorated class for a table and publish it globally.

    The class is named ``<CamelCaseTableName>Schema`` and carries one
    annotation per entry of ``table_schema`` (column name -> field class).
    The decorated class is stored in ``globals()`` under that name and
    returned.
    """
    class_name = f"{to_camel_case(table_name)}Schema"

    # The annotations are the only class attributes: column name -> field class.
    namespace = {'__annotations__': dict(table_schema.items())}

    # Create the plain class, then run it through the 'schema' decorator.
    decorated_class = schema(type(class_name, (), namespace))

    # Make the class reachable by name from module scope.
    globals()[class_name] = decorated_class
    return decorated_class

def process_table(table, engine, total_rows):
    """Load one table into a DataFrame and derive its schema class.

    Args:
        table: Name of a table in the ``public`` schema.
        engine: SQLAlchemy engine used to open a connection for this table.
        total_rows: Row count across all tables, used only for the progress
            percentage that is printed.

    Returns:
        A tuple ``(table, df, table_schema, cls)`` where ``df`` holds the
        table contents, ``table_schema`` maps column name -> field class and
        ``cls`` is the class produced by ``create_class``.

    Side effects:
        Adds the number of loaded rows to the global ``processed_rows``
        (under ``progress_lock``) and prints a progress line.
    """
    global processed_rows

    # The table name is passed as a bound parameter rather than interpolated
    # into the SQL text. The primary-key flag is computed with EXISTS so each
    # column yields exactly one row: the previous LEFT JOIN produced one row
    # per constraint (and matched same-named columns of other tables), so a
    # key column that also took part in another constraint could end up
    # flagged as non-primary.
    schema_query = text("""
        SELECT
            c.column_name,
            c.data_type,
            EXISTS (
                SELECT 1
                FROM information_schema.table_constraints tc
                JOIN information_schema.key_column_usage kcu
                    ON kcu.constraint_schema = tc.constraint_schema
                    AND kcu.constraint_name = tc.constraint_name
                    AND kcu.table_schema = tc.table_schema
                    AND kcu.table_name = tc.table_name
                WHERE tc.constraint_type = 'PRIMARY KEY'
                    AND kcu.table_schema = c.table_schema
                    AND kcu.table_name = c.table_name
                    AND kcu.column_name = c.column_name
            ) AS is_primary_key
        FROM
            information_schema.columns c
        WHERE
            c.table_name = :table_name AND c.table_schema = 'public'
        ORDER BY
            c.ordinal_position
    """).bindparams(table_name=table)

    with engine.connect() as conn:
        # Use SQLAlchemy to read the table into a DataFrame
        df = pd.read_sql_table(table, conn)
        schema_info = conn.execute(schema_query).fetchall()

    # Mapping PostgreSQL data types to schema field classes
    table_schema = {
        column_name: map_postgres_type_to_field(data_type, is_primary_key)
        for column_name, data_type, is_primary_key in schema_info
    }

    # Create a class for this table
    cls = create_class(table, table_schema)

    # Update progress. The rows already loaded are counted instead of issuing
    # another COUNT(*) query, so no database round-trip happens under the lock.
    with progress_lock:
        processed_rows += len(df)
        # Guard against a database whose tables are all empty.
        progress = (processed_rows / total_rows) * 100 if total_rows else 100.0
        print(f"Completed {table}: {progress:.2f}% complete")

    return table, df, table_schema, cls

def load_postgres_tables_and_schemas(db_config, use_ssh_tunnel=False, ssh_config=None):
    """Load every ``public`` table into a DataFrame and build a schema class for each.

    Args:
        db_config: Mapping with ``dbname``, ``user``, ``password``, ``host``
            and ``port``.
        use_ssh_tunnel: When true (and ``ssh_config`` is given) the database
            is reached through an SSH tunnel.
        ssh_config: Mapping with ``ssh_host``, ``ssh_port``, ``ssh_username``
            and either ``ssh_password`` or ``ssh_pkey``.

    Returns:
        ``(dataframes, schemas, classes)``: three dicts keyed by table name.
        Tables whose processing raised are reported on stdout and omitted.

    Side effects:
        Resets and then updates the global ``total_rows`` / ``processed_rows``
        progress counters, and prints progress messages.

    NOTE: this relies on the three-argument ``process_table``. A later cell
    of this notebook rebinds that name to a one-argument function, so re-run
    the definitions cell before calling this function again.
    """
    global total_rows, processed_rows
    from urllib.parse import quote  # local import keeps this block self-contained

    # Start from clean counters so repeated calls do not accumulate.
    total_rows = 0
    processed_rows = 0

    tunnel = None
    engine = None
    dataframes = {}
    schemas = {}
    classes = {}

    try:
        # Setup SSH tunnel if required
        if use_ssh_tunnel and ssh_config:
            forwarder = SSHTunnelForwarder(
                (ssh_config['ssh_host'], ssh_config['ssh_port']),
                ssh_username=ssh_config['ssh_username'],
                # Either credential may be absent; key-based auth uses 'ssh_pkey'.
                ssh_password=ssh_config.get('ssh_password'),
                ssh_pkey=ssh_config.get('ssh_pkey'),
                remote_bind_address=(db_config['host'], int(db_config['port'])),
            )
            forwarder.start()
            tunnel = forwarder  # only a started tunnel is stopped in 'finally'
            host = '127.0.0.1'
            port = tunnel.local_bind_port
        else:
            host = db_config['host']
            port = db_config['port']

        # Setup SQLAlchemy engine. Credentials are percent-encoded so that
        # characters such as '@', ':' or '/' cannot corrupt the URL.
        user = quote(str(db_config['user']), safe='')
        password = quote(str(db_config['password']), safe='')
        db_uri = f"postgresql://{user}:{password}@{host}:{port}/{db_config['dbname']}"
        engine = create_engine(db_uri)
        quote_identifier = engine.dialect.identifier_preparer.quote

        with engine.connect() as conn:
            # Get table names
            result = conn.execute(text("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"))
            tables = [row[0] for row in result]

            # Get total row count for all tables for progress calculation.
            # Identifiers are quoted so mixed-case or reserved names work.
            for table in tables:
                row_count_result = conn.execute(text(f"SELECT COUNT(*) FROM {quote_identifier(table)}"))
                row_count, = row_count_result.fetchone()
                total_rows += row_count

        # Parallel processing of tables
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {executor.submit(process_table, table, engine, total_rows): table for table in tables}
            for future in concurrent.futures.as_completed(futures):
                table = futures[future]
                try:
                    table, df, table_schema, cls = future.result()
                except Exception as e:
                    # Best effort: one failing table must not abort the others.
                    print(f"Error processing table {table}: {e}")
                    continue
                # TODO: introduce datetime format detection and conversion to
                # Unix epoch time here.
                dataframes[table] = df.fillna('')
                schemas[table] = table_schema
                classes[table] = cls
    finally:
        # Release the connection pool and the tunnel even when loading fails.
        if engine is not None:
            engine.dispose()
        if tunnel is not None:
            tunnel.stop()

    print("Data processing completed.")
    return dataframes, schemas, classes

## Pull data from DB into DataFrames and auto-generate the Classes

In [None]:
# Run the script
# Reset the progress counters, then load every table.
processed_rows = total_rows = 0
dataframes, schemas, classes = load_postgres_tables_and_schemas(
    db_config=DB_CONFIG,
    use_ssh_tunnel=USE_SSH_TUNNEL,
    ssh_config=SSH_TUNNEL_CONFIG,
)

Completed auth_group: 0.00% complete
Completed django_content_type: 0.00% complete
Completed django_migrations: 0.01% complete
Completed organizations_organization: 0.01% complete
Completed django_session: 0.29% complete
Completed auth_permission: 0.30% complete
Completed auth_user: 1.01% complete
Completed artists_artist: 1.05% complete
Completed account_emailaddress: 1.32% complete
Completed account_emailconfirmation: 1.32% complete
Completed artists_artist_genres: 1.33% complete
Completed django_site: 1.33% complete
Completed clubs_club_genres: 1.33% complete
Completed events_timetype: 1.33% complete
Completed events_event_organizations: 1.35% complete
Completed events_compticket: 1.87% complete
Completed events_act: 1.94% complete
Completed events_event: 1.96% complete
Completed events_referal: 1.97% complete
Completed events_tickettype: 2.00% complete
Completed fan_campaigns_fancampaign: 2.00% complete
Completed orders_promocode: 2.02% complete
Completed events_promotion: 2.04% co

### We can now use variables dataframes, schemas, and classes
Print out the data to visually inspect and verify.

In [None]:
# Report, per table: the instantiated class, its field types, and DataFrame.info()
print("\nCreated Classes and DataFrame Summaries:")
for table_name in classes:
    cls = classes[table_name]
    print(f"\nClass for table '{table_name}': {cls()}")

    print("\nFields:")
    table_fields = schemas[table_name]
    for field_name in table_fields:
        field_type = table_fields[field_name]
        print(f"  {field_name}: {field_type.__name__}")

    print(f"\nDataFrame Summary for '{table_name}':")
    dataframes[table_name].info()


Created Classes and DataFrame Summaries:

Class for table 'django_content_type': <superlinked.framework.common.schema.schema.DecoratedType object at 0x00000143A23C1050>

Fields:
  id: IdField
  app_label: String
  model: String

DataFrame Summary for 'django_content_type':
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         52 non-null     int64 
 1   app_label  52 non-null     object
 2   model      52 non-null     object
dtypes: int64(1), object(2)
memory usage: 1.3+ KB

Class for table 'auth_group': <superlinked.framework.common.schema.schema.DecoratedType object at 0x00000143EEC13B90>

Fields:
  id: IdField
  name: String

DataFrame Summary for 'auth_group':
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      0 non-n

## Semi Optimized Paradigm
In this next section we have two semi-optimized examples, each making use of our programmatically generated classes and dataframes. I've hardcoded the keys for two classes that were created from my ticket platform's PostgreSQL DB.

In [None]:
# Events: take the generated class, instantiate it, and build one text
# similarity space per field (title, description) plus an index, a DataFrame
# parser and an in-memory source for it.
event_class = classes['events_event']
event_instance = event_class()
event_title_space = TextSimilaritySpace(text=event_instance.title, model=MODEL)
event_description_space = TextSimilaritySpace(text=event_instance.description, model=MODEL)
event_index = Index(spaces=[event_title_space, event_description_space])
event_df_parser = DataFrameParser(schema=event_instance)
event_source: InMemorySource = InMemorySource(event_instance, parser=event_df_parser)

# Artists: same wiring, using the name and description fields.
artist_class = classes['artists_artist']
artist_instance = artist_class()
artist_name_space = TextSimilaritySpace(text=artist_instance.name, model=MODEL)
artist_description_space = TextSimilaritySpace(text=artist_instance.description, model=MODEL)
artist_index = Index(spaces=[artist_name_space, artist_description_space])
artist_df_parser = DataFrameParser(schema=artist_instance)
artist_source: InMemorySource = InMemorySource(artist_instance, parser=artist_df_parser)

# One executor serves both sources and both indices; run() yields the app
# that the query cells below use.
executor: InMemoryExecutor = InMemoryExecutor(sources=[event_source, artist_source], indices=[event_index, artist_index])
app: InMemoryApp = executor.run()

# Feed the loaded DataFrames into their sources (done after the app is running).
event_source.put([dataframes['events_event']])
artist_source.put([dataframes['artists_artist']])

Let's create and run some simple queries on Artists and Events. **Note** - we run both queries using the same InMemoryApp.


In [None]:
# Event query: both spaces get a caller-supplied weight, and the same
# "query_text" parameter is compared against title and description.
simple_event_query = (
    Query(event_index, weights={
        event_title_space: Param("event_title_weight"),
        event_description_space: Param("event_description_weight")
    })
    .find(event_instance)
    .similar(event_title_space.text, Param("query_text"))
    .similar(event_description_space.text, Param("query_text"))
)

# Artist query: same shape, over the name and description spaces.
simple_artist_query = (
    Query(artist_index, weights={
        artist_name_space: Param("artist_name_weight"),
        artist_description_space: Param("artist_description_weight")
    })
    .find(artist_instance)
    .similar(artist_name_space.text, Param("query_text"))
    .similar(artist_description_space.text, Param("query_text"))
)

# Run the event query with equal weights. The return value of parse_results
# is not captured here; the rows appear via the print inside
# get_items_by_id_list.
result: Result = app.query(
    simple_event_query,
    query_text="Tied presents",
    event_description_weight=1,
    event_title_weight=1
)
parse_results(result, dataframes['events_event'], 10)

# Run the artist query against the same app.
result: Result = app.query(
    simple_artist_query,
    query_text="Dabura",
    artist_description_weight=1,
    artist_name_weight=1
)
parse_results(result, dataframes['artists_artist'], 10)

                         date_created                    date_modified  \
id                                                                       
94   2019-11-19 17:24:46.927000+00:00 2019-12-02 16:40:47.760000+00:00   
748  2022-11-18 18:35:03.350379+00:00 2022-11-18 19:24:42.986523+00:00   
794  2023-01-02 11:50:29.528024+00:00 2023-01-21 10:27:37.799068+00:00   
1093 2023-10-10 00:18:26.492236+00:00 2023-10-20 02:23:11.360050+00:00   
415  2022-01-26 15:02:30.572000+00:00 2022-02-19 00:29:19.531000+00:00   
1135 2023-11-16 17:51:00.938912+00:00 2023-11-16 17:51:00.984611+00:00   
74   2020-01-27 19:11:50.785000+00:00 2020-02-16 00:19:02.356000+00:00   
404  2021-07-31 16:07:22.717000+00:00 2021-07-31 16:53:41.495000+00:00   
241  2021-10-05 12:18:54.854000+00:00 2021-11-06 03:40:41.565000+00:00   
503  2022-05-25 13:17:27.471000+00:00 2022-06-23 19:02:06.222000+00:00   

                                          slug       date  age_limit  \
id                                     

Unnamed: 0_level_0,date_created,date_modified,name,slug,creator_id,last_modifier_id,description,image,order
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1452,2023-04-04 22:43:23.855363+00:00,2023-04-05 19:47:55.366555+00:00,Dabura,dabura,7865.0,7865.0,,,1
1075,2022-07-26 18:01:33.357000+00:00,2022-07-26 18:01:33.357000+00:00,DABURA & ROSTEK,dabura-_-rostek,,,,,2
1424,2023-03-08 19:31:53.489953+00:00,2023-03-08 19:31:53.489980+00:00,DABURA & ROSTEK,dabura-rostek,,,,,3
247,2019-02-20 21:37:25.819000+00:00,2019-02-20 21:37:25.819000+00:00,Dabura (B-Day Set),dabura-b-day-set,,,,,4
1478,2023-05-04 19:45:52.341205+00:00,2023-05-04 19:45:52.341231+00:00,Dabura B2B Watry,dabura-b2b-watry,,,,,5
1079,2022-08-05 20:52:55.232000+00:00,2022-08-05 20:52:55.232000+00:00,Daaku,daaku,,,,,6
266,2019-03-12 18:18:42.393000+00:00,2019-03-12 18:18:42.393000+00:00,Dabura ft. Raj,dabura-ft-raj,,,,,7
6,2019-06-20 18:10:04.097000+00:00,2019-06-20 18:10:04.097000+00:00,Dabura & Raj & Mo,dabura-raj-mo,,,,,8
322,2018-12-14 11:11:30.980000+00:00,2018-12-14 11:11:30.980000+00:00,Dagō,dago,,,,,9
533,2021-09-07 18:12:15.589000+00:00,2021-09-07 18:12:15.589000+00:00,DAVI,davi,,,,,10


## Updated Paradigm
In this section we use a GUI to select tables and programmatically load all sources and indices into a single executor. We also keep track of instantiated class schemas and the dynamically generated spaces.

In [None]:
# Define a dict to store everything
app_data = {}

# For every generated class, wire up spaces, an index and a source.
for table_name, class_def in classes.items():
    class_instance = class_def()
    class_fields_spaces_dict = {}

    # Only String fields get a TextSimilaritySpace (for now).
    for field_name, field_type in schemas[table_name].items():
        if field_type.__name__ != 'String':
            continue
        text_field = getattr(class_instance, field_name, None)
        if text_field is None:
            continue
        class_fields_spaces_dict[field_name] = TextSimilaritySpace(text=text_field, model=MODEL)

    # Tables without any text space are left out of app_data entirely.
    class_spaces = list(class_fields_spaces_dict.values())
    if not class_spaces:
        continue

    # Record the instance, index, source and per-field spaces for this table.
    app_data[table_name] = dict(
        instance=class_instance,
        index=Index(spaces=class_spaces),
        source=InMemorySource(class_instance, parser=DataFrameParser(schema=class_instance)),
        spaces=class_fields_spaces_dict,
    )

# A single executor runs all collected sources and indices.
executor = InMemoryExecutor(
    sources=[entry['source'] for entry in app_data.values()],
    indices=[entry['index'] for entry in app_data.values()],
)
app = executor.run()

In [None]:
def create_table_selection_gui(tables, on_submit):
    """Show a scrollable checkbox list of tables and process the ticked ones.

    Blocks until the window is closed. If the user pressed "Submit",
    ``on_submit(table_name)`` is then called once for every ticked table, in
    the order the tables were given. Closing the window any other way
    processes nothing.

    Args:
        tables: Iterable of table names to offer.
        on_submit: Callable taking a single table name.
    """
    root = tk.Tk()
    root.title("Select Tables to Process")

    # Frame for the checkboxes and scrollbar
    frame = ttk.Frame(root)
    frame.pack(fill=tk.BOTH, expand=True)

    # Canvas for adding the scrollbar
    canvas = tk.Canvas(frame)
    canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Scrollbar
    scrollbar = ttk.Scrollbar(frame, orient=tk.VERTICAL, command=canvas.yview)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    canvas.configure(yscrollcommand=scrollbar.set)

    # Frame for checkboxes
    checkbox_frame = ttk.Frame(canvas)
    canvas.create_window((0, 0), window=checkbox_frame, anchor="nw")

    def _refresh_scrollregion(_event=None):
        canvas.configure(scrollregion=canvas.bbox("all"))

    # Recompute the scrollable area when either the canvas or its content
    # changes size, so the full checkbox list stays reachable.
    canvas.bind('<Configure>', _refresh_scrollregion)
    checkbox_frame.bind('<Configure>', _refresh_scrollregion)

    def _on_mousewheel(event):
        if event.num == 4:  # X11 reports wheel-up as button 4
            steps = -1
        elif event.num == 5:  # X11 reports wheel-down as button 5
            steps = 1
        elif abs(event.delta) >= 120:  # delta in multiples of 120
            steps = int(-1 * (event.delta / 120))
        elif event.delta:
            # Small deltas would truncate to zero above; scroll one unit.
            steps = -1 if event.delta > 0 else 1
        else:
            return
        canvas.yview_scroll(steps, "units")

    canvas.bind("<MouseWheel>", _on_mousewheel)
    canvas.bind("<Button-4>", _on_mousewheel)
    canvas.bind("<Button-5>", _on_mousewheel)

    # Tables ticked at the moment "Submit" was pressed
    selected_tables = []

    # Create a checkbox for each table
    checkboxes = {table: tk.BooleanVar(master=root) for table in tables}
    for table, var in checkboxes.items():
        ttk.Checkbutton(checkbox_frame, text=table, variable=var).pack()

    def submit_and_close():
        selected_tables.extend(table for table, var in checkboxes.items() if var.get())
        root.destroy()

    # Submit button
    submit_button = ttk.Button(root, text="Submit", command=submit_and_close)
    submit_button.pack()

    root.mainloop()

    # After the GUI is closed, hand each selected table to the callback that
    # the caller supplied (previously a global function was called instead).
    for table_name in selected_tables:
        on_submit(table_name)

def process_table(table_name):
    """Push the loaded DataFrame of ``table_name`` into its in-memory source.

    Reads the module-level ``dataframes``, ``schemas`` and ``app_data``.
    The table is skipped, with a message on stdout, when it has no loaded
    DataFrame, when its IdField column is not named ``id``, when no source
    was registered for it in ``app_data``, or when no rows remain after
    dropping rows with a missing id.

    NOTE: this rebinds the name ``process_table`` that the table loader
    defined earlier in the notebook with a different signature.
    """
    if table_name not in dataframes or dataframes[table_name] is None:
        print(f"Table {table_name} not found or has no data")
        return

    print(f"Processing table: {table_name}")
    primary_key_field = next(
        (
            field_name
            for field_name, field_type in schemas.get(table_name, {}).items()
            if field_type.__name__ == 'IdField'
        ),
        None,
    )
    print(f"Detected primary key: {primary_key_field}")
    if primary_key_field != 'id':
        print(f"Skipping table {table_name} due to missing IdField as primary key.")
        return

    # app_data only holds tables for which a source was built, so a table can
    # be loaded yet absent here; report that instead of raising KeyError.
    table_entry = app_data.get(table_name)
    if table_entry is None:
        print(f"Skipping table {table_name}: no source was registered for it.")
        return

    filtered_df = dataframes[table_name].dropna(subset=[primary_key_field])
    if filtered_df.empty:
        print(f"No data to add for table {table_name}")
        return

    table_entry['source'].put([filtered_df])
    print(f"Data has been added for table {table_name}")

# Call GUI and Vectorize selected tables
create_table_selection_gui(tables=dataframes.keys(), on_submit=process_table)

Processing table: artists_artist
Detected primary key: id
Data has been added for table artists_artist
Processing table: promoters_promoter
Detected primary key: id
Data has been added for table promoters_promoter
Processing table: clubs_club
Detected primary key: id
Data has been added for table clubs_club


In [None]:
# Automatically build a simple query simply by passing in a table name
def build_query(table_name):
    """Assemble a text query over every space registered for ``table_name``.

    Each space gets a weight parameter named
    ``<table_name>_<field_name>_weight``, and every space's text field is
    compared against the shared ``query_text`` parameter.

    Returns:
        ``(query, weight_params)`` where ``weight_params`` maps each weight
        parameter name to an example default of 1.
    """
    table_data = app_data[table_name]
    spaces = table_data['spaces']

    # Parameter name for a field's weight, shared by both dicts below.
    def weight_name(field_name):
        return f"{table_name}_{field_name}_weight"

    weights = {space: Param(weight_name(field_name)) for field_name, space in spaces.items()}
    weight_params = {weight_name(field_name): 1 for field_name in spaces}  # Example default value

    # Start from the weighted index query, then chain one similarity clause per space.
    query = Query(table_data['index'], weights=weights).find(table_data['instance'])
    for space in spaces.values():
        query = query.similar(space.text, Param("query_text"))

    return query, weight_params

# Example usage
query, weight_params = build_query('artists_artist')
print(weight_params) # e.g., {'artists_artist_name_weight': 1, 'artists_artist_slug_weight': 1, 'artists_artist_description_weight': 1, 'artists_artist_image_weight': 1}

# Tune the weights in one go: favour the name, ignore the image field
weight_params.update(
    artists_artist_name_weight=10,
    artists_artist_image_weight=0,
)

# Run the query; the weight dict is unpacked into keyword parameters
result = app.query(query, query_text="Dabura", **weight_params)
parsed_results = parse_results(result=result, df=dataframes['artists_artist'], top_n=10)

{'artists_artist_name_weight': 1, 'artists_artist_slug_weight': 1, 'artists_artist_description_weight': 1, 'artists_artist_image_weight': 1}
                         date_created                    date_modified  \
id                                                                       
1452 2023-04-04 22:43:23.855363+00:00 2023-04-05 19:47:55.366555+00:00   
1424 2023-03-08 19:31:53.489953+00:00 2023-03-08 19:31:53.489980+00:00   
1075 2022-07-26 18:01:33.357000+00:00 2022-07-26 18:01:33.357000+00:00   
247  2019-02-20 21:37:25.819000+00:00 2019-02-20 21:37:25.819000+00:00   
1478 2023-05-04 19:45:52.341205+00:00 2023-05-04 19:45:52.341231+00:00   
1079 2022-08-05 20:52:55.232000+00:00 2022-08-05 20:52:55.232000+00:00   
266  2019-03-12 18:18:42.393000+00:00 2019-03-12 18:18:42.393000+00:00   
6    2019-06-20 18:10:04.097000+00:00 2019-06-20 18:10:04.097000+00:00   
322  2018-12-14 11:11:30.980000+00:00 2018-12-14 11:11:30.980000+00:00   
533  2021-09-07 18:12:15.589000+00:00 2021-09

In [None]:
# Example usage
another_query, more_weight_params = build_query('promoters_promoter')
more_weight_params.update(promoters_promoter_name_weight=10)

# Run the query; the weight dict is unpacked into keyword parameters
result = app.query(another_query, query_text="The Blox Office", **more_weight_params)
parsed_results = parse_results(result=result, df=dataframes['promoters_promoter'], top_n=10)

                        date_created                    date_modified  \
id                                                                      
43  2018-12-19 00:14:11.671000+00:00 2018-12-19 00:14:11.671000+00:00   
135 2019-11-19 13:24:23.224000+00:00 2019-11-19 13:24:23.224000+00:00   
173 2022-04-10 15:28:18.991000+00:00 2022-04-10 15:28:18.991000+00:00   
233 2022-12-01 20:14:04.309467+00:00 2022-12-01 20:14:04.309481+00:00   
428 2023-11-20 01:08:33.602174+00:00 2023-11-20 01:08:33.602199+00:00   
94  2021-07-08 06:21:51.354000+00:00 2021-07-08 06:21:51.354000+00:00   
57  2018-11-08 13:45:13.757000+00:00 2018-11-08 13:45:13.757000+00:00   
411 2023-11-14 07:42:37.666172+00:00 2023-11-14 07:42:37.666198+00:00   
414 2023-11-16 01:44:07.406288+00:00 2023-11-16 01:44:07.406319+00:00   
13  2019-04-29 12:32:10.625000+00:00 2019-04-29 12:32:10.625000+00:00   

                      name                   slug  
id                                                 
43         The Blox