Run this notebook in the same virtual environment as the Superlinked server
so that both use the same version of the `superlinked` framework.

In [5]:
!pip freeze | grep superlinked

superlinked==29.1.0
superlinked-server==1.37.0


In [6]:
from pathlib import Path
import sys

# Locate the project root relative to wherever the notebook was started.
cwd = Path.cwd()

if cwd.name == "notebooks":
    # Started from the notebooks folder: its parent is taken as the root.
    project_dir = cwd.parent
else:
    # Otherwise take the nearest directory (cwd included) that carries the
    # project name; if none matches, fall through to the filesystem root so
    # that the existence check below reports the problem.
    ancestry = (cwd, *cwd.parents)
    project_dir = next(
        (candidate for candidate in ancestry if candidate.name == "intelligent-file-search"),
        ancestry[-1],
    )

superlinked_app_dir = project_dir / "superlinked_app"
assert superlinked_app_dir.exists(), (
    f"{superlinked_app_dir} does not exist\n"
    "Are you sure you are in or below the intelligent-file-search directory?"
)

# Make the project importable (needed for the `superlinked_app` imports).
project_path = str(project_dir)
already_on_path = project_path in sys.path
if not already_on_path:
    sys.path.append(project_path)
print(
    "project_dir is already in sys.path"
    if already_on_path
    else "project_dir is added to sys.path"
)


project_dir is already in sys.path


### Set `APP_ID` so that the `filesearch` collection is used

In [7]:
import os
# Set for this process only.
# NOTE(review): presumably superlinked_app reads APP_ID at import time, so this
# cell has to run before the imports below — confirm.
os.environ.update({"APP_ID": "filesearch"})


In [8]:
from superlinked import framework as sl

from superlinked_app.index import index, file_schema
from superlinked_app.query import query
from superlinked_app.api import vector_database

import pandas as pd

18:09:41 sentence_transformers.SentenceTransformer INFO   Load pretrained SentenceTransformer: sentence-transformers/paraphrase-MiniLM-L3-v2


Batches: 100%|██████████| 1/1 [00:00<00:00, 141.29it/s]

18:09:42 superlinked.framework.common.space.embedding.model_based.embedding_engine_manager INFO   Consider caching model dimension.
18:09:42 superlinked.framework.common.space.embedding.model_based.embedding_engine_manager INFO   Consider caching model dimension.
18:09:42 superlinked.framework.dsl.index.index INFO   initialized index





In [9]:
# Build an interactive (in-notebook) executor over the project's schema, index
# and vector database, then start it to obtain the app used for querying.
source = sl.InteractiveSource(file_schema)
executor_kwargs = {
    "sources": [source],
    "indices": [index],
    "vector_database": vector_database,
}
executor = sl.InteractiveExecutor(**executor_kwargs)
app = executor.run()

18:09:47 httpx INFO   HTTP Request: GET https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
18:09:47 httpx INFO   HTTP Request: GET https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch/exists "HTTP/1.1 200 OK"
18:09:47 httpx INFO   HTTP Request: GET https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch "HTTP/1.1 200 OK"
18:09:47 httpx INFO   HTTP Request: PUT https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch/index?wait=true "HTTP/1.1 200 OK"
18:09:48 httpx INFO   HTTP Request: PUT https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch/index?wait=true "HTTP/1.1 200 OK"
18:09:48 httpx INFO   HTTP Request: PUT https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch/index?wait=true "HTTP/1.

In [None]:
from datetime import datetime
import pandas as pd
def date_to_unix(date_str):
    """Convert a ``dd/mm/yyyy`` date string to a Unix timestamp in whole seconds.

    ``None`` is passed through unchanged so that optional bounds stay unset.
    The parsed datetime is naive, so ``timestamp()`` interprets it in the
    local timezone of the machine running the notebook.

    Raises:
        ValueError: if ``date_str`` does not match ``%d/%m/%Y``.
    """
    if date_str is not None:
        parsed = datetime.strptime(date_str, "%d/%m/%Y")
        return int(parsed.timestamp())
    return None
# Natural language query parameters
params = {
    "natural_query": "Recent PDF files about fashion or sport",
    "limit": 5,
}

# Run the query through the natural language interface
result = app.query(query, **params)

# Show the search parameters that were actually used for this query
print("Search Parameters Parsed by NLQ:")
print(result.metadata.search_params)

# Convert the results to a pandas DataFrame
df = sl.PandasConverter.to_pandas(result)

if df.empty:
    # Same guard as the other query cells: skip the date formatting when the
    # query returned no rows.
    print("No results found for the query.")
else:
    # The date columns are read as Unix seconds; render them as dd/mm/yyyy.
    for date_col in ("Creation_Date", "Last_Modified_Date"):
        df[date_col] = pd.to_datetime(df[date_col], unit="s").dt.strftime("%d/%m/%Y")

display(df)

In [30]:

# Labels for the per-space partial scores; they must be listed in the same
# order as the spaces in index.spaces.
space_names = [
    "content_similarity",
    "filename_similarity",
    "size_score",
    "creation_date_score",
    "modified_date_score",
]

# One record per result entry: its id plus one labelled score per space.
# zip() stops at the shorter sequence, so surplus scores or labels are dropped.
rows = [
    {"id": entry.id, **dict(zip(space_names, entry.metadata.partial_scores))}
    for entry in result.entries
]

df = pd.DataFrame(rows)
df


Unnamed: 0,id,content_similarity,filename_similarity,size_score,creation_date_score,modified_date_score
0,id_54,0.0,0.0,0.0,0.0,0.0
1,id_56,0.0,0.0,0.0,0.0,0.0


In [22]:
from datetime import datetime
import pandas as pd

def date_to_unix(date_str):
    """Turn a ``dd/mm/yyyy`` string into integer Unix seconds; ``None`` stays ``None``.

    The string is parsed into a naive datetime, so ``timestamp()`` applies the
    local timezone of the machine running the notebook.

    Raises:
        ValueError: if ``date_str`` is not in ``%d/%m/%Y`` format.
    """
    if date_str is not None:
        moment = datetime.strptime(date_str, "%d/%m/%Y")
        return int(moment.timestamp())
    return None

# Query parameters. Dates are written as "dd/mm/yyyy" strings (or None) and
# converted to Unix seconds by date_to_unix.
params = dict(
    # Text similarity inputs
    content_query="cinema fashion",
    filename_query="technical",
    # Weights for semantic spaces (matching what's in query.weights)
    content_weight=1.0,
    filename_weight=0.8,
    size_weight=0.5,
    creation_date_weight=0.6,
    modified_date_weight=1.0,
    # Weights for similarity matching
    similar_content_weight=1.0,
    similar_filename_weight=1.0,
    # Size filters (in kilobytes)
    min_size_kb=1000,
    max_size_kb=10000,
    # Date filters; None means no bound on that side
    min_creation_date=date_to_unix("03/08/2021"),
    max_creation_date=date_to_unix("31/12/2025"),
    min_modified_date=date_to_unix(None),  # No lower bound
    max_modified_date=date_to_unix("30/06/2035"),
    # Categorical filters (must match query.py)
    filetype_include_any=['pdf', 'docx'],
    tags_include_any=["cinema", "fashion"],
    # Result limit
    limit=5,
)

# Execute the query with the explicit parameters
result = app.query(query, **params)

# Show the parameters the query was run with
print(result.metadata.search_params)

df = sl.PandasConverter.to_pandas(result)

if not df.empty:
    # The date columns are read as Unix seconds (unit="s"); render them as
    # dd/mm/yyyy strings for display.
    for date_col in ("Creation_Date", "Last_Modified_Date"):
        df[date_col] = pd.to_datetime(df[date_col], unit="s").dt.strftime("%d/%m/%Y")
else:
    print("No results found for the query.")

# Shown in both cases; for an empty result this is just the empty frame.
display(df)



18:15:48 superlinked.framework.query.query_dag_evaluator INFO   evaluated query
18:15:48 httpx INFO   HTTP Request: POST https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch/points/query "HTTP/1.1 200 OK"
18:15:48 superlinked.framework.dsl.executor.query.query_executor INFO   executed query
{'content_query': 'cinema fashion', 'similar_content_weight': 1.0, 'filename_query': 'technical', 'similar_filename_weight': 1.0, 'limit': 5, 'select_param__': ['Content', 'File_Type', 'Tags', 'Filename', 'Path', 'Size_kB', 'Creation_Date', 'Last_Modified_Date'], 'min_size_kb': 1000.0, 'max_size_kb': 10000.0, 'min_creation_date': 1627941600, 'max_creation_date': 1767135600, 'min_modified_date': None, 'max_modified_date': 2066767200, 'filetype_include_all': None, 'filetype_include_any': ['pdf', 'docx'], 'filetype_exclude': None, 'tags_include_all': None, 'tags_include_any': ['cinema', 'fashion'], 'tags_exclude': None, 'natural_query': None, 'system_

Unnamed: 0,Content,File_Type,Tags,Filename,Path,Size_kB,Creation_Date,Last_Modified_Date,id,similarity_score
0,Model dress dress trend designer designer. Ten...,[pdf],"[fashion, sport]",dress.pdf,/cause/total/third/dress.pdf,6343.0,21/07/2023,05/10/2024,id_56,0.589101
1,Designer runway designer model model runway ru...,[pdf],"[fashion, cars, cinema]",runway.pdf,/realize/dog/increase/runway.pdf,9775.0,14/10/2021,27/03/2023,id_91,0.545448
2,Trend dress trend runway. Designer runway mode...,[docx],[fashion],trend.docx,/central/reach/trend.docx,9811.0,08/10/2025,09/11/2025,id_21,0.219757


In [32]:

# Labels for the per-space partial scores; they must be listed in the same
# order as the spaces in index.spaces.
space_names = [
    "content_similarity",
    "filename_similarity",
    "size_score",
    "creation_date_score",
    "modified_date_score",
]

# One record per result entry: its id plus one labelled score per space.
# zip() stops at the shorter sequence, so surplus scores or labels are dropped.
rows = [
    {"id": entry.id, **dict(zip(space_names, entry.metadata.partial_scores))}
    for entry in result.entries
]

df = pd.DataFrame(rows)
df


Unnamed: 0,id,content_similarity,filename_similarity,size_score,creation_date_score,modified_date_score
0,id_56,0.101449,0.019665,0.108949,0.129822,0.229218
1,id_91,0.081068,0.044035,0.089245,0.11757,0.213531


### Query with `simple_query` (no LLM)

In [25]:
from datetime import datetime
import pandas as pd

from superlinked_app.query import expand_simple_query_params

params = {
    "simple_query": "pdf docx last 180 years",
    "limit": 15,
    # You may also specify other parameters to override defaults
}

# Expand the free-text simple_query into the query's explicit parameters
# before running it.
expanded_params = expand_simple_query_params(params)
result = app.query(query, **expanded_params)

# Inspect parameters used
print(result.metadata.search_params)

df = sl.PandasConverter.to_pandas(result)

if df.empty:
    print("No results found for the query.")
else:
    # The date columns are read as Unix seconds (unit="s"); render them as
    # dd/mm/yyyy strings for display.
    for date_col in ("Creation_Date", "Last_Modified_Date"):
        df[date_col] = pd.to_datetime(df[date_col], unit="s").dt.strftime("%d/%m/%Y")

# A bare `df` inside the if-branch displays nothing, so call display()
# explicitly; this also shows the empty frame, as the structured-query cell does.
display(df)


18:17:13 superlinked.framework.query.query_dag_evaluator INFO   evaluated query
18:17:14 httpx INFO   HTTP Request: POST https://bad13d43-afc8-44b5-8b37-2fa1eb4f0236.eu-west-1-0.aws.cloud.qdrant.io:6333/collections/filesearch/points/query "HTTP/1.1 200 OK"
18:17:14 superlinked.framework.dsl.executor.query.query_executor INFO   executed query
{'content_query': None, 'similar_content_weight': 1.0, 'filename_query': None, 'similar_filename_weight': 1.0, 'limit': 15, 'select_param__': ['Content', 'File_Type', 'Tags', 'Filename', 'Path', 'Size_kB', 'Creation_Date', 'Last_Modified_Date'], 'min_size_kb': None, 'max_size_kb': None, 'min_creation_date': -3926018996, 'max_creation_date': 1754344799, 'min_modified_date': None, 'max_modified_date': None, 'filetype_include_all': None, 'filetype_include_any': ['pdf', 'docx'], 'filetype_exclude': None, 'tags_include_all': None, 'tags_include_any': None, 'tags_exclude': None, 'natural_query': None, 'system_prompt_param__': "Extract search parameters f

Unnamed: 0,Content,File_Type,Tags,Filename,Path,Size_kB,Creation_Date,Last_Modified_Date,id,similarity_score
0,Award movie scene director scene movie. Footba...,[pdf],"[cinema, sport, cars]",award.pdf,/financial/under/three/award.pdf,8383.0,24/11/2001,16/12/2011,id_20,0.0
1,Athlete football athlete football athlete bask...,[pdf],[sport],athlete.pdf,/still/everything/economic/athlete.pdf,1114.0,28/05/2010,16/02/2014,id_24,0.0
2,Basketball tennis football championship footba...,[pdf],"[sport, cars]",basketba.pdf,/compare/basketba.pdf,7216.0,02/08/2020,10/07/2022,id_51,0.0
3,Football tennis championship football champion...,[docx],[sport],football.docx,/general/football.docx,4638.0,05/01/2023,09/01/2023,id_54,0.0
4,Championship football football championship. F...,[pdf],[sport],champion.pdf,/true/recent/you/champion.pdf,5284.0,21/06/2011,06/04/2023,id_55,0.0
5,Model dress dress trend designer designer. Ten...,[pdf],"[fashion, sport]",dress.pdf,/cause/total/third/dress.pdf,6343.0,21/07/2023,05/10/2024,id_56,0.0
6,Race engine race model engine engine. Runway d...,[pdf],"[cars, fashion]",race.pdf,/fact/race.pdf,457.0,06/04/2003,12/04/2022,id_57,0.0
7,Basketball athlete tennis football championshi...,[pdf],"[sport, fashion, cinema]",basketba.pdf,/argue/basketba.pdf,6608.0,09/11/2012,17/10/2014,id_58,0.0
8,Designer runway model designer trend trend mod...,[pdf],"[fashion, sport, cinema]",runway.pdf,/night/modern/record/runway.pdf,8042.0,19/08/2016,01/10/2022,id_62,0.0
9,Championship tennis athlete. Dress model trend...,[pdf],"[sport, fashion]",champion.pdf,/movie/friend/special/champion.pdf,8120.0,16/12/2013,05/07/2022,id_68,0.0
