## This file is for the evaluation of the implemented semantic searches.

#### Preparations for the comparison with a simple substring-matching search

In [43]:
import sqlite3


def substring_search(substring, max_results):
    """Return ids of publications whose title or abstract contains ``substring``.

    The lookup is a plain SQL ``LIKE`` match against the ``Publication`` table
    of the SQLite database at
    ``../ontology_creation/databases/publications-database.db``.

    Args:
        substring: Text to look for. It is matched literally: percent signs,
            underscores and backslashes in it are escaped so that they do not
            act as ``LIKE`` wildcards.
        max_results: Upper bound on the number of ids returned. ``None`` (or a
            negative number) means "no limit".

    Returns:
        A list of ``publication_id`` values, in the order SQLite yields them.
    """
    # Escape the LIKE metacharacters (the escape character itself first) so
    # that the search term is treated as literal text.
    escaped = (
        substring.replace('\\', '\\\\')
        .replace('%', '\\%')
        .replace('_', '\\_')
    )
    pattern = '%' + escaped + '%'

    # SQLite treats a negative LIMIT as "no upper bound".
    limit = -1 if max_results is None else max_results

    query = """
    SELECT publication_id FROM Publication
    WHERE title LIKE ? ESCAPE '\\' OR abstract LIKE ? ESCAPE '\\'
    LIMIT ?;
    """

    conn = sqlite3.connect('../ontology_creation/databases/publications-database.db')
    try:
        cursor = conn.cursor()
        cursor.execute(query, (pattern, pattern, limit))
        # Each row is a 1-tuple; unpack it to the bare id.
        return [publication_id for publication_id, in cursor.fetchall()]
    finally:
        # Release the connection even if the query fails.
        conn.close()


### Comparison of the searches

In [44]:
import pickle
import time
from tqdm import tqdm
from search_precalculated import SemanticSearchPrecalculated
from search_individual_calculations import SemanticSearchIndividualCalculations


def _timed_call(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``.

    The elapsed time is taken from ``time.perf_counter`` (a monotonic,
    high-resolution clock meant for measuring durations) and rounded to two
    decimals.
    """
    start = time.perf_counter()
    result = func(*args)
    return result, round(time.perf_counter() - start, 2)


# Load the data of the precalculated search.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced by this project.
with open("./temporary-storage/semantic-search-object-precalculated.pkl", "rb") as file_1:
    precalculated_search = pickle.load(file_1)

# Load the data of the individual calculations search (same pickle caveat).
with open("./temporary-storage/semantic-search-object-individual-calculations.pkl", "rb") as file_2:
    individual_calculation_search = pickle.load(file_2)

# Evaluation parameters; the same values are passed to both semantic searches.
search_queries = ['Lithography', 'Machine Learning', 'Semantic Search', 'Polymer', 'Spectroscopy', 'Cloud Computing']
input_threshold = 0.8
search_threshold = 0.01
max_results = 20
include_direct_findings = True

# One entry per query, in the order of search_queries.
results_precalculated = []
results_individual_calculations = []
results_substring_search = []

calculation_times = []

for search_query in tqdm(search_queries):
    semantic_search_args = (
        search_query,
        input_threshold,
        search_threshold,
        max_results,
        include_direct_findings,
    )

    hits, time_precalculated = _timed_call(
        precalculated_search.search_for_query, *semantic_search_args
    )
    results_precalculated.append(hits)

    hits, time_individual_calculations = _timed_call(
        individual_calculation_search.search_for_query, *semantic_search_args
    )
    results_individual_calculations.append(hits)

    hits, time_substring_search = _timed_call(substring_search, search_query, max_results)
    results_substring_search.append(hits)

    # Tuple order is (individual calculations, precalculated, substring search),
    # which differs from the order in which the searches are executed above.
    calculation_times.append((time_individual_calculations, time_precalculated, time_substring_search))

print("Times: ", calculation_times)

100%|██████████| 6/6 [11:02<00:00, 110.45s/it]

Times:  [(43.37, 5.49, 0.03), (10.46, 10.1, 0.03), (10.63, 11.76, 0.03), (521.3, 34.09, 0.02), (2.8, 2.99, 0.02), (4.84, 4.69, 0.02)]





In [45]:
# Dump the raw hits of the three searches, one group per query.
# Iterate over the actual number of queries instead of a hard-coded count so
# the cell keeps working when search_queries changes.
for i in range(len(search_queries)):
    print(results_precalculated[i])
    print(results_individual_calculations[i])
    print(results_substring_search[i])
    # Blank lines to visually separate the groups.
    print("\n")
    print("\n")

[(1.0, 'urn:semantic_search:Publication:2339'), (0.6018763548967829, 'urn:semantic_search:Publication:1687'), (0.12593867304595344, 'urn:semantic_search:Publication:1968'), (0.05415229893592905, 'urn:semantic_search:Publication:1774'), (0.025120791088428643, 'urn:semantic_search:Publication:327'), (0.01763141422643348, 'urn:semantic_search:Publication:580'), (0.01763141422643348, 'urn:semantic_search:Publication:1414'), (0.01763141422643348, 'urn:semantic_search:Publication:1871'), (0.015159901894687237, 'urn:semantic_search:Publication:1190'), (0.015159901894687237, 'urn:semantic_search:Publication:1957'), (0.015159901894687237, 'urn:semantic_search:Publication:2152'), (0.013643911705218512, 'urn:semantic_search:Publication:1038'), (0.013643911705218512, 'urn:semantic_search:Publication:1208'), (0.013643911705218512, 'urn:semantic_search:Publication:1551'), (0.010611931326281068, 'urn:semantic_search:Publication:3433'), (0.010611931326281064, 'urn:semantic_search:Publication:1002'), (

In [46]:
def get_publication_id_from_urn(publication_urn):
    """Return the part of ``publication_urn`` that follows its last ``:``.

    For example ``'urn:semantic_search:Publication:2339'`` yields ``'2339'``.
    A string that contains no ``:`` is returned unchanged.
    """
    _prefix, _separator, publication_id = publication_urn.rpartition(':')
    return publication_id

In [47]:
from database_adapter import *

# Index into search_queries of the query whose hits are shown below.
i = 0

# Pick which search's hits to inspect by enabling exactly one line.
# NOTE: substring_search returns bare publication ids rather than
# (certainty, urn) pairs, so the loop below would need adapting before the
# third option can be used.
subject = results_individual_calculations[i]
# subject = results_precalculated[i]
# subject = results_substring_search[i][:10]

print("Current competency: ", search_queries[i])
print()

# Print title and abstract of every hit; k is the rank of the hit in the list.
for k, (_certainty, pub_urn) in enumerate(subject):
    pub_id = get_publication_id_from_urn(pub_urn)
    title = get_title_of_publication_with_id(pub_id)
    abstract = get_abstract_of_publication_with_id(pub_id)

    print("Title ", k)
    print()
    print(title)
    print()
    print()
    print(abstract)
    print()
    print()

Current competency:  Lithography

Title  0

X-ray optics made by X-ray lithography: Process optimization and quality control


Grating based X-ray phase contrast imaging sets out to overcome the limits of conventional X-ray imaging in the detection of subtle density differences and opens a way to characterize a sample’s microstructure without the need for ultrahigh spatial resolution. The technique relies on grating structures with micrometric periods and extreme aspect ratio – their fabrication by X-ray lithography with optimal structure quality is the topic of this work.


Title  1

Molecular understanding of cytoneme-based Wnt trafficking


Photolithography is an indispensable tool in the modern microfabrication of integrated electronics and optical devices on several length scales. The task at hand is to replicate a desired pattern, encoded in a photomask, on a photoresist-covered wafer. Recent decades have witnessed an impressive development of photolithographic equipment, enablin