In [1]:
from storage import MongoDBStorage
from service import MongoDBSearchService
from datetime import datetime
from datetime import timedelta

In [2]:
# Read the database host URL from the local "db_host" file; trailing
# whitespace (e.g. the final newline) is stripped.
with open("db_host", "r") as file:
    host = file.read().rstrip()

# Connection settings handed to the storage layer.
storage_parameters = {
    "host_url": host,
    "collection": "digital_footprint_clean",
}
storage = MongoDBStorage(parameters=storage_parameters)

# The search service used by every query cell below wraps the storage object.
service = MongoDBSearchService(storage)

In [3]:
def print_ids(df_list):
    """Print one line per digital footprint object found by a query.

    Args:
        df_list: Iterable of mapping-like objects; each must provide the
            keys ``'_id'`` and ``'additional_info'`` (which in turn must
            provide ``'file_name'``).

    Prints ``ID = <id> , file name = <name>`` for every item, or a single
    fallback message when the iterable yields nothing.
    """
    count = 0
    # enumerate() works with any iterable, so single-pass iterables are fine.
    for count, footprint in enumerate(df_list, start=1):
        object_id = footprint['_id']
        file_name = footprint['additional_info']['file_name']
        print(f"ID = {object_id} , file name = {file_name}")

    if count == 0:
        print("No digital footprint objects for given query")

Getting digital footprint objects by their creation date.

In [4]:
# Getting DF by dates
# Capture the reference instant once, so that the timestamps shown in the
# messages are exactly the values passed to the queries and both period
# bounds are derived from the same moment.
# NOTE(review): datetime.now() returns a naive local-time value; confirm this
# matches how creation dates are stored in the collection.
now = datetime.now()
period_start = now - timedelta(days=50)

print(f"Finding footprint objects that were created before {now}")
result = service.find_by_query(service.get_query_find_before_date(now))
print_ids(result)

print(f"\n\nFinding footprint objects that were created after {now}")
result = service.find_by_query(service.get_query_find_after_date(now))
print_ids(result)

print(f"\n\nFinding footprint objects that were created in period {period_start} - {now}")
result = service.find_by_query(
    service.get_query_find_in_period(period_start, now)
)
print_ids(result)

Finding footprint objects that were created before 2024-05-11 16:34:22.172422
ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4
ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4
ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Finding footprint objects that were created after 2024-05-11 16:34:22.877594
No digital footprint objects for given query


Finding footprint objects that were created in period 2024-03-22 16:34:22.939595 - 2024-05-11 16:34:22.939595
ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4
ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4
ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Getting a digital footprint object by its ID

In [5]:
# Build the by-ID query first, then execute it.
query = service.get_query_find_by_id("661931f5d172007aa8b18204")
result = service.find_by_query(query)
print_ids(result)

ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4


Getting digital footprint objects by type

In [6]:
# Build the by-type query first, then execute it.
query = service.get_query_find_by_type("video")
result = service.find_by_query(query)
print_ids(result)

ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4
ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4
ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Getting digital footprint objects by combining different queries (using "AND")

In [7]:
# Both filters are handed to aggregate_filters_with_and as one list.
filters = [
    service.get_query_find_by_type("video"),
    service.get_query_find_by_id("661931f5d172007aa8b18204"),
]
result = service.aggregate_filters_with_and(filters)
print_ids(result)

ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4


Getting digital footprint objects by combining different queries (using "OR")

In [8]:
# Both filters are handed to aggregate_filters_with_or as one list.
filters = [
    service.get_query_find_by_type("video"),
    service.get_query_find_by_id("661931f5d172007aa8b18204"),
]
result = service.aggregate_filters_with_or(filters)
print_ids(result)

ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4
ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4
ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Getting digital footprint objects by performing a full-text search over their summaries

In [10]:
# Text passed to the summary text query.
summary_text = "Literature review is one of the most important parts of scientific work"
query = service.get_query_find_by_text_summary(text=summary_text)
result = service.find_by_query(query)
print_ids(result)

ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Getting digital footprint objects by performing a full-text search over their extracted text

In [12]:
# Text passed to the extracted-text query.
extracted_text = "system-oriented analysis and information modeling"
query = service.get_query_find_by_text_extracted_text(text=extracted_text)
result = service.find_by_query(query)
print_ids(result)

ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4


Getting digital footprint objects by keywords

In [13]:
# Keywords are supplied as a list.
keywords = ["translational research"]
query = service.get_query_find_by_keywords(keywords)
result = service.find_by_query(query)
print_ids(result)

ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4
ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Getting digital footprint objects by topics

In [14]:
# Topics are supplied as a list.
topics = ["system analysis"]
query = service.get_query_find_by_topics(topics)
result = service.find_by_query(query)
print_ids(result)

ID = 661931fcd172007aa8b18205 , file name = system_oriented_analysis_and_informational_modelling.mp4


Getting digital footprint objects by named entities

In [15]:
# Named entities to look for, grouped by entity label.
named_entities = {
    "PERSON": ["Alexandra Klimova"],
    "ORG": ["Microsoft"],
}
# The result of get_named_entities_filters is combined with "AND".
query = service.get_named_entities_filters(named_entities=named_entities)
result = service.aggregate_filters_with_and(query)
print_ids(result)

ID = 661931ffd172007aa8b18206 , file name = factographical_information_analysis.mp4


Getting digital footprint objects by their file name in "additional_info"

In [16]:
# Constraint on the "file_name" entry, passed to get_additional_info_filters.
additional_info = {"file_name": "probabilistic_modelling.mp4"}
filters = service.get_additional_info_filters(additional_info)
result = service.aggregate_filters_with_or(filters)
print_ids(result)

ID = 661931f5d172007aa8b18204 , file name = probabilistic_modelling.mp4
