# Searching CogStack

This script is designed to be a template for CogStack searches.

In [None]:
from cogstack import CogStack, print_dataframe
from cogstack import read_from_env
import os
# Unpack the connection settings returned by read_from_env(): the hosts,
# an API key, and a (username, password) pair.
# NOTE(review): presumably these are read from environment variables — confirm in the cogstack module.
hosts, api_key, (username, password) = read_from_env()

# Login and Initialise

In [None]:
# Create the CogStack client using API-key authentication.
cs = CogStack.with_api_key_auth(hosts=hosts, api_key=api_key)
# Alternative ways to create the client — to use one, uncomment it and comment out the line above:
#cs = CogStack.with_basic_auth(hosts=hosts, username=username, password=password)
#cs = CogStack(hosts).use_api_key_auth(api_key=api_key)
#cs = CogStack(hosts).use_basic_auth(username=username, password=password)
#cs = CogStack(hosts).use_api_key_auth("")


# Check the list of Indices and columns

View all indices and their aliases available to this user. Either index names or their aliases can be used to extract data.

In [None]:
# Fetch the indices and aliases first, then hand the result to print_dataframe
# together with the ', ' argument.
indices_and_aliases = cs.get_indices_and_aliases()
print_dataframe(indices_and_aliases, ', ')

View fields/columns and their data types for provided index names or aliases

In [None]:
# Index names or aliases whose fields should be listed.
indices = ["example_index"] # <- CHANGE THIS
# Last expression in the cell, so the notebook displays the value it returns.
cs.get_index_fields(indices)

# Set search query parameters

In [None]:
# Patient identifiers (patient_TrustNumber values) for the search.
# Only referenced by the commented-out "filter" clause of search_query.
pt_list = [ ]  # <- add patient_TrustNumber values here

## Columns of interest

Select the fields you want and list them in the order the output columns should appear.

In [None]:
# Field names to extract; passed as include_fields to the read_data_with_* calls.
columns = []  # <- CHANGE THIS

## Build query

Further information on [how to build a query can be found here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)

Further information on [free text string queries can be found here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html)


In [None]:
# Free-text clause: fill in the query string and the field it should run against.
free_text_clause = {
    "query_string": {
        "query": "",          # <- CHANGE THIS
        "default_field": "",  # <- CHANGE THIS
    }
}

# Boolean query wrapping the free-text clause.
search_query = {
    "bool": {
        # Optional: uncomment to add a terms filter on patient_TrustNumber using pt_list.
        # "filter": {
        #     "terms": {
        #         "patient_TrustNumber": pt_list
        #     }
        # },
        "must": [free_text_clause],
    }
}
    

In [None]:
# Count the number of documents matching the search query.
# example_indices holds the index names or aliases to search.
example_indices = ["example-index"]  # <- CHANGE THIS
cs.count_search_results(index=example_indices, query=search_query)


# Search, Process, and Save
Use any one of the following functions to extract search results.

In [None]:
# Extract the results with the scan helper function.
# No scroll id is provided, so a search that fails midway cannot be resumed.
example_indices = ["example-index"]  # <- CHANGE THIS
df = cs.read_data_with_scan(
    index=example_indices,
    query=search_query,
    include_fields=columns,
)
print(df)

In [None]:
# Extract the results with the scroll API.
# If the search fails midway, a scroll id is made available and can be used
# to resume the search from the point of failure.
example_indices = ["example-index"]  # <- CHANGE THIS
df = cs.read_data_with_scroll(
    index=example_indices,
    query=search_query,
    include_fields=columns,
)
print(df)

In [None]:
# Extract the results with sorting.
# If the search fails midway, a search_after value is made available and can be
# used to resume the search; this can be slower than the scan or scroll methods
# for large datasets.
# Note: sorting needs a field to sort by, which should be present in the index.
# The default is to sort by _id.
example_indices = ["example-index"]  # <- CHANGE THIS
df = cs.read_data_with_sorting(
    index=example_indices,
    query=search_query,
    include_fields=columns,
)
print(df)

## Process

In [None]:
# Add your own processing of df here.
# For example, preview the first few rows of the DataFrame:
df.head()

## Save

In [None]:
# Where to save the DataFrame as a CSV file.
path_to_results = "data/cogstack_search_results"  # output directory
file_name = "file_name.csv"  # <- CHANGE THIS

In [None]:
# Create the output directory if it does not exist yet: to_csv does not create
# missing parent directories and fails when the target directory is absent.
os.makedirs(path_to_results, exist_ok=True)
# Write the DataFrame as CSV, leaving out the row index.
df.to_csv(os.path.join(path_to_results, file_name), index=False)