In [None]:
# import tracemalloc
# tracemalloc.start()
import pickle
from pathlib import Path

from IPython.display import display

from rs_classes import async_request_client as async_client
import rs_functions.fetch_annotations_list as fetch_annotations
import rs_functions.fetch_annotation_content as fetch_annotation_content
import rs_functions.data_transformations as data_transformations
import rs_functions.fetch_annotation_meta as fetch_annotation_meta

# Create the request client with two empty placeholder arguments; the token and
# the API base URL are supplied afterwards through client.reset_inputs(...).
client = async_client.AsyncRequestClient("", "")

# Build the input controls returned by create_input_widgets() and render them,
# dropdown first. Later cells read url_input.value, bool_toggle.value,
# dropdown.label and dropdown.value.
url_input, bool_toggle, dropdown = data_transformations.create_input_widgets()
display(dropdown, url_input, bool_toggle)

In [None]:
# Token handed to client.reset_inputs(); left empty here, fill it in locally.
token_input = ""

# Search query passed to search_with_query(); the active clauses are the
# entries of the "$and" list. The commented entries are optional examples
# that can be uncommented and edited.
query = {
    "query": {
        "$and": [
            # Example: restrict the search to specific queues
            # {"queue": {"$in": [
            #     "https://elis.rossum.ai/api/v1/queues/XXXXXX",
            #     "https://elis.rossum.ai/api/v1/queues/XXXXXX",
            #     "https://elis.rossum.ai/api/v1/queues/XXXXXX",
            # ]}},
            # Example: match one exact sender name
            # {"field.sender_name.string": {"$eq": "Contoso Ltd"}},
            {"status": {"$in": ["confirmed", "exported", "to_review"]}},
            {"created_at": {"$gt": "2024-08-19T00:00:00"}},
        ]
    }
}
######################################################
# Resolve the base URL from the widget selection: the "prod-eu2" entry is
# prefixed with "https://" plus the text typed into url_input, every other
# entry is used exactly as the dropdown provides it.
if dropdown.label == "prod-eu2":
    url = f"https://{url_input.value}{dropdown.value}"
else:
    url = f"{dropdown.value}"

# Point the client at the "/api" path of that base URL with the current token.
client.reset_inputs(token_input, f"{url}/api")
######################################################

In [None]:
######### VALUE ANALYSIS #########
field_ids = ["document_id", "sender_name", "meta.created_at"]

# to save data for later set True
save_data = False
saved_data_name = ""
load_from_archive = ""

######################################################
# Collect annotations based on search query
if load_from_archive:
    print("loading from archive")
    with open(f'data_archive/{load_from_archive}', 'rb') as file:
        annotations_collection = pickle.load(file)
else:        
    annotations_collection = await fetch_annotations.search_with_query(
            client, query, allPages=bool_toggle.value, page_max=None)

    await fetch_annotation_content.get_annotation_content(
        client, annotations_collection
    )

df = data_transformations.text_value_analysis(
    field_ids, annotations_collection, base_url=f"{url}/document"
)
def make_clickable(url):
    """Return an HTML anchor labelled "link" that opens ``url`` in a new tab."""
    anchor_template = '<a href="{}" target="_blank">link</a>'
    return anchor_template.format(url)
# Render the "Address" column as clickable links.
styled_output = df.style.format({"Address": make_clickable})

# Colour every cell whose text contains the "  || => Manual" marker red.
# pandas 2.1 deprecated Styler.applymap in favour of Styler.map, so use map
# when the installed pandas provides it and fall back to applymap otherwise.
apply_cellwise = getattr(styled_output, "map", None) or styled_output.applymap
styled_output = apply_cellwise(
    lambda val: "color: red" if "  || => Manual" in str(val) else ""
)

# Show the table before saving so a failed save cannot hide the result.
display(styled_output)

if save_data:
    if not saved_data_name:
        # Without a name the target would be the data_archive directory itself.
        raise ValueError(
            "save_data is True but saved_data_name is empty; "
            "set saved_data_name to the archive file name"
        )
    archive_path = Path(f'data_archive/{saved_data_name}')
    # Create data_archive/ (and any sub-folder in the name) on first use.
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    with archive_path.open('wb') as archive_file:
        pickle.dump(annotations_collection, archive_file)

In [None]:
######### POSITION ANALYSIS #########

field_id_for_posision = "document_id"
slicer_field_id = "sender_name"
n_neigbors = 2
contamination = 0.1

######### POSITION ANALYSIS #########
from rs_functions.fetch_pages_data import get_annotations_page
import plotly.express as px
from sklearn.neighbors import LocalOutlierFactor

# Collect annotations based on search query
annotations_collection = await fetch_annotations.search_with_query(
        client, query, allPages=bool_toggle.value, page_max=None)

await fetch_annotation_content.get_annotation_content(
    client, annotations_collection
)
await get_annotations_page(client, annotations_collection)

df = data_transformations.position_analysis(
    annotations_collection, field_id_for_posision, slicer_field_id
)

df = df.dropna()

def _position_scatter(data, title, size, color=None):
    """Return a page-faceted Plotly scatter of the centre positions in ``data``.

    Parameters:
        data: frame with "center_x_percent", "center_y_percent", "page" and
            "annotation_id" columns.
        title: figure title.
        size: width and height of the figure in pixels.
        color: optional per-row values used to colour the points.

    Both axes are fixed to the 0-100 range; the y-axis is reversed.
    """
    figure = px.scatter(
        data,
        x="center_x_percent",
        y="center_y_percent",
        color=color,
        facet_col="page",
        hover_data={
            "annotation_id": True,
            "center_x_percent": False,
            "center_y_percent": False,
        },
    )
    figure.update_layout(
        title=title,
        width=size,
        height=size,
        xaxis=dict(range=[0, 100]),
        yaxis=dict(range=[100, 0]),  # reversed y-axis, same 0-100 range
    )
    return figure


# Overview: every position, one facet per page.
fig = _position_scatter(df, "All Slicers", 800)
fig.show()

# One outlier plot per distinct slicer value.
unique_slicers = df["slicer"].unique()
skipped_slicers = []

for slicer_value in unique_slicers:
    # Rows belonging to the current slicer value
    sliced_data = df[df["slicer"] == slicer_value]

    # The group must have more rows than n_neigbors to be analysed.
    if len(sliced_data) <= n_neigbors:
        skipped_slicers.append(slicer_value)
        continue

    # Pass the configured contamination through; previously the setting was
    # defined but never handed to the estimator, so changing it had no effect.
    lof = LocalOutlierFactor(n_neighbors=n_neigbors, contamination=contamination)
    # fit_predict labels outliers -1 and inliers 1 (scikit-learn convention).
    outlier_labels = lof.fit_predict(
        sliced_data[["center_x_percent", "center_y_percent"]]
    )

    fig = _position_scatter(
        sliced_data,
        "Empty" if slicer_value == "" else slicer_value,
        450,
        color=outlier_labels,
    )
    fig.show()

# Report the groups that were too small instead of dropping them silently.
if skipped_slicers:
    print(
        f"Skipped {len(skipped_slicers)} slicer value(s) with {n_neigbors} or fewer "
        f"positions: {skipped_slicers}"
    )

### To show original data set
# import pandas as pd
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

# display(df)
