In [35]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

In [36]:
# folder = "results"
# files = os.listdir(folder)

# hybrid_and_files = [file for file in files if "search-hybrid_" in file]
# for file in hybrid_and_files:
#     new_file = file.replace("search-hybrid", "search-hybridAND")
#     os.rename(os.path.join(folder, file), os.path.join(folder, new_file))


In [38]:
def prepare_df(folder):
    """Load every per-run results CSV in ``folder`` into a single DataFrame.

    The run parameters are parsed from each file name, which must contain the
    tokens ``chunk-<int>_``, ``search-<name>_``, ``lim-<int>_`` and
    ``expand-<int>.`` (the text after each marker is read up to the next
    ``_``, or up to the first ``.`` for ``expand``).  The parsed values are
    attached to every row of that file as the columns ``chunk``, ``search``,
    ``lim`` and ``expand``.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A DataFrame with the rows of all matching files and a fresh
        ``0..n-1`` index.  An empty DataFrame if no file matches.

    Raises:
        ValueError: If a ``chunk``/``lim``/``expand`` token is not an integer.
    """
    required_tokens = ("chunk-", "search-", "lim-", "expand-")
    frames = []

    # Sorted so the row order does not depend on the OS directory listing order.
    for file in sorted(os.listdir(folder)):
        path = os.path.join(folder, file)

        # Skip sub-directories and stray entries (e.g. .ipynb_checkpoints,
        # .DS_Store) instead of failing with IndexError while parsing the name.
        if not os.path.isfile(path):
            continue
        if not all(token in file for token in required_tokens):
            continue

        chunk = int(file.split("chunk-")[1].split("_")[0])
        search = file.split("search-")[1].split("_")[0]
        lim = int(file.split("lim-")[1].split("_")[0])
        expand = int(file.split("expand-")[1].split(".")[0])

        temp_df = pd.read_csv(path)
        temp_df["chunk"] = chunk
        temp_df["search"] = search
        temp_df["lim"] = lim
        temp_df["expand"] = expand
        frames.append(temp_df)

    if not frames:
        # pd.concat([]) raises; keep the "empty folder -> empty frame" contract.
        return pd.DataFrame()

    # One concat at the end instead of re-copying the accumulated frame
    # on every iteration.
    return pd.concat(frames, ignore_index=True)
    
def prepare_summary(df):
    """Aggregate per-query results into one row per run configuration.

    Rows are grouped by ``(chunk, search, lim, expand)`` and the columns
    ``n_retrieved_rules``, ``n_target_rules`` and ``len_context`` are summed
    within each group.  Two derived columns are then added:

    * ``recall``: summed ``n_retrieved_rules`` / summed ``n_target_rules``,
      rounded to 2 decimals.
    * ``recall/context``: the (already rounded) ``recall`` divided by the
      summed ``len_context``, rounded to 8 decimals.

    Args:
        df: DataFrame containing at least the four grouping columns and the
            three summed columns named above.

    Returns:
        A DataFrame with the columns ``chunk``, ``search``, ``expand``,
        ``lim``, ``len_context``, ``recall`` and ``recall/context``, ordered
        by the grouping keys.
    """
    group_keys = ["chunk", "search", "lim", "expand"]
    summed_columns = ["n_retrieved_rules", "n_target_rules", "len_context"]

    # Sum only the columns that are actually used: a blanket .sum() also tries
    # to add up any text/mixed-type column in the input, which is wasteful and
    # can raise TypeError.
    df_summary = df.groupby(group_keys)[summed_columns].sum().reset_index()

    df_summary["recall"] = (df_summary["n_retrieved_rules"] / df_summary["n_target_rules"]).round(2)
    # Deliberately derived from the rounded recall so the values match the
    # displayed recall column.
    df_summary["recall/context"] = (df_summary["recall"] / df_summary["len_context"]).round(8)

    columns = ["chunk", "search", "expand", "lim", "len_context", "recall", "recall/context"]
    return df_summary[columns]


In [14]:
# def plot_results(df_summary):
#     # Create two subplots vertically stacked
#     fig = make_subplots(rows=2, cols=1, 
#                         subplot_titles=(
#                             "Recall vs Limit",
#                             "Recall vs Context Length"
#                         ))

#     # Define colors for expand values and markers for search types
#     colors = ['#636EFA', '#EF553B', '#00CC96']  # Default plotly colors
#     search_markers = {
#         'hybridAND': 'circle',
#         'hybridOR': 'circle-open',
#         'semantic': 'diamond',
#         'ftsAND': 'square',
#         'ftsOR': 'square-open'
#     }

#     # First plot: Recall vs Limit
#     for i, expand_val in enumerate(df_summary['expand'].unique()):
#         for search_type in df_summary['search'].unique():
#             df_filtered = df_summary[
#                 (df_summary['expand'] == expand_val) & 
#                 (df_summary['search'] == search_type)
#             ]
            
#             fig.add_trace(
#                 go.Scatter(
#                     x=df_filtered['lim'],
#                     y=df_filtered['recall'],
#                     mode='lines+markers',
#                     name=f'Expand {expand_val}, {search_type}',
#                     line=dict(color=colors[i]),
#                     marker=dict(
#                         symbol=search_markers[search_type], 
#                         size=df_filtered['lim']+5,
#                     ),
#                     hovertemplate="<br>".join([
#                         "Limit: %{x}",
#                         "Recall: %{y:.3f}",
#                         "Search: " + search_type,
#                         "Expand: %{customdata[0]}",
#                         "Context Length: %{customdata[1]:,}",
#                     ]),
#                     customdata=df_filtered[['expand', 'len_context']].values
#                 ),
#                 row=1, col=1
#             )

#     # Second plot: Recall vs Context Length
#     for i, expand_val in enumerate(df_summary['expand'].unique()):
#         for search_type in df_summary['search'].unique():
#             df_filtered = df_summary[
#                 (df_summary['expand'] == expand_val) & 
#                 (df_summary['search'] == search_type)
#             ]
            
#             fig.add_trace(
#                 go.Scatter(
#                     x=df_filtered['len_context'],
#                     y=df_filtered['recall'],
#                     mode='markers+lines',
#                     name=f'Expand {expand_val}, {search_type}',
#                     marker=dict(
#                         color=colors[i],
#                         symbol=search_markers[search_type],
#                         size=df_filtered['lim']+5,
#                     ),
#                     hovertemplate="<br>".join([
#                         "Context Length: %{x:,}",
#                         "Recall: %{y:.3f}",
#                         "Search: " + search_type,
#                         "Expand: %{customdata[0]}",
#                         "Limit: %{customdata[1]}",
#                     ]),
#                     customdata=df_filtered[['expand', 'lim']].values,
#                     showlegend=False
#                 ),
#                 row=2, col=1
#             )

#     # Update layout for dark theme and other customizations
#     fig.update_layout(
#         template="plotly_dark",
#         height=800,
#         legend_title="Configuration",
#         xaxis_title="Limit",
#         xaxis2_title="Context Length",
#         yaxis_title="Recall",
#         yaxis2_title="Recall"
#     )

#     # Set y-axes to start at 0
#     # fig.update_yaxes(range=[0, None], row=1, col=1)
#     # fig.update_yaxes(range=[0, None], row=2, col=1)

#     fig.show()


In [44]:
def plot_results(df_summary, chunk_sizes=(500, 1000, 1500, 2000)):
    """Plot recall against context length, one stacked subplot per chunk size.

    Within each subplot one trace is drawn per (expand, search) combination
    present for that chunk size: colour encodes ``expand``, marker symbol
    encodes ``search`` and marker size grows with ``lim``.  Legend entries are
    emitted for the first subplot only.  The figure is displayed with
    ``fig.show()``; nothing is returned.

    Args:
        df_summary: DataFrame with the columns ``chunk``, ``search``,
            ``expand``, ``lim``, ``len_context`` and ``recall``.
        chunk_sizes: Chunk sizes to plot, one subplot row each, in order.

    Raises:
        ValueError: If ``chunk_sizes`` is empty.
    """
    chunk_sizes = list(chunk_sizes)
    if not chunk_sizes:
        raise ValueError("chunk_sizes must contain at least one chunk size")
    n_rows = len(chunk_sizes)

    # One vertically stacked subplot per chunk size.
    fig = make_subplots(
        rows=n_rows,
        cols=1,
        subplot_titles=[
            f"Chunk Size {chunk_size}: Recall vs Context Length"
            for chunk_size in chunk_sizes
        ],
        # 0.1 for the default four rows; shrinks for many rows so the gaps
        # never exceed half of the figure height.
        vertical_spacing=min(0.1, 0.5 / max(n_rows - 1, 1)),
    )

    # Define colors for expand values and markers for search types
    colors = ['#636EFA', '#EF553B', '#00CC96']  # Default plotly colors
    search_markers = {
        'hybridAND': 'circle',
        'hybridOR': 'circle-open',
        'semantic': 'diamond',
        'ftsAND': 'square',
        'ftsOR': 'square-open'
    }
    default_marker = 'circle'  # fallback for search types not listed above

    # Fix the expand -> colour mapping once for the whole figure so a given
    # expand value has the same colour in every subplot (the legend is only
    # shown for row 1).  Colours cycle if there are more expand values than
    # palette entries.
    expand_colors = {
        expand_val: colors[idx % len(colors)]
        for idx, expand_val in enumerate(sorted(df_summary['expand'].unique()))
    }

    # Plot for each chunk size
    for row, chunk_size in enumerate(chunk_sizes, 1):
        df_chunk = df_summary[df_summary['chunk'] == chunk_size]

        for expand_val in sorted(df_chunk['expand'].unique()):
            for search_type in df_chunk['search'].unique():
                df_filtered = df_chunk[
                    (df_chunk['expand'] == expand_val) &
                    (df_chunk['search'] == search_type)
                ]

                fig.add_trace(
                    go.Scatter(
                        x=df_filtered['len_context'],
                        y=df_filtered['recall'],
                        mode='markers+lines',
                        name=f'Expand {expand_val}, {search_type}',
                        marker=dict(
                            color=expand_colors[expand_val],
                            symbol=search_markers.get(search_type, default_marker),
                            # +5 keeps the smallest limits visible
                            size=df_filtered['lim']+5,
                        ),
                        hovertemplate="<br>".join([
                            "Context Length: %{x:,}",
                            "Recall: %{y:.3f}",
                            "Search: " + search_type,
                            "Expand: %{customdata[0]}",
                            "Limit: %{customdata[1]}",
                        ]),
                        customdata=df_filtered[['expand', 'lim']].values,
                        # Only the first subplot contributes legend entries.
                        showlegend=(row == 1)
                    ),
                    row=row, col=1
                )

    # Update layout for dark theme and other customizations
    fig.update_layout(
        template="plotly_dark",
        height=300 * n_rows,  # 1200 for the default four subplots
        legend_title="Configuration",
        showlegend=True
    )

    # Title every x and y axis (no row/col selector -> applies to all subplots,
    # including the last one).
    fig.update_xaxes(title_text="Context Length")
    fig.update_yaxes(title_text="Recall")

    fig.show()

In [42]:
# Directory passed to prepare_df, which reads every results CSV found in it.
folder = "results/retrieval"
df = prepare_df(folder) 

# Optional filters, currently disabled. Uncomment to restrict the analysis to
# one chunk size, drop the ftsAND search type, or keep only expand values < 2.
# df = df.loc[df["chunk"] == 1500]
# df = df.loc[df["search"] != "ftsAND"]
# df = df.loc[df["expand"] < 2]


# One row per (chunk, search, lim, expand) configuration with its recall.
df_summary = prepare_summary(df)
# Display the ten configurations with the highest recall.
df_summary.sort_values("recall", ascending = False).head(10)

Unnamed: 0,chunk,search,expand,lim,len_context,recall,recall/context
219,1500,hybridOR,0,6,1776417,0.73,4.1e-07
217,1500,hybridOR,1,5,3186683,0.73,2.3e-07
218,1500,hybridOR,2,5,4721540,0.73,1.5e-07
221,1500,hybridOR,2,6,5505773,0.73,1.3e-07
220,1500,hybridOR,1,6,3751622,0.73,1.9e-07
65,500,hybridOR,2,6,1953357,0.72,3.7e-07
267,2000,hybridOR,1,4,3347447,0.72,2.2e-07
269,2000,hybridOR,1,5,4063945,0.72,1.8e-07
68,500,hybridOR,2,7,2233008,0.72,3.2e-07
200,1500,hybridAND,2,5,4346548,0.72,1.7e-07


In [45]:
# Draw the recall-vs-context-length subplots for the summary table built above.
plot_results(df_summary)

In [76]:
# # Create two subplots vertically stacked
# fig = make_subplots(rows=2, cols=1, 
#                     subplot_titles=(
#                         "Recall vs Limit",
#                         "Recall vs Context Length"
#                     ))

# # Define colors for expand values and markers for search types
# colors = ['#636EFA', '#EF553B', '#00CC96']  # Default plotly colors
# search_markers = {
#     'hybridAND': 'circle',
#     'hybridOR': 'circle-open',
#     'semantic': 'diamond',
#     'ftsAND': 'square',
#     'ftsOR': 'square-open'
# }

# # First plot: Recall vs Limit
# for i, expand_val in enumerate(df_summary['expand'].unique()):
#     for search_type in df_summary['search'].unique():
#         df_filtered = df_summary[
#             (df_summary['expand'] == expand_val) & 
#             (df_summary['search'] == search_type)
#         ]
        
#         fig.add_trace(
#             go.Scatter(
#                 x=df_filtered['lim'],
#                 y=df_filtered['recall'],
#                 mode='lines+markers',
#                 name=f'Expand {expand_val}, {search_type}',
#                 line=dict(color=colors[i]),
#                 marker=dict(
#                     symbol=search_markers[search_type], 
#                     size=df_filtered['lim']+5,
#                 ),
#                 hovertemplate="<br>".join([
#                     "Limit: %{x}",
#                     "Recall: %{y:.3f}",
#                     "Search: " + search_type,
#                     "Expand: %{customdata[0]}",
#                     "Context Length: %{customdata[1]:,}",
#                 ]),
#                 customdata=df_filtered[['expand', 'len_context']].values
#             ),
#             row=1, col=1
#         )

# # Second plot: Recall vs Context Length
# for i, expand_val in enumerate(df_summary['expand'].unique()):
#     for search_type in df_summary['search'].unique():
#         df_filtered = df_summary[
#             (df_summary['expand'] == expand_val) & 
#             (df_summary['search'] == search_type)
#         ]
        
#         fig.add_trace(
#             go.Scatter(
#                 x=df_filtered['len_context'],
#                 y=df_filtered['recall'],
#                 mode='markers+lines',
#                 name=f'Expand {expand_val}, {search_type}',
#                 marker=dict(
#                     color=colors[i],
#                     symbol=search_markers[search_type],
#                     size=df_filtered['lim']+5,
#                 ),
#                 hovertemplate="<br>".join([
#                     "Context Length: %{x:,}",
#                     "Recall: %{y:.3f}",
#                     "Search: " + search_type,
#                     "Expand: %{customdata[0]}",
#                     "Limit: %{customdata[1]}",
#                 ]),
#                 customdata=df_filtered[['expand', 'lim']].values,
#                 showlegend=False
#             ),
#             row=2, col=1
#         )

# # Update layout for dark theme and other customizations
# fig.update_layout(
#     template="plotly_dark",
#     height=800,
#     legend_title="Configuration",
#     xaxis_title="Limit",
#     xaxis2_title="Context Length",
#     yaxis_title="Recall",
#     yaxis2_title="Recall"
# )

# # Set y-axes to start at 0
# # fig.update_yaxes(range=[0, None], row=1, col=1)
# # fig.update_yaxes(range=[0, None], row=2, col=1)

# fig.show()

In [74]:
# NOTE(review): prints the literal None and is not used by any other cell shown
# here; looks like a leftover scratch cell — confirm it can be removed.
print(None)

None
