In [1]:
import polars as pl
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import numpy as np
import ipywidgets as widgets
from IPython.display import display, HTML
import scipy.stats as stats
from pathlib import Path


In [13]:
# Select the "plotly_dark" template as plotly's default figure template.
import plotly.io as pio
pio.templates.default = "plotly_dark"

In [14]:
def read_and_process_csv(file_path):
    """Load one metrics CSV and tag its rows with object id and metric type.

    Both tags are parsed from the file name (without extension), split on
    ``'_'``: the third token is used as the object id and the last token as
    the metric type.  A tag column is only added when the CSV does not
    already contain a column of that name.

    Args:
        file_path: Location of the CSV file, either a ``pathlib.Path`` or a
            plain string.

    Returns:
        The polars DataFrame read from ``file_path``, with ``object_id`` and
        ``metric_type`` columns present.

    Raises:
        ValueError: If the file name has fewer than three ``'_'``-separated
            tokens, so no object id can be extracted from it.
    """
    file_path = Path(file_path)  # also accept plain-string paths
    name_tokens = file_path.stem.split('_')
    if len(name_tokens) < 3:
        raise ValueError(
            f"Cannot derive object id and metric type from file name "
            f"{file_path.name!r}: expected at least three '_'-separated tokens"
        )
    tags = {'object_id': name_tokens[2], 'metric_type': name_tokens[-1]}

    df = pl.read_csv(file_path)
    for column_name, tag in tags.items():
        # Values already stored in the file win over the file-name tag.
        if column_name not in df.columns:
            df = df.with_columns(pl.lit(tag).alias(column_name))

    return df


In [15]:
def common_type(column_name, frames=None):
    """Pick the dtype a column should be cast to across several frames.

    The dtypes of ``column_name`` are collected from every frame that has
    the column.  ``pl.Float64`` is returned if any frame stores it as
    Float64, otherwise ``pl.Int64`` if any frame stores it as Int64, and
    ``pl.Utf8`` in every other case (including when no frame has the column).

    Args:
        column_name: Name of the column to inspect.
        frames: Iterable of polars DataFrames to inspect.  When omitted, the
            notebook-level ``dfs`` list is used, which keeps existing
            ``common_type(column)`` calls working.

    Returns:
        One of ``pl.Float64``, ``pl.Int64`` or ``pl.Utf8``.
    """
    if frames is None:
        frames = dfs  # fall back to the notebook-level list of loaded frames

    types = [frame[column_name].dtype for frame in frames if column_name in frame.columns]

    # Widest numeric dtype first; anything else falls through to Utf8.
    for candidate in (pl.Float64, pl.Int64):
        if candidate in types:
            return candidate
    return pl.Utf8


In [16]:
data_dir = Path('../metrics/data/csv')
metric_suffixes = ('COMPOSITE', 'FEATURE', 'SSIM')

# glob() yields files in no guaranteed order; sorting keeps the row order of
# everything built from `dfs` reproducible between runs.
dfs = [
    read_and_process_csv(file)
    for file in sorted(data_dir.glob('*.csv'))
    if file.stem.endswith(metric_suffixes)
]
if not dfs:
    # Fail here with a readable message instead of inside set.intersection().
    raise FileNotFoundError(
        f"No CSV files ending in COMPOSITE, FEATURE or SSIM were found in {data_dir}"
    )

# Columns present in every frame, listed in the first frame's column order so
# the result does not depend on set iteration order.
shared_names = set.intersection(*(set(df.columns) for df in dfs))
common_columns = [column for column in dfs[0].columns if column in shared_names]

# Resolve every target dtype first (common_type reads `dfs`), then cast each
# frame once instead of rebuilding the whole list for every column.
common_dtypes = {column: common_type(column) for column in common_columns}
dfs = [
    df.with_columns([pl.col(column).cast(dtype) for column, dtype in common_dtypes.items()])
    for df in dfs
]




In [17]:
# Reduce every frame to the shared columns, then stack them into one table.
combined_df = pl.concat(
    [frame.select(common_columns) for frame in dfs]
)

In [18]:
# Assemble the whole report first, then emit it with a single print call.
summary_lines = [
    f"Total number of rows: {combined_df.shape[0]}",
    f"Number of unique objects: {combined_df['object_id'].n_unique()}",
    f"Metric types: {combined_df['metric_type'].unique().to_list()}",
    f"Common columns: {common_columns}",
    "\nColumn data types:",
]
summary_lines.extend(
    f"{name}: {combined_df[name].dtype}" for name in common_columns
)
print("\n".join(summary_lines))


Total number of rows: 11721
Number of unique objects: 10
Metric types: ['COMPOSITE', 'SSIM', 'FEATURE']
Common columns: ['std_dev', 'position_z', 'iteration', 'score', 'azimuthal', 'iterations', 'point_id', 'position_y', 'metric_type', 'mean', 'position_x', 'radius', 'acquisition_value', 'polar', 'step_size', 'level', 'object_id', 'timestamp']

Column data types:
std_dev: Float64
position_z: Float64
iteration: Int64
score: Float64
azimuthal: Float64
iterations: Int64
point_id: String
position_y: Float64
metric_type: String
mean: Float64
position_x: Float64
radius: Float64
acquisition_value: Float64
polar: Float64
step_size: Float64
level: Int64
object_id: String
timestamp: Int64


In [19]:
# Row count of the combined table (same figure as in the summary cell above).
print("Total number of rows:", combined_df.height)


Total number of rows: 11721


In [20]:
def create_3d_scatter(df, obj_id, metric_type):
    """Build a 3D scatter of sample positions coloured by score.

    Args:
        df: polars DataFrame providing ``position_x``, ``position_y``,
            ``position_z`` and ``score`` columns; all of its rows are plotted.
        obj_id: Object identifier, used only in the figure title.
        metric_type: Metric name, used only in the figure title.

    Returns:
        A ``go.Figure`` holding a single ``Scatter3d`` marker trace.
    """
    scores = df['score']

    marker_style = {
        'size': 5,
        'color': scores,
        'colorscale': 'Viridis',
        'opacity': 0.8,
        'colorbar': {'title': "Score"},
    }
    points = go.Scatter3d(
        x=df['position_x'],
        y=df['position_y'],
        z=df['position_z'],
        mode='markers',
        marker=marker_style,
        # Hover shows only the score, rounded to three decimals.
        text=scores.round(3),
        hoverinfo='text',
    )

    scene_style = {
        'xaxis_title': 'X Position',
        'yaxis_title': 'Y Position',
        'zaxis_title': 'Z Position',
        'aspectmode': 'cube',
    }

    fig = go.Figure(data=[points])
    fig.update_layout(
        title=f'3D Scatter Plot for Object {obj_id} - {metric_type}',
        scene=scene_style,
        height=700,
    )
    return fig

In [21]:
def create_comparison_plot(df, obj_id):
    """Show one object's samples side by side for every metric type.

    One 3D scatter subplot is drawn per metric type (COMPOSITE, FEATURE,
    SSIM).  Points are placed at their ``position_x/y/z`` and coloured by
    ``score``; each subplot keeps its own colour range and gets its own
    colorbar, drawn next to the subplot it belongs to.

    Args:
        df: polars DataFrame with ``object_id``, ``metric_type``,
            ``position_x``, ``position_y``, ``position_z`` and ``score``
            columns.  It may contain rows for other objects; they are
            filtered out here.
        obj_id: Value of ``object_id`` to plot.

    Returns:
        A ``go.Figure`` with one row of three 3D scenes.
    """
    metric_types = ['COMPOSITE', 'FEATURE', 'SSIM']
    fig = make_subplots(
        rows=1,
        cols=len(metric_types),
        subplot_titles=metric_types,
        specs=[[{'type': 'scatter3d'}] * len(metric_types)],
        # Slightly wider gaps leave room for a colorbar beside each scene.
        horizontal_spacing=0.1,
    )

    # Filter by object once instead of once per metric type.
    object_rows = df.filter(pl.col('object_id') == obj_id)

    for col_index, metric_type in enumerate(metric_types, 1):
        metric_df = object_rows.filter(pl.col('metric_type') == metric_type)

        # Anchor the colorbar to the right edge of this subplot's own scene.
        # Left at their defaults, the colorbars would all share one x position
        # and be drawn on top of each other.
        scene_right_edge = fig.get_subplot(1, col_index).domain.x[1]

        fig.add_trace(
            go.Scatter3d(
                x=metric_df['position_x'],
                y=metric_df['position_y'],
                z=metric_df['position_z'],
                mode='markers',
                marker=dict(
                    size=5,
                    color=metric_df['score'],
                    colorscale='Viridis',
                    opacity=0.8,
                    colorbar=dict(
                        title="Score",
                        x=scene_right_edge,
                        xanchor='left',
                        thickness=15,
                    ),
                ),
                text=metric_df['score'].round(3),
                hoverinfo='text',
            ),
            row=1, col=col_index,
        )

    fig.update_layout(
        title=f'Comparison of Metric Types for Object {obj_id}',
        height=700,
    )
    # Apply the same axis titles and aspect mode to every scene in the figure.
    fig.update_scenes(xaxis_title='X', yaxis_title='Y', zaxis_title='Z', aspectmode='cube')

    return fig


In [22]:
def create_enhanced_box_plot(df):
    """Build grouped box plots of score per object, one trace per metric type.

    Each of the COMPOSITE, FEATURE and SSIM metric types gets its own box
    trace (``boxmean`` enabled), coloured with the first, second and third
    entry of ``px.colors.qualitative.Plotly`` respectively.  A dropdown menu
    relayouts the y-axis between linear and log scale.

    Args:
        df: polars DataFrame with ``metric_type``, ``object_id`` and
            ``score`` columns.

    Returns:
        The assembled ``go.Figure``.
    """
    palette = px.colors.qualitative.Plotly
    fig = go.Figure()

    for palette_slot, metric_type in enumerate(('COMPOSITE', 'FEATURE', 'SSIM')):
        rows = df.filter(pl.col('metric_type') == metric_type)
        box = go.Box(
            x=rows['object_id'],
            y=rows['score'],
            name=metric_type,
            boxmean=True,
            marker_color=palette[palette_slot],
        )
        fig.add_trace(box)

    # One button per y-axis scale; each only relayouts the axis type.
    scale_buttons = [
        {'label': label, 'method': "relayout", 'args': [{"yaxis.type": axis_type}]}
        for label, axis_type in (("Linear Scale", "linear"), ("Log Scale", "log"))
    ]
    scale_menu = {
        'buttons': scale_buttons,
        'direction': "down",
        'pad': {"r": 10, "t": 10},
        'showactive': True,
        'x': 0.9,
        'xanchor': "left",
        'y': 1.1,
        'yanchor': "top",
    }

    fig.update_layout(
        title='Score Distribution Across Objects and Metric Types',
        xaxis_title='Object ID',
        yaxis_title='Score',
        boxmode='group',
        height=600,
        legend_title='Metric Type',
        updatemenus=[scale_menu],
    )
    return fig

In [23]:
def create_violin_plot(df):
    """Build grouped violin plots of score per object, one trace per metric type.

    Each of the COMPOSITE, FEATURE and SSIM metric types gets its own
    violin trace with the inner box and mean line enabled, outlined with the
    first, second and third entry of ``px.colors.qualitative.Plotly``
    respectively.

    Args:
        df: polars DataFrame with ``metric_type``, ``object_id`` and
            ``score`` columns.

    Returns:
        The assembled ``go.Figure``.
    """
    palette = px.colors.qualitative.Plotly
    fig = go.Figure()

    for palette_slot, metric_type in enumerate(('COMPOSITE', 'FEATURE', 'SSIM')):
        rows = df.filter(pl.col('metric_type') == metric_type)
        violin = go.Violin(
            x=rows['object_id'],
            y=rows['score'],
            name=metric_type,
            box_visible=True,
            meanline_visible=True,
            opacity=0.6,
            line_color=palette[palette_slot],
        )
        fig.add_trace(violin)

    layout_options = {
        'title': 'Score Distribution (Violin Plot) Across Objects and Metric Types',
        'xaxis_title': 'Object ID',
        'yaxis_title': 'Score',
        'violinmode': 'group',
        'height': 600,
        'legend_title': 'Metric Type',
    }
    fig.update_layout(**layout_options)
    return fig

In [24]:
def create_heatmap(df):
    """Build a heatmap of the mean score for every object / metric type pair.

    The input is pivoted so that each row is an object and each remaining
    column holds the mean ``score`` of one metric type.

    Args:
        df: polars DataFrame with ``object_id``, ``metric_type`` and
            ``score`` columns.

    Returns:
        A ``go.Figure`` containing a single ``Heatmap`` trace.
    """
    # NOTE(review): recent polars releases appear to deprecate `columns=` in
    # favour of `on=` for pivot - confirm against the installed version.
    pivot_df = df.pivot(values='score', index='object_id', columns='metric_type', aggregate_function='mean')

    # The first pivot column is treated as the object_id index; the rest are
    # used as the metric-type axis.
    metric_columns = pivot_df.columns[1:]
    mean_scores = pivot_df.select(pl.exclude('object_id')).to_numpy()

    heatmap = go.Heatmap(
        z=mean_scores,
        x=metric_columns,
        y=pivot_df['object_id'],
        colorscale='Viridis',
        colorbar={'title': "Average Score"},
    )

    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title='Average Score Heatmap: Objects vs Metric Types',
        xaxis_title='Metric Type',
        yaxis_title='Object ID',
        height=600,
    )
    return fig

In [25]:
def create_radar_chart(df):
    """Build a radar chart of the mean score per metric type, one trace per object.

    Scores are averaged per (object, metric type) pair.  Every object gets
    one filled ``Scatterpolar`` trace whose angular categories are the
    metric types and whose radius is the mean score.  The radial axis is
    fixed to the range 0 to 1.

    Args:
        df: polars DataFrame with ``object_id``, ``metric_type`` and
            ``score`` columns.

    Returns:
        The assembled ``go.Figure``.
    """
    # Sort on both keys: group_by gives no ordering guarantee, and each trace
    # should list its metric types in the same order.
    avg_scores = (
        df.group_by(['object_id', 'metric_type'])
        .agg(pl.col('score').mean())
        .sort(['object_id', 'metric_type'])
    )

    fig = go.Figure()

    # maintain_order=True keeps the sorted object order, so trace and legend
    # order is stable between runs (a plain unique() may reorder the values).
    for obj_id in avg_scores['object_id'].unique(maintain_order=True):
        obj_scores = avg_scores.filter(pl.col('object_id') == obj_id)
        fig.add_trace(go.Scatterpolar(
            r=obj_scores['score'],
            theta=obj_scores['metric_type'],
            fill='toself',
            name=f'Object {obj_id}'
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )),
        showlegend=True,
        title='Average Scores by Object and Metric Type (Radar Chart)',
        height=600
    )

    return fig

In [26]:
def perform_statistical_analysis(df):
    """Run pairwise independent t-tests between metric types for every object.

    For each object, the ``score`` samples of COMPOSITE vs FEATURE,
    COMPOSITE vs SSIM and FEATURE vs SSIM are compared with
    ``scipy.stats.ttest_ind`` using its default settings.  Null scores are
    dropped before testing.  When either sample has fewer than two
    observations, the statistic and p-value are reported as NaN without
    calling scipy.

    NOTE(review): ``ttest_ind`` defaults to the equal-variance form of the
    test; confirm that this is the intended comparison for these metrics.

    Args:
        df: polars DataFrame with ``object_id``, ``metric_type`` and
            ``score`` columns.

    Returns:
        A polars DataFrame with one row per (object, metric pair) and the
        columns ``Object ID``, ``Metric 1``, ``Metric 2``, ``t-statistic``
        and ``p-value``.  Rows are ordered by object id.
    """
    metric_types = ['COMPOSITE', 'FEATURE', 'SSIM']
    metric_pairs = [('COMPOSITE', 'FEATURE'), ('COMPOSITE', 'SSIM'), ('FEATURE', 'SSIM')]
    results = []

    # unique() alone gives no ordering guarantee; sort for reproducible rows.
    for obj_id in df['object_id'].unique().sort():
        obj_df = df.filter(pl.col('object_id') == obj_id)

        # Extract each metric's scores once instead of once per pair.
        scores_by_metric = {
            metric: obj_df.filter(pl.col('metric_type') == metric)['score'].drop_nulls().to_numpy()
            for metric in metric_types
        }

        for metric1, metric2 in metric_pairs:
            scores1 = scores_by_metric[metric1]
            scores2 = scores_by_metric[metric2]

            if min(len(scores1), len(scores2)) < 2:
                # Too few observations for a variance estimate: report NaN
                # explicitly rather than relying on scipy's warning path.
                t_stat, p_value = float('nan'), float('nan')
            else:
                t_stat, p_value = stats.ttest_ind(scores1, scores2)

            results.append({
                'Object ID': obj_id,
                'Metric 1': metric1,
                'Metric 2': metric2,
                # Plain Python floats keep DataFrame construction independent
                # of numpy scalar handling.
                't-statistic': float(t_stat),
                'p-value': float(p_value)
            })

    return pl.DataFrame(results)


In [27]:
def update_plots(obj_id):
    """Render the per-object figures for the selected object.

    Builds the COMPOSITE 3D scatter and the three-panel metric comparison
    from the notebook-level ``combined_df`` and then displays both, in that
    order.

    Args:
        obj_id: Value of ``object_id`` to visualise.
    """
    composite_rows = combined_df.filter(
        (pl.col('object_id') == obj_id) & (pl.col('metric_type') == 'COMPOSITE')
    )

    # Both figures are built before anything is displayed.
    figures = (
        create_3d_scatter(composite_rows, obj_id, 'COMPOSITE'),
        create_comparison_plot(combined_df, obj_id),
    )
    for figure in figures:
        display(figure)


In [28]:
# Dropdown listing every distinct object id in ascending order.
object_dropdown = widgets.Dropdown(
    description='Select Object:',
    options=combined_df.get_column('object_id').unique().sort().to_list(),
    disabled=False,
)



In [None]:
# Show the selector, then bind it to update_plots so the figures are redrawn
# whenever the selection changes.
# NOTE(review): the recorded output lists the dropdown twice - once from this
# display() call and once as a child of the interactive container - so the
# explicit display() may be redundant; confirm before removing it.
display(object_dropdown)
widgets.interactive(update_plots, obj_id=object_dropdown)


Dropdown(description='Select Object:', options=('000001', '000002', '000003', '000004', '000005', '000006', '0…

interactive(children=(Dropdown(description='Select Object:', options=('000001', '000002', '000003', '000004', …

In [68]:
# Box plot of scores per object, grouped by metric type, over the whole combined table.
fig_box = create_enhanced_box_plot(combined_df)
fig_box.show()



In [69]:
# Violin plot of scores per object, grouped by metric type, over the whole combined table.
fig_violin = create_violin_plot(combined_df)
fig_violin.show()

In [75]:
# Radar chart of the mean score per metric type, one trace per object.
fig_radar = create_radar_chart(combined_df)
fig_radar.show()