In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.colors as mcolors
import seaborn as sns

In [17]:
# Read the results CSV (case facts plus LLaMA / DeepSeek predictions and
# reasoning columns) and preview the first rows.
RESULTS_CSV_PATH = '/kaggle/input/hackathon/final_results.csv'
df = pd.read_csv(RESULTS_CSV_PATH)
df.head()

Unnamed: 0.1,Unnamed: 0,name,term,facts,decision_type,first_party,second_party,first_party_winner,issue_area,facts_len,...,llama_prediction,llama_reasoning,llama_consistency_predictions,llama_consistency_reasonings,llama_consistency_scores,deepseek_prediction,deepseek_reasoning,deepseek_consistency_predictions,deepseek_consistency_reasonings,deepseek_consistency_scores
0,464,School Committee of the Town of Burlington v. ...,1984,<p>Under the provisions of the Education of th...,majority opinion,"School Committee of Burlington, Massachusetts,...",Department of Education of the Commonwealth of...,False,Civil Rights,3039,...,1,The Education of the Handicapped Act requires...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" The Education of the Handicapped Act requir...",100.0,0,The reasoning for this decision is based on t...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The reasoning for this decision is based on...",100.0
1,607,Graham v. Connor,1988,"<p>On November 12, 1984, Dethorne Graham, a di...",majority opinion,Dethorne Graham,M.S. Connor,True,Civil Rights,2643,...,1,I rule in favor of Dethorne Graham because th...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' I rule in favor of Dethorne Graham because ...,100.0,1,The facts of the case demonstrate that Dethor...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' The facts of the case demonstrate that Deth...,100.0
2,1593,Sosa v. Alvarez-Machain,2003,<p>A U.S. Drug Enforcement Agency (DEA) specia...,majority opinion,Jose Francisco Sosa,"Humberto Alvarez-Machain, et al.",True,Economic Activity,2549,...,0,The case presented involves Jose Francisco So...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The case centers around Humberto Alvarez-Ma...",100.0,0,The Alien Tort Statute (ATS) provides federal...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Alien Tort Statute (ATS) provides feder...",100.0
3,1810,Jones v. Bock,2006,<p>Congress passed the Prisoner Litigation Ref...,majority opinion,Lorenzo L. Jones,"Barbara Bock, Warden, et al.",True,Criminal Procedure,2758,...,0,The Prisoner Litigation Reform Act (PLRA) req...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Prisoner Litigation Reform Act (PLRA) r...",100.0,0,The Prisoner Litigation Reform Act (PLRA) req...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Prisoner Litigation Reform Act (PLRA) r...",100.0
4,1876,Boumediene v. Bush,2007,<p>In 2002 Lakhdar Boumediene and five other A...,majority opinion,Lakhdar Boumediene et al.,"George W. Bush, President of the United States...",True,Criminal Procedure,2608,...,1,The Suspension Clause of the Constitution sta...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" The Suspension Clause of the Constitution s...",100.0,1,The Supreme Court ruled in favor of Lakhdar B...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' The Supreme Court ruled in favor of Lakhdar...,100.0


In [4]:
# Show every column label of the loaded frame as a plain list
df.columns.tolist()

['Unnamed: 0',
 'name',
 'term',
 'facts',
 'decision_type',
 'first_party',
 'second_party',
 'first_party_winner',
 'issue_area',
 'facts_len',
 'facts_cleaned',
 'first_party_winner_binary',
 'llama_prediction',
 'llama_reasoning',
 'llama_consistency_predictions',
 'llama_consistency_reasonings',
 'llama_consistency_scores',
 'deepseek_prediction',
 'deepseek_reasoning',
 'deepseek_consistency_predictions',
 'deepseek_consistency_reasonings',
 'deepseek_consistency_scores']

In [11]:
# Semantic Similarity Scores

# Sentence-embedding model shared by the embedding code in later cells.
# 'all-mpnet-base-v2' is a good general-purpose checkpoint.
EMBEDDING_MODEL_NAME = 'all-mpnet-base-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [22]:
# %%timeit
# Embeddings

def compute_embeddings(df):
    """
    Encode the case facts and the LLaMA reasoning text into sentence embeddings.

    Both text columns are encoded with the module-level ``model`` object via
    ``model.encode``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``facts_cleaned`` and ``llama_reasoning`` columns.

    Returns
    -------
    tuple
        ``(df_out, facts_embeddings, llama_embeddings)``. ``df_out`` is a copy
        of ``df`` with two extra columns, ``facts_embeddings`` and
        ``llama_embeddings``, each holding one embedding vector per row. The
        other two items are the raw return values of ``model.encode``.
        The input ``df`` itself is left unmodified.

    Raises
    ------
    KeyError
        If either required text column is missing from ``df``.
    """
    required_columns = ('facts_cleaned', 'llama_reasoning')
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"compute_embeddings: missing required column(s): {missing}")

    # Empty CSV cells are loaded as float NaN; turn them into empty strings so
    # the encoder only ever receives text.
    facts_text = df['facts_cleaned'].fillna('').astype(str).tolist()
    llama_text = df['llama_reasoning'].fillna('').astype(str).tolist()

    # Generate embeddings
    facts_embeddings = model.encode(facts_text, show_progress_bar=True)
    llama_embeddings = model.encode(llama_text, show_progress_bar=True)

    # Work on a copy so re-running the cell never stacks state onto the
    # caller's frame (the original mutated ``df`` in place).
    df_out = df.copy()
    df_out['facts_embeddings'] = list(facts_embeddings)
    df_out['llama_embeddings'] = list(llama_embeddings)

    return df_out, facts_embeddings, llama_embeddings

# Embed the loaded results; keep the raw embedding arrays for the similarity
# and projection cells below, then preview the frame returned by the call.
df2, facts_embeddings, llama_embeddings = compute_embeddings(df)
df2.head()

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0.1,Unnamed: 0,name,term,facts,decision_type,first_party,second_party,first_party_winner,issue_area,facts_len,...,llama_consistency_predictions,llama_consistency_reasonings,llama_consistency_scores,deepseek_prediction,deepseek_reasoning,deepseek_consistency_predictions,deepseek_consistency_reasonings,deepseek_consistency_scores,facts_embeddings,llama_embeddings
0,464,School Committee of the Town of Burlington v. ...,1984,<p>Under the provisions of the Education of th...,majority opinion,"School Committee of Burlington, Massachusetts,...",Department of Education of the Commonwealth of...,False,Civil Rights,3039,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" The Education of the Handicapped Act requir...",100.0,0,The reasoning for this decision is based on t...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The reasoning for this decision is based on...",100.0,"[-0.0021623226, 0.015396201, 0.007179515, 0.01...","[-0.0015088008, 0.014899541, -0.0010135036, 0...."
1,607,Graham v. Connor,1988,"<p>On November 12, 1984, Dethorne Graham, a di...",majority opinion,Dethorne Graham,M.S. Connor,True,Civil Rights,2643,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' I rule in favor of Dethorne Graham because ...,100.0,1,The facts of the case demonstrate that Dethor...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' The facts of the case demonstrate that Deth...,100.0,"[0.028898142, -0.00205573, 0.019906627, -0.020...","[0.021059742, -0.00011932641, 0.019157227, -0...."
2,1593,Sosa v. Alvarez-Machain,2003,<p>A U.S. Drug Enforcement Agency (DEA) specia...,majority opinion,Jose Francisco Sosa,"Humberto Alvarez-Machain, et al.",True,Economic Activity,2549,...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The case centers around Humberto Alvarez-Ma...",100.0,0,The Alien Tort Statute (ATS) provides federal...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Alien Tort Statute (ATS) provides feder...",100.0,"[-0.033591077, 0.042352844, 0.017997526, 0.027...","[0.0037948343, 0.017836072, 0.030419426, 0.065..."
3,1810,Jones v. Bock,2006,<p>Congress passed the Prisoner Litigation Ref...,majority opinion,Lorenzo L. Jones,"Barbara Bock, Warden, et al.",True,Criminal Procedure,2758,...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Prisoner Litigation Reform Act (PLRA) r...",100.0,0,The Prisoner Litigation Reform Act (PLRA) req...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Prisoner Litigation Reform Act (PLRA) r...",100.0,"[-0.013156697, 0.053748377, 0.028643325, -0.01...","[0.008206627, 0.07880169, 0.02407287, -0.00641..."
4,1876,Boumediene v. Bush,2007,<p>In 2002 Lakhdar Boumediene and five other A...,majority opinion,Lakhdar Boumediene et al.,"George W. Bush, President of the United States...",True,Criminal Procedure,2608,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" The Suspension Clause of the Constitution s...",100.0,1,The Supreme Court ruled in favor of Lakhdar B...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' The Supreme Court ruled in favor of Lakhdar...,100.0,"[0.035669968, -0.021940198, 0.04156141, -0.031...","[0.031976752, 0.0015162125, 0.06024114, 0.0031..."


In [23]:
# Similarity Score 

def compute_similarities(df, facts_embeddings, llama_embeddings):
    """
    Compute the row-wise cosine similarity between facts and reasoning embeddings.

    Row ``i`` of ``facts_embeddings`` is compared with row ``i`` of
    ``llama_embeddings``; no cross-row similarities are computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one row per embedding pair.
    facts_embeddings : array-like of shape (n_rows, dim)
        Embeddings of the case facts.
    llama_embeddings : array-like of shape (n_rows, dim)
        Embeddings of the LLaMA reasoning text.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added float ``similarity_score`` column.
        A pair in which either vector has zero norm scores 0.0.

    Raises
    ------
    ValueError
        If the two embedding arrays differ in shape, are not 2-D, or do not
        have one row per row of ``df``.
    """
    facts = np.asarray(facts_embeddings, dtype=float)
    llama = np.asarray(llama_embeddings, dtype=float)

    if facts.shape != llama.shape:
        raise ValueError(
            f"Embedding shapes differ: facts {facts.shape} vs llama {llama.shape}"
        )
    if len(facts) != len(df):
        raise ValueError(
            f"Got {len(facts)} embedding rows for a DataFrame with {len(df)} rows"
        )

    if facts.size == 0:
        similarities = np.zeros(len(facts), dtype=float)
    else:
        if facts.ndim != 2:
            raise ValueError(
                f"Embeddings must be 2-D (n_rows, dim); got {facts.ndim}-D input"
            )
        # Row-wise dot product and norms in one vectorised pass instead of one
        # pairwise-similarity call per row.
        dots = np.einsum('ij,ij->i', facts, llama)
        norms = np.linalg.norm(facts, axis=1) * np.linalg.norm(llama, axis=1)
        # Zero-norm vectors would divide by zero; define their similarity as 0.
        similarities = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

    # Return a new frame rather than mutating the caller's.
    scored = df.copy()
    scored['similarity_score'] = similarities

    return scored

# Score each case: cosine similarity between its facts and LLaMA reasoning embeddings
df3 = compute_similarities(df2, facts_embeddings, llama_embeddings)

In [25]:
def visualize_similarity_distribution(df):
    """
    Build a 30-bin histogram of the ``similarity_score`` column.

    A dashed red vertical line, annotated with the value, marks the mean
    score. Returns the plotly figure.
    """
    mean_score = df['similarity_score'].mean()
    axis_fonts = dict(tickfont=dict(size=12), title_font=dict(size=14))

    histogram = px.histogram(
        df,
        x='similarity_score',
        nbins=30,
        opacity=0.8,
        color_discrete_sequence=['#3366CC'],
        labels={'similarity_score': 'Cosine Similarity Score'},
        title='Distribution of Semantic Similarity Scores between Ground Truth and LLama Reasoning',
    )

    # Shared look: white template, Arial fonts, small gap between bars
    histogram.update_layout(
        template='plotly_white',
        font=dict(family='Arial', size=14),
        title_font=dict(size=18),
        xaxis=dict(axis_fonts),
        yaxis=dict(axis_fonts),
        bargap=0.1,
        margin=dict(l=50, r=50, t=80, b=50),
    )

    # Mean marker
    histogram.add_vline(
        x=mean_score,
        line_dash="dash",
        line_color="red",
        annotation_text=f"Mean: {mean_score:.3f}",
        annotation_position="top right",
    )

    # Thin translucent outline around each bar
    histogram.update_traces(marker_line_color='rgba(0,0,0,0.3)', marker_line_width=1)

    return histogram
    
# Render the histogram of per-case similarity scores
visualize_similarity_distribution(df3)

In [26]:
def visualize_embeddings_2d(facts_embeddings, llama_embeddings, random_state=42):
    """
    Project both embedding sets to 2D with PCA and plot them together.

    PCA is fitted once on the stacked facts + reasoning embeddings so both
    sets share the same axes. Each facts point is joined to the reasoning
    point with the same row index by a faint grey line.

    Parameters
    ----------
    facts_embeddings : array-like of shape (n_cases, dim)
        Embeddings of the case facts.
    llama_embeddings : array-like of shape (n_cases, dim)
        Embeddings of the LLaMA reasoning; row ``i`` pairs with facts row ``i``.
    random_state : int or None, default 42
        Passed to ``PCA`` so that any randomized SVD solver it selects gives
        the same projection on every run.

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If the two embedding sets do not have the same number of rows.
    """
    facts_arr = np.asarray(facts_embeddings)
    llama_arr = np.asarray(llama_embeddings)
    n_cases = len(facts_arr)
    if len(llama_arr) != n_cases:
        raise ValueError(
            f"Expected paired embeddings, got {n_cases} facts rows "
            f"and {len(llama_arr)} llama rows"
        )

    # Combine embeddings so both sets are projected onto the same components
    combined_embeddings = np.vstack((facts_arr, llama_arr))
    pca = PCA(n_components=2, random_state=random_state)
    embeddings_2d = pca.fit_transform(combined_embeddings)

    # Split back to facts and llama embeddings
    facts_2d = embeddings_2d[:n_cases]
    llama_2d = embeddings_2d[n_cases:]

    # Create DataFrame for plotting ('index' is the row position within each set)
    plot_df = pd.DataFrame({
        'x': embeddings_2d[:, 0],
        'y': embeddings_2d[:, 1],
        'type': ['Ground Truth'] * n_cases + ['LLama Reasoning'] * n_cases,
        'index': list(range(n_cases)) * 2,
    })

    fig = px.scatter(
        plot_df,
        x='x',
        y='y',
        color='type',
        color_discrete_map={'Ground Truth': '#1F77B4', 'LLama Reasoning': '#FF7F0E'},
        title='2D Projection of Embedding Vectors',
        labels={'x': 'PCA Dimension 1', 'y': 'PCA Dimension 2'},
        hover_data=['index'],
        opacity=0.7
    )

    # Draw all connector lines in ONE trace: a None entry after each pair
    # breaks the line, so segments stay separate without adding one trace
    # per case to the figure.
    line_x, line_y = [], []
    for (fx, fy), (lx, ly) in zip(facts_2d, llama_2d):
        line_x += [fx, lx, None]
        line_y += [fy, ly, None]

    if line_x:
        fig.add_trace(
            go.Scatter(
                x=line_x,
                y=line_y,
                mode='lines',
                line=dict(color='rgba(100,100,100,0.2)', width=0.5),
                connectgaps=False,
                showlegend=False,
                hoverinfo='none'
            )
        )

    fig.update_layout(
        template='plotly_white',
        font=dict(family='Arial', size=14),
        title_font=dict(size=18),
        legend_title_text='',
        xaxis=dict(tickfont=dict(size=12), title_font=dict(size=14)),
        yaxis=dict(tickfont=dict(size=12), title_font=dict(size=14)),
        margin=dict(l=50, r=50, t=80, b=50),
    )

    return fig

# Render the 2D PCA projection of the facts and LLaMA reasoning embeddings
visualize_embeddings_2d(facts_embeddings, llama_embeddings)

In [32]:
def visualize_case_similarities(df, top_n=20):
    """
    Build two bar charts of per-case similarity: the highest- and lowest-scoring cases.

    Rows are ranked by ``similarity_score`` (descending); the first ``top_n``
    rows feed the "highest" chart and the last ``top_n`` rows the "lowest"
    chart. Bars are labelled with the ``name`` column when present, otherwise
    with ``'Case <index>'``.

    Returns a ``(fig_high, fig_low)`` tuple of plotly figures.
    """
    ranked = df.sort_values('similarity_score', ascending=False)
    has_names = 'name' in df.columns

    def _labelled(subset):
        # Copy the slice and attach the label column used on the x axis
        cases = subset.copy()
        if has_names:
            cases.loc[:, 'case_id'] = cases['name']
        else:
            cases.loc[:, 'case_id'] = 'Case ' + cases.index.astype(str)
        return cases

    def _bar_chart(cases, chart_title):
        # One styled bar chart; both outputs share exactly this look
        chart = px.bar(
            cases,
            x='case_id',
            y='similarity_score',
            color='similarity_score',
            color_continuous_scale='Blues',
            title=chart_title,
            labels={'similarity_score': 'Similarity Score', 'case_id': 'Case'}
        )
        chart.update_layout(
            template='plotly_white',
            font=dict(family='Arial', size=14),
            title_font=dict(size=18),
            xaxis=dict(tickangle=45, tickfont=dict(size=10), title_font=dict(size=14)),
            yaxis=dict(tickfont=dict(size=12), title_font=dict(size=14), range=[0, 1]),
            coloraxis_showscale=False,
            margin=dict(l=50, r=50, t=80, b=150),
        )
        return chart

    highest = _labelled(ranked.head(top_n))
    lowest = _labelled(ranked.tail(top_n))

    chart_high = _bar_chart(highest, f'Top {top_n} Cases with Highest Semantic Similarity')
    chart_low = _bar_chart(lowest, f'Top {top_n} Cases with Lowest Semantic Similarity')

    return chart_high, chart_low

# Build both per-case charts for the 20 best / worst matches; display the
# highest-similarity chart here (fig_low is shown in the next cell).
fig_high, fig_low = visualize_case_similarities(df3, top_n=20)
fig_high

In [33]:
# Display the lowest-similarity chart returned by visualize_case_similarities
fig_low

In [34]:
def create_heatmap_by_category(df, category_col='issue_area'):
    """
    Build a one-column heatmap of the mean similarity score per category.

    Categories are the distinct values of ``category_col``, ordered by mean
    ``similarity_score`` (descending). Each cell is labelled with its mean to
    three decimals. Returns ``None`` when ``category_col`` is not a column of
    ``df``; otherwise returns the plotly figure.
    """
    # Nothing to group by: signal "not applicable" to the caller
    if category_col not in df.columns:
        return None

    # Per-category summary statistics, best-matching categories first
    scores_by_category = df.groupby(category_col)['similarity_score']
    category_stats = scores_by_category.agg(['mean', 'median', 'std', 'count'])
    category_stats = category_stats.sort_values('mean', ascending=False)

    category_means = category_stats['mean']
    cell_labels = [[f"{val:.3f}"] for val in category_means]

    heatmap = go.Heatmap(
        z=category_means.values.reshape(-1, 1),
        y=category_stats.index,
        x=['Mean Similarity'],
        colorscale='Blues',
        text=cell_labels,
        texttemplate="%{text}",
        showscale=True
    )

    fig = go.Figure()
    fig.add_trace(heatmap)

    # Height grows with the number of categories, never below 300 px
    figure_height = max(300, len(category_stats) * 30)
    tick_and_title_fonts = dict(tickfont=dict(size=12), title_font=dict(size=14))

    fig.update_layout(
        title='Mean Semantic Similarity by Case Category',
        template='plotly_white',
        font=dict(family='Arial', size=14),
        title_font=dict(size=18),
        yaxis=dict(tick_and_title_fonts),
        xaxis=dict(tick_and_title_fonts),
        margin=dict(l=150, r=50, t=80, b=50),
        height=figure_height,
    )

    return fig
# Render the mean-similarity heatmap grouped by the 'issue_area' column
create_heatmap_by_category(df3, category_col='issue_area')

In [None]:
# def analyze_and_visualize(df, output_prefix="scotus_semantic_analysis"):
#     """
#     Main function to run the analysis and generate visualizations
#     """
    
#     # Compute embeddings and similarities
#     df, facts_embeddings, llama_embeddings = compute_embeddings(df)
#     df = compute_similarities(df, facts_embeddings, llama_embeddings)
    
#     # Generate visualizations
#     fig_dist = visualize_similarity_distribution(df)
#     fig_dist.write_image(f"{output_prefix}_similarity_distribution.png", scale=2)
    
#     # 2D embedding visualization
#     fig_2d = visualize_embeddings_2d(facts_embeddings, llama_embeddings)
#     fig_2d.write_image(f"{output_prefix}_2d_embeddings.png", scale=2)
    
#     # Case-specific visualizations
#     fig_high, fig_low = visualize_case_similarities(df)
#     fig_high.write_image(f"{output_prefix}_highest_similarity_cases.png", scale=2)
#     fig_low.write_image(f"{output_prefix}_lowest_similarity_cases.png", scale=2)
    
#     # Category heatmap if applicable
#     fig_cat = create_heatmap_by_category(df)
#     if fig_cat is not None:
#         fig_cat.write_image(f"{output_prefix}_category_heatmap.png", scale=2)
    
#     # Create a summary dashboard
#     create_dashboard_figure(df, facts_embeddings, llama_embeddings).write_html(
#         f"{output_prefix}_dashboard.html"
#     )
    
#     # Save processed data with similarities
#     df.to_csv(f"{output_prefix}_with_similarities.csv", index=False)
    
#     print(f"Analysis complete. Files saved with prefix: {output_prefix}")
#     return df
    
# analyze_and_visualize(df3, output_prefix="scotus_semantic_analysis")

In [37]:
def create_dashboard_figure(df, facts_embeddings, llama_embeddings, top_n=10):
    """
    Create a combined 2x2 dashboard with multiple visualizations.

    Panels:
      * (1, 1) 30-bin histogram of ``similarity_score`` with a dashed red line
        at the mean;
      * (1, 2) 2D PCA projection of the facts and LLaMA reasoning embeddings;
      * (2, 1) the ``top_n`` cases with the highest similarity;
      * (2, 2) the ``top_n`` cases with the lowest similarity.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``similarity_score``; the ``name`` column is used for bar
        labels when present, otherwise ``'Case <index>'``.
    facts_embeddings, llama_embeddings : array-like of shape (n_cases, dim)
        Embeddings to project; they are stacked and reduced with one PCA fit.
    top_n : int, default 10
        Number of cases shown in each of the two bar-chart panels.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Create subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Distribution of Similarity Scores',
            '2D Projection of Embeddings',
            f'Top {top_n} Highest Similarity Cases',
            f'Top {top_n} Lowest Similarity Cases'
        ),
        specs=[
            [{"type": "xy"}, {"type": "xy"}],
            [{"type": "xy"}, {"type": "xy"}]
        ],
        vertical_spacing=0.15,
        horizontal_spacing=0.1
    )

    # --- Histogram (row 1, col 1) -------------------------------------------
    scores = df['similarity_score']
    counts, bin_edges = np.histogram(
        scores,
        bins=30,
        range=(scores.min(), scores.max())
    )
    # Bars are placed at the centre of each bin
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    fig.add_trace(
        go.Bar(
            x=bin_centers,
            y=counts,
            marker_color='#3366CC',
            opacity=0.8,
            name="Similarity Scores"
        ),
        row=1, col=1
    )

    fig.add_vline(
        x=scores.mean(),
        line_dash="dash",
        line_color="red",
        row=1, col=1
    )

    # --- 2D embeddings (row 1, col 2) ---------------------------------------
    # One PCA fit on the stacked arrays so both sets share the same axes;
    # random_state keeps the projection stable if a randomized solver is used.
    n_facts = len(facts_embeddings)
    combined_embeddings = np.vstack((facts_embeddings, llama_embeddings))
    pca = PCA(n_components=2, random_state=42)
    embeddings_2d = pca.fit_transform(combined_embeddings)

    projections = (
        ("Ground Truth", embeddings_2d[:n_facts], '#1F77B4'),
        ("LLama Reasoning", embeddings_2d[n_facts:], '#FF7F0E'),
    )
    for trace_name, points_2d, marker_color in projections:
        fig.add_trace(
            go.Scatter(
                x=points_2d[:, 0],
                y=points_2d[:, 1],
                mode='markers',
                marker=dict(color=marker_color, size=8, opacity=0.7),
                name=trace_name
            ),
            row=1, col=2
        )

    # --- Top and bottom cases (row 2) ---------------------------------------
    sorted_df = df.sort_values('similarity_score', ascending=False)
    has_names = 'name' in df.columns

    ranked_panels = (
        ("Top Cases", sorted_df.head(top_n).copy(), 1),
        ("Bottom Cases", sorted_df.tail(top_n).copy(), 2),
    )
    for trace_name, cases, panel_col in ranked_panels:
        if has_names:
            cases['case_id'] = cases['name']
        else:
            cases['case_id'] = 'Case ' + cases.index.astype(str)

        fig.add_trace(
            go.Bar(
                x=cases['case_id'],
                y=cases['similarity_score'],
                marker_color=cases['similarity_score'],
                marker_colorscale='Blues',
                name=trace_name
            ),
            row=2, col=panel_col
        )

    # Update layout
    fig.update_layout(
        title_text="Semantic Similarity Analysis of Supreme Court Cases",
        title_font=dict(size=20),
        font=dict(family='Arial', size=12),
        template='plotly_white',
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="center",
            x=0.5
        ),
        height=900,
        width=1200
    )

    # Axis titles per panel: (row, col, x-axis options, y-axis options).
    # `title_font` is used instead of the deprecated `titlefont` keyword,
    # which newer plotly releases reject; it also matches the other plotting
    # functions in this notebook.
    axis_title_font = dict(size=12)
    panel_axes = (
        (1, 1, dict(title_text="Similarity Score"), dict(title_text="Count")),
        (1, 2, dict(title_text="PCA Dimension 1"), dict(title_text="PCA Dimension 2")),
        (2, 1, dict(title_text="Case", tickangle=45),
         dict(title_text="Similarity Score", range=[0, 1])),
        (2, 2, dict(title_text="Case", tickangle=45),
         dict(title_text="Similarity Score", range=[0, 1])),
    )
    for row, col, x_options, y_options in panel_axes:
        fig.update_xaxes(row=row, col=col, title_font=axis_title_font, **x_options)
        fig.update_yaxes(row=row, col=col, title_font=axis_title_font, **y_options)

    return fig

# Render the combined 2x2 dashboard for the scored cases
create_dashboard_figure(df3, facts_embeddings, llama_embeddings)

In [None]:
# Example usage
# if __name__ == "__main__":

#     df = analyze_and_visualize(df)