In [None]:
import pandas as pd
import numpy as np
import torch
from sklearn.decomposition import PCA
import umap.umap_ as umap


import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio

pio.templates.default = "plotly_white"



from tqdm import tqdm

from scipy.stats import ttest_ind
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import LabelEncoder


# Project working directory; the HTML figures below are written to sub-folders of it.
# NOTE(review): absolute, user-specific HPC path - adjust before running elsewhere.
WORK_DIR = "/home/hpc/b207dd/b207dd11/test/spin-politics"

# Model identifier; it is interpolated into the activation cache paths used below.
model_name = "llama3-8b"


In [None]:
# Directory holding the cached standard-sentence data for `model_name`.
# NOTE(review): absolute, user-specific HPC path.
STANDARD_ACTIVATIONS_CACHE_DIR = f"/home/atuin/b207dd/b207dd11/test/DEU/standard_sentences/{model_name}"


# Kept for reference - presumably the statement that produced the pickle read below.
# df.to_pickle(f"{STANDARD_ACTIVATIONS_CACHE_DIR}/standard_text_with_embeddings.pkl")

# NOTE: unpickling can execute arbitrary code; only load files you trust.
df = pd.read_pickle(f"{STANDARD_ACTIVATIONS_CACHE_DIR}/standard_text_with_embeddings.pkl")

In [None]:

def category_to_rgb(libertarian, collectivist, progressive):
    """Encode one label per ideological axis as an ``rgb(r,g,b)`` colour string.

    The first argument drives the red channel, the second the green channel
    and the third the blue channel. Every label is looked up in the same
    table, so "Neutral" gives 128 on whichever channel it is used for.

    Raises:
        KeyError: if a label is not present in the lookup table.
    """
    strong, middle, weak = 225, 128, 30
    channel_for = {
        "Neutral": middle,
        "Libertär": strong,
        "Restriktiv": weak,
        "Kollektivistisch": strong,
        "Individualistisch": weak,
        "Progressiv": strong,
        "Konservativ": weak,
    }
    # Lookups happen in argument order (red, green, blue).
    r, g, b = (channel_for[label] for label in (libertarian, collectivist, progressive))
    return f'rgb({r},{g},{b})'

# Combined label such as "Libertär - Kollektivistisch - Progressiv"; used as the plot colour key.
df['Ideology'] = df['Libertarian'] + ' - ' + df['Collectivist'] + ' - ' + df['Progressive']
# One colour string per row, built column-wise instead of with a row-wise DataFrame.apply.
df['Color'] = [
    category_to_rgb(libertarian, collectivist, progressive)
    for libertarian, collectivist, progressive in zip(df['Libertarian'], df['Collectivist'], df['Progressive'])
]
# Create a color discrete map.
# Pair each ideology with the colour of the *same row*; zipping two separately
# computed unique() arrays is only correct while both happen to line up one-to-one.
unique_ideologies = df['Ideology'].unique()
color_discrete_map = dict(zip(df['Ideology'], df['Color']))


# Unsupervised PCA + UMAP

In [None]:

def pca_umap_plot(df,
                  pca_cumul_var_ratio_thresh=None,
                  pca_n_components=None,
                  umap_n_components=3,
                  umap_n_neighbors=15,
                  umap_min_dist=0.1,
                  layer_list=None,
                  image_output_folder=""):
    """Write one interactive 3D scatter plot (HTML) per embedding layer.

    For every selected layer the embeddings are standardized per feature and
    reduced with PCA. UMAP is applied on top only when ``umap_n_components`` is
    smaller than the number of PCA components; otherwise the PCA coordinates are
    plotted directly. Points are coloured by the ``Ideology`` column using the
    module-level ``color_discrete_map``.

    Args:
        df: Frame with an ``Embedding`` column whose entries stack to an array
            indexed ``[sample, layer, feature]``, plus the ``Ideology``,
            ``Libertarian``, ``Collectivist``, ``Progressive`` and ``Topic`` columns.
        pca_cumul_var_ratio_thresh: Keep the smallest number of components whose
            cumulative explained-variance ratio reaches this value. Only used
            when ``pca_n_components`` is not given.
        pca_n_components: Fixed number of PCA components (takes precedence).
        umap_n_components: Output dimensionality of UMAP.
        umap_n_neighbors: UMAP ``n_neighbors``.
        umap_min_dist: UMAP ``min_dist``.
        layer_list: Layer indices to plot; all layers when ``None``.
        image_output_folder: Folder prefix of the written HTML files.

    Raises:
        ValueError: If neither ``pca_n_components`` nor
            ``pca_cumul_var_ratio_thresh`` is given.
    """
    if not pca_n_components and not pca_cumul_var_ratio_thresh:
        raise ValueError("Specify either pca_n_components or pca_cumul_var_ratio_thresh")
    embeddings = np.stack(df['Embedding'].values)
    n_layers = embeddings.shape[1]
    if layer_list is None:
        layer_list = range(n_layers)
    #
    for layer in tqdm(layer_list, desc="Processing layers"):
        layer_embeddings = embeddings[:, layer, :]
        # Standardize embeddings; constant features keep a divisor of 1 so they
        # become 0 instead of NaN (NaN would make PCA fail).
        mean = np.mean(layer_embeddings, axis=0)
        std = np.std(layer_embeddings, axis=0)
        safe_std = np.where(std == 0, 1.0, std)
        standardized_embeddings = (layer_embeddings - mean) / safe_std
        # PCA
        if pca_n_components:
            num_components = pca_n_components
            pca = PCA(n_components=num_components)
            pca_embeddings = pca.fit_transform(standardized_embeddings)
        else:
            pca = PCA()
            pca.fit(standardized_embeddings)
            cumul_var_ratio = np.cumsum(pca.explained_variance_ratio_)
            reached = np.flatnonzero(cumul_var_ratio >= pca_cumul_var_ratio_thresh)
            # Fall back to all components when the threshold is never reached
            # (e.g. a threshold of 1.0 missed by floating-point rounding).
            num_components = reached[0] + 1 if reached.size else len(cumul_var_ratio)
            if num_components < umap_n_components:
                num_components = umap_n_components
            pca_embeddings = pca.transform(standardized_embeddings)[:, :num_components]
        print(f"Layer {layer} PCA num_components {num_components}")
        # UMAP
        if umap_n_components < num_components:
            umap_reducer = umap.UMAP(n_components=umap_n_components,
                                     n_neighbors=umap_n_neighbors,
                                     min_dist=umap_min_dist,
                                     metric='cosine',
                                     random_state=42)
            umap_embeddings = umap_reducer.fit_transform(pca_embeddings)
        else:
            umap_embeddings = pca_embeddings
        # Always hand three coordinate columns to the 3D plot: missing
        # dimensions are zero-filled instead of raising an IndexError.
        n_points, n_dims = umap_embeddings.shape
        coords = np.zeros((n_points, 3))
        coords[:, :min(n_dims, 3)] = umap_embeddings[:, :3]
        # df for plotting
        plot_df = df.copy()
        plot_df['UMAP1'] = coords[:, 0]
        plot_df['UMAP2'] = coords[:, 1]
        plot_df['UMAP3'] = coords[:, 2]
        # Plotting
        fig = px.scatter_3d(
            plot_df, x='UMAP1', y='UMAP2', z='UMAP3',
            color='Ideology',
            color_discrete_map=color_discrete_map,
            title=f'Layer {layer} Embeddings',
            hover_data={
                'Libertarian': True,
                'Collectivist': True,
                'Progressive': True,
                'Topic': True,
            }
        )
        file_path = f'{image_output_folder}/scatter_layer{layer}_pca{num_components}_umap{umap_n_components}.html'
        # Plotly figures are plain objects; no matplotlib figure needs closing here.
        fig.write_html(file_path)

# Render one HTML figure per layer for the standard sentences.
# With pca_n_components equal to umap_n_components the UMAP step inside
# pca_umap_plot is skipped, so the plotted axes are the first three principal
# components and the umap_n_neighbors / umap_min_dist values have no effect.
pca_umap_plot(
    df, 
    #pca_cumul_var_ratio_thresh=.5,
    pca_n_components=3, 
    umap_n_components=3, 
    umap_n_neighbors=100, 
    umap_min_dist=0.8, 
    #layer_list=[i for i in range(3)],
    image_output_folder=f"{WORK_DIR}/standard_img"
)


# Supervised (Assigned) Axis Projection

In [None]:
# Alias (not a copy) of the standard-sentence frame; the axis-direction cells below read it under this name.
df_standard = df

In [None]:
# Significance level applied to every per-feature t-test below.
p_value_threshold = 5e-6

def calculate_mean_embeddings_with_filter(df, axis, p_value_threshold=p_value_threshold):
    """Build the direction vector between the two poles of one ideological axis.

    Rows whose ``axis["column"]`` equals ``axis["type_a"]`` form group A, rows
    equal to ``axis["type_b"]`` form group B. A Welch t-test (unequal variances)
    is run independently for every embedding entry along the sample axis.

    Args:
        df: Frame with the label column and an ``Embedding`` column of
            equally shaped arrays.
        axis: Dict with the keys ``"column"``, ``"type_a"`` and ``"type_b"``.
        p_value_threshold: Entries with a p-value below this are kept.

    Returns:
        ``(direction_vector, significant_features)``. Both have the shape of a
        single embedding. ``direction_vector`` is ``mean(A) - mean(B)`` on the
        significant entries and 0 elsewhere (it is not normalised);
        ``significant_features`` is the boolean mask of kept entries.

    Raises:
        ValueError: If one of the two groups has no rows.
    """
    labels = df[axis["column"]]
    group_a = df.loc[labels == axis["type_a"], 'Embedding'].values
    group_b = df.loc[labels == axis["type_b"], 'Embedding'].values
    if len(group_a) == 0 or len(group_b) == 0:
        raise ValueError(
            f"No rows for {axis['type_a']!r} or {axis['type_b']!r} in column {axis['column']!r}"
        )
    type_a_embeddings = np.stack(group_a)
    type_b_embeddings = np.stack(group_b)
    print(type_a_embeddings.shape, type_b_embeddings.shape)
    mean_difference = np.mean(type_a_embeddings, axis=0) - np.mean(type_b_embeddings, axis=0)
    # Perform t-test for each feature; only the p-values are needed.
    _, p_values = ttest_ind(type_a_embeddings, type_b_embeddings, axis=0, equal_var=False)
    # NaN p-values (e.g. zero variance in both groups) compare False and are dropped.
    significant_features = p_values < p_value_threshold
    direction_vector = np.where(significant_features, mean_difference, np.zeros_like(mean_difference))
    return direction_vector, significant_features

# The three ideological axes: label column, label of pole A, label of pole B.
axes = {
    column: {"column": column, "type_a": type_a, "type_b": type_b}
    for column, type_a, type_b in [
        ("Libertarian", "Libertär", "Restriktiv"),
        ("Collectivist", "Kollektivistisch", "Individualistisch"),
        ("Progressive", "Progressiv", "Konservativ"),
    ]
}

# Direction vector and significance mask per axis, keyed by axis name.
mean_embeddings, significant_features = {}, {}
for axis_name, axis in axes.items():
    direction, feature_mask = calculate_mean_embeddings_with_filter(df_standard, axis)
    mean_embeddings[axis_name] = direction
    significant_features[axis_name] = feature_mask
    # Report how many embedding entries passed the significance filter.
    num_significant_features = np.sum(feature_mask)
    total_features = feature_mask.size
    print(f"{axis_name}: {num_significant_features} of {total_features} features used")


In [None]:

# Project every standard sentence onto the three assigned axes, all layers combined.
plot_data = []

for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing rows"):
    embedding = row['Embedding']
    # The categorical labels get their own keys. Reusing the axis names for both
    # labels and scores would silently overwrite the labels in the dict literal.
    record = {
        'LibertarianLabel': row['Libertarian'],
        'CollectivistLabel': row['Collectivist'],
        'ProgressiveLabel': row['Progressive'],
        'Topic': row['Topic'],
    }
    for axis_name in axes.keys():
        # Keep only the significant features, then project on the axis direction.
        filtered_embedding = embedding * significant_features[axis_name]
        record[axis_name] = np.dot(filtered_embedding.flatten(), mean_embeddings[axis_name].flatten())
    record["Ideology"] = row["Ideology"]
    plot_data.append(record)

plot_df = pd.DataFrame(plot_data)


# Calculate the min and max score on each axis
x_min, x_max = plot_df['Libertarian'].min(), plot_df['Libertarian'].max()
y_min, y_max = plot_df['Collectivist'].min(), plot_df['Collectivist'].max()
z_min, z_max = plot_df['Progressive'].min(), plot_df['Progressive'].max()

# Add a small margin to ensure all points are visible
margin = 0.1  # 10% margin
x_range = x_max - x_min
y_range = y_max - y_min
z_range = z_max - z_min
# Plotting

fig = px.scatter_3d(
    plot_df, x='Libertarian', y='Collectivist', z='Progressive',
    color='Ideology',
    color_discrete_map=color_discrete_map,
    opacity=0.8,
    title=f'Assigned Axes Embeddings (All layers Combined)',
    # The scores are already shown as x/y/z; add the categorical labels and topic.
    hover_data={
        'LibertarianLabel': True,
        'CollectivistLabel': True,
        'ProgressiveLabel': True,
        'Topic': True,
    }
)


# Customize the layout
fig.update_layout(
    scene = dict(
        xaxis_title='Libertarian',
        yaxis_title='Collectivist',
        zaxis_title='Progressive',
        xaxis=dict(range=[x_min - margin * x_range, x_max + margin * x_range]),
        yaxis=dict(range=[y_min - margin * y_range, y_max + margin * y_range]),
        zaxis=dict(range=[z_min - margin * z_range, z_max + margin * z_range])
    ),
    scene_aspectmode='cube'
)


image_output_folder=f"{WORK_DIR}/standard_img"
file_path = f'{image_output_folder}/scatter_proj.html'
fig.write_html(file_path)

#### layer-wise

In [None]:
# Significance level applied to every per-feature t-test below.
p_value_threshold = 5e-6

def calculate_mean_embeddings_with_filter(df, axis, p_value_threshold=p_value_threshold):
    """Compute the per-layer direction vector and significance mask of one axis.

    This definition replaces the earlier function of the same name in this
    notebook once its cell is run. Rows whose ``axis["column"]`` equals
    ``axis["type_a"]`` form group A, rows equal to ``axis["type_b"]`` form
    group B. The means and the Welch t-test are reduced over the sample axis
    only, so every (layer, feature) entry is treated independently and no
    explicit loop over layers is needed.

    Args:
        df: Frame with the label column and an ``Embedding`` column of
            equally shaped arrays.
        axis: Dict with the keys ``"column"``, ``"type_a"`` and ``"type_b"``.
        p_value_threshold: Entries with a p-value below this are kept.

    Returns:
        ``(direction_vector, significant_features)``, both shaped like a single
        embedding. ``direction_vector`` is ``mean(A) - mean(B)`` on the
        significant entries and 0 elsewhere; ``significant_features`` is the
        boolean mask of kept entries.

    Raises:
        ValueError: If one of the two groups has no rows.
    """
    labels = df[axis["column"]]
    group_a = df.loc[labels == axis["type_a"], 'Embedding'].values
    group_b = df.loc[labels == axis["type_b"], 'Embedding'].values
    if len(group_a) == 0 or len(group_b) == 0:
        raise ValueError(
            f"No rows for {axis['type_a']!r} or {axis['type_b']!r} in column {axis['column']!r}"
        )
    type_a_embeddings = np.stack(group_a)
    type_b_embeddings = np.stack(group_b)
    print(type_a_embeddings.shape, type_b_embeddings.shape)

    mean_difference = np.mean(type_a_embeddings, axis=0) - np.mean(type_b_embeddings, axis=0)
    # Only the p-values are needed; NaN p-values compare False and are dropped.
    _, p_values = ttest_ind(type_a_embeddings, type_b_embeddings, axis=0, equal_var=False)
    significant_features = p_values < p_value_threshold
    direction_vector = np.where(significant_features, mean_difference, np.zeros_like(mean_difference))
    return direction_vector, significant_features


# The three ideological axes: label column, label of pole A, label of pole B.
axes = {
    column: {"column": column, "type_a": type_a, "type_b": type_b}
    for column, type_a, type_b in [
        ("Libertarian", "Libertär", "Restriktiv"),
        ("Collectivist", "Kollektivistisch", "Individualistisch"),
        ("Progressive", "Progressiv", "Konservativ"),
    ]
}

# Direction vector and significance mask per axis, keyed by axis name.
mean_embeddings, significant_features = {}, {}
for axis_name, axis in axes.items():
    direction, feature_mask = calculate_mean_embeddings_with_filter(df_standard, axis)
    mean_embeddings[axis_name] = direction
    significant_features[axis_name] = feature_mask
    # Print, layer by layer, how many features passed the significance filter.
    num_layers = feature_mask.shape[0]
    for layer, layer_mask in zip(range(num_layers), feature_mask):
        num_significant_features = np.sum(layer_mask)
        total_features = layer_mask.size
        print(f"{axis_name} - Layer {layer}: {num_significant_features} of {total_features} features used")


In [None]:

# Read the layer count from the data instead of hard-coding it, so no layer is
# silently dropped (or over-indexed) for a model with a different depth.
num_layers = df.iloc[0]['Embedding'].shape[0]

# Create a new DataFrame for plotting
plot_data = []

for layer in tqdm(range(num_layers), desc="Processing layers"):
    layer_plot_data = []
    for index, row in df.iterrows():
        embedding = row['Embedding'][layer]  # Get the embedding for this layer
        record = {}
        for axis_name in axes.keys():
            # Keep only this layer's significant features, then project on the axis direction.
            filtered_embedding = embedding * significant_features[axis_name][layer]
            record[axis_name] = np.dot(filtered_embedding.flatten(),
                                       mean_embeddings[axis_name][layer].flatten())
        record['Topic'] = row['Topic']
        record['Ideology'] = row['Ideology']
        record['Layer'] = layer
        layer_plot_data.append(record)
    # Convert layer_plot_data to DataFrame
    layer_df = pd.DataFrame(layer_plot_data)
    # Min-max normalize scores for this layer
    for axis in ['Libertarian', 'Collectivist', 'Progressive']:
        min_val = layer_df[axis].min()
        max_val = layer_df[axis].max()
        value_range = max_val - min_val
        if value_range == 0:
            # All scores identical (e.g. no significant feature in this layer):
            # place the points mid-range instead of dividing by zero.
            layer_df[axis] = 0.5
        else:
            layer_df[axis] = (layer_df[axis] - min_val) / value_range
    plot_data.extend(layer_df.to_dict('records'))

plot_df = pd.DataFrame(plot_data)

# Calculate the min and max values for each axis across all layers
x_min, x_max = plot_df['Libertarian'].min(), plot_df['Libertarian'].max()
y_min, y_max = plot_df['Collectivist'].min(), plot_df['Collectivist'].max()
z_min, z_max = plot_df['Progressive'].min(), plot_df['Progressive'].max()

# Add a small margin to ensure all points are visible
margin = 0.1  # 10% margin
x_range = x_max - x_min
y_range = y_max - y_min
z_range = z_max - z_min

# Create 3D scatter plot with one animation frame per layer
fig = px.scatter_3d(
    plot_df, x='Libertarian', y='Collectivist', z='Progressive',
    color='Ideology',
    color_discrete_map=color_discrete_map,
    opacity=0.8,
    title='Assigned Axes Embeddings (Animated through Layers)',
    hover_data=['Topic'],
    animation_frame='Layer'
)

# Customize the layout; fixed ranges keep the axes stable across frames
fig.update_layout(
    scene = dict(
        xaxis_title='Libertarian',
        yaxis_title='Collectivist',
        zaxis_title='Progressive',
        xaxis=dict(range=[x_min - margin * x_range, x_max + margin * x_range]),
        yaxis=dict(range=[y_min - margin * y_range, y_max + margin * y_range]),
        zaxis=dict(range=[z_min - margin * z_range, z_max + margin * z_range])
    ),
    scene_aspectmode='cube'
)

# Save the plot
image_output_folder = f"{WORK_DIR}/standard_img"
file_path = f'{image_output_folder}/scatter_proj_animated.html'
fig.write_html(file_path)

## Real Text

In [None]:

# Load the speech table from the current working directory.
# NOTE(review): relative path - this only works when the kernel is started in the
# folder containing the pickle; unpickling also executes code, so load trusted files only.
df_speech_filtered_concat = pd.read_pickle('df_speech_filtered_concat.pkl')



def load_embedding(index, path):
    """Return the object stored in ``<path>/<index>.pt``, mapped to the CPU.

    NOTE: ``torch.load`` unpickles the file, so only use it on trusted files.
    """
    return torch.load(f"{path}/{index}.pt", map_location='cpu')

# Attach the cached activations to the speech table and persist the result.
ACTIVATIONS_CACHE_DIR = f"/home/atuin/b207dd/b207dd11/test/DEU/activations/{model_name}"

# File `<i>.pt` is read for the i-th row (by position), converted to a NumPy array.
real_text_embeddings = [
    load_embedding(index, ACTIVATIONS_CACHE_DIR).numpy()
    for index in tqdm(range(len(df_speech_filtered_concat)))
]

df_speech_filtered_concat['Embedding'] = real_text_embeddings

# Cache the enriched table next to the activation files.
df_speech_filtered_concat.to_pickle(f"{ACTIVATIONS_CACHE_DIR}/df_speech_filtered_concat.pkl")

In [None]:

# Reload the speech table (with embeddings) from the cache written above, so the
# notebook can resume here without re-reading every activation file.
ACTIVATIONS_CACHE_DIR = f"/home/atuin/b207dd/b207dd11/test/DEU/activations/{model_name}"
df_speech_filtered_concat = pd.read_pickle(f"{ACTIVATIONS_CACHE_DIR}/df_speech_filtered_concat.pkl")

In [None]:
def calculate_mean_embedding(embeddings):
    """Return the element-wise mean of a sequence of equally shaped arrays."""
    stacked = np.stack(embeddings)
    return stacked.mean(axis=0)


# Collapse the speech table to one row per speaker: the speaker's metadata, the
# mean of their embeddings and the number of speeches that went into it.
speaker_data = []

for speaker, group in tqdm(df_speech_filtered_concat.groupby('Speaker')):
    mean_embedding = calculate_mean_embedding(group['Embedding'].values)
    has_nan = np.isnan(mean_embedding).any()
    if has_nan:
        # Show the offending speeches so the NaN source can be tracked down.
        print(group)
    # Metadata is taken from the speaker's first row.
    first_row = group.iloc[0]
    record = {column: first_row[column] for column in ['Speaker', 'Partei', 'Religion']}
    record["Embedding"] = mean_embedding
    record["EntryCount"] = len(group)
    speaker_data.append(record)

# One DataFrame row per speaker
df_speaker_mean_embeddings = pd.DataFrame(speaker_data)

# Persist the per-speaker table next to the activation cache
df_speaker_mean_embeddings.to_pickle(f"{ACTIVATIONS_CACHE_DIR}/df_speaker_mean_embeddings.pkl")



In [None]:
# Reload the per-speaker table from its cache so the plots below can be rerun
# without recomputing the speaker means.
df_speaker_mean_embeddings = pd.read_pickle(f"{ACTIVATIONS_CACHE_DIR}/df_speaker_mean_embeddings.pkl")

In [None]:

# Hex colour used for each party (`Partei` value) in the speaker plots.
party_color_discrete_map = dict([
    ('SPD', '#E3000F'),                    # red
    ('BÜNDNIS 90/DIE GRÜNEN', '#46962B'),  # green
    ('CSU', '#008AC5'),                    # blue
    ('CDU', '#000000'),                    # black
    ('AfD', '#009EE0'),                    # light blue
    ('FDP', '#FFED00'),                    # yellow
    ('DIE LINKE.', '#BE3075'),             # magenta
])


### PCA

In [None]:

def pca_umap_plot(df,
                  pca_cumul_var_ratio_thresh=None,
                  pca_n_components=None,
                  umap_n_components=3,
                  umap_n_neighbors=15,
                  umap_min_dist=0.1,
                  layer_list=None,
                  image_output_folder=""):
    """Write one interactive 3D scatter plot (HTML) per layer for speaker data.

    This redefinition replaces the earlier ``pca_umap_plot`` of this notebook
    once its cell is run; it differs in the plot styling (colour by ``Partei``,
    marker size by ``EntryCount``, ``Speaker`` in the hover). For every selected
    layer the embeddings are standardized per feature and reduced with PCA.
    UMAP is applied on top only when ``umap_n_components`` is smaller than the
    number of PCA components; otherwise the PCA coordinates are plotted
    directly. Colours come from the module-level ``party_color_discrete_map``.

    Args:
        df: Frame with an ``Embedding`` column whose entries stack to an array
            indexed ``[sample, layer, feature]``, plus the ``Partei``,
            ``EntryCount`` and ``Speaker`` columns.
        pca_cumul_var_ratio_thresh: Keep the smallest number of components whose
            cumulative explained-variance ratio reaches this value. Only used
            when ``pca_n_components`` is not given.
        pca_n_components: Fixed number of PCA components (takes precedence).
        umap_n_components: Output dimensionality of UMAP.
        umap_n_neighbors: UMAP ``n_neighbors``.
        umap_min_dist: UMAP ``min_dist``.
        layer_list: Layer indices to plot; all layers when ``None``.
        image_output_folder: Folder prefix of the written HTML files.

    Raises:
        ValueError: If neither ``pca_n_components`` nor
            ``pca_cumul_var_ratio_thresh`` is given.
    """
    if not pca_n_components and not pca_cumul_var_ratio_thresh:
        raise ValueError("Specify either pca_n_components or pca_cumul_var_ratio_thresh")
    embeddings = np.stack(df['Embedding'].values)
    n_layers = embeddings.shape[1]
    if layer_list is None:
        layer_list = range(n_layers)
    #
    for layer in tqdm(layer_list, desc="Processing layers"):
        layer_embeddings = embeddings[:, layer, :]
        # Standardize embeddings; constant features keep a divisor of 1 so they
        # become 0 instead of NaN (NaN would make PCA fail).
        mean = np.mean(layer_embeddings, axis=0)
        std = np.std(layer_embeddings, axis=0)
        safe_std = np.where(std == 0, 1.0, std)
        standardized_embeddings = (layer_embeddings - mean) / safe_std
        # PCA
        if pca_n_components:
            num_components = pca_n_components
            pca = PCA(n_components=num_components)
            pca_embeddings = pca.fit_transform(standardized_embeddings)
        else:
            pca = PCA()
            pca.fit(standardized_embeddings)
            cumul_var_ratio = np.cumsum(pca.explained_variance_ratio_)
            reached = np.flatnonzero(cumul_var_ratio >= pca_cumul_var_ratio_thresh)
            # Fall back to all components when the threshold is never reached
            # (e.g. a threshold of 1.0 missed by floating-point rounding).
            num_components = reached[0] + 1 if reached.size else len(cumul_var_ratio)
            if num_components < umap_n_components:
                num_components = umap_n_components
            pca_embeddings = pca.transform(standardized_embeddings)[:, :num_components]
        print(f"Layer {layer} PCA num_components {num_components}")
        # UMAP
        if umap_n_components < num_components:
            umap_reducer = umap.UMAP(n_components=umap_n_components,
                                     n_neighbors=umap_n_neighbors,
                                     min_dist=umap_min_dist,
                                     metric='cosine',
                                     random_state=42)
            umap_embeddings = umap_reducer.fit_transform(pca_embeddings)
        else:
            umap_embeddings = pca_embeddings
        # Always hand three coordinate columns to the 3D plot: missing
        # dimensions are zero-filled instead of raising an IndexError.
        n_points, n_dims = umap_embeddings.shape
        coords = np.zeros((n_points, 3))
        coords[:, :min(n_dims, 3)] = umap_embeddings[:, :3]
        # df for plotting
        plot_df = df.copy()
        plot_df['UMAP1'] = coords[:, 0]
        plot_df['UMAP2'] = coords[:, 1]
        plot_df['UMAP3'] = coords[:, 2]
        # Plotting
        fig = px.scatter_3d(
            plot_df, x='UMAP1', y='UMAP2', z='UMAP3',
            color='Partei',
            size='EntryCount',
            opacity=0.5,
            color_discrete_map=party_color_discrete_map,
            title=f'Layer {layer} Real Speaker Activations on UMAP Axes',
            hover_data={
                'Speaker': True
            }
        )
        file_path = f'{image_output_folder}/scatter_layer{layer}_pca{num_components}_umap{umap_n_components}.html'
        # Plotly figures are plain objects; no matplotlib figure needs closing here.
        fig.write_html(file_path)

# Render one HTML figure per layer for the per-speaker mean embeddings.
# The frame is sorted by EntryCount (descending) and `.iloc[6:]` drops the six
# speakers with the most entries.
# NOTE(review): the reason for excluding exactly six speakers is not visible here
# (presumably outliers that would dominate the marker sizes) - confirm.
pca_umap_plot(
    #df_speech_filtered_concat.sample(1000), 
    df_speaker_mean_embeddings.sort_values(by='EntryCount', ascending=False).iloc[6:],
    #pca_cumul_var_ratio_thresh=.5,
    pca_n_components=6, 
    umap_n_components=3, 
    umap_n_neighbors=20, 
    umap_min_dist=0.1, 
    #layer_list=[i for i in range(3)],
    #image_output_folder=f"{WORK_DIR}/real_1000_img"
    image_output_folder=f"{WORK_DIR}/real_speaker_img"
)


### Axis

In [None]:
# Project every real speech onto the three assigned axes (all layers combined).
plot_data = []

for index, row in tqdm(df_speech_filtered_concat.iterrows(), total=df_speech_filtered_concat.shape[0], desc="Processing rows"):
    flat_embedding = row['Embedding'].flatten()
    record = {"Partei": row["Partei"]}
    for axis_name in ("Libertarian", "Collectivist", "Progressive"):
        # mean_embeddings[axis] already holds the direction mean(A) - mean(B),
        # zeroed on non-significant features, so one dot product is the score.
        # Subtracting a second dot product against `mean_embeddings[axis][1]`
        # (a single layer) fails with a shape mismatch.
        record[axis_name] = np.dot(flat_embedding, mean_embeddings[axis_name].flatten())
    record["Text"] = row["Text"]
    plot_data.append(record)

plot_df = pd.DataFrame(plot_data)


fig = px.scatter_3d(
    plot_df, x='Libertarian', y='Collectivist', z='Progressive',
    color='Partei',
    opacity=0.5,
    color_discrete_map=party_color_discrete_map,
    title=f'Real Text Activations on Assigned Axes'
)


# NOTE(review): this real-text figure is written to the standard_img folder - confirm that is intended.
image_output_folder=f"{WORK_DIR}/standard_img"
file_path = f'{image_output_folder}/scatter_real_data_manual.html'
fig.write_html(file_path)

In [None]:
# Speakers sorted by number of entries; the six with the most entries are left out.
df_tmp = df_speaker_mean_embeddings.sort_values(by='EntryCount', ascending=False).iloc[6:]
score_columns = ['Libertarian', 'Collectivist', 'Progressive']

# One record per speaker with the projection score on every axis
plot_data = []

for index, row in tqdm(df_tmp.iterrows(), total=df_tmp.shape[0], desc="Processing rows"):
    embedding = row['Embedding']
    record = {
        "Speaker": row["Speaker"],
        "Party": row["Partei"],
    }
    for axis_name in axes.keys():
        # Keep only the significant features, then project on the axis direction.
        filtered_embedding = embedding * significant_features[axis_name]
        record[axis_name] = np.dot(filtered_embedding.flatten(), mean_embeddings[axis_name].flatten())
    record["EntryCount"] = row["EntryCount"]
    record["Category"] = 'Speaker'
    plot_data.append(record)

plot_df = pd.DataFrame(plot_data)


# Calculate centroids for each party
centroid_data = plot_df.groupby('Party').agg({
    'Libertarian': 'mean',
    'Collectivist': 'mean',
    'Progressive': 'mean',
    'EntryCount': 'sum'
}).reset_index()

# Centroids have no speaker; use the party name so the hover label is not NaN.
centroid_data['Speaker'] = centroid_data['Party']
centroid_data['Category'] = 'Party'

# Append centroids to plot_df
plot_df = pd.concat([plot_df, centroid_data], ignore_index=True)


# Z-score normalization per axis.
# NOTE(review): mean and std are taken over speakers *and* the appended party
# centroids - confirm this is intended.
score_mean = plot_df[score_columns].mean()
score_std = plot_df[score_columns].std()

# Dividing the z-scores by 3 maps +/-3 standard deviations onto [-1, 1];
# points further out fall outside the fixed axis ranges below and are not shown.
scale_factor = 3
plot_df[score_columns] = (plot_df[score_columns] - score_mean) / score_std / scale_factor


libertarian_range  = [-0.99, 0.99]
collectivist_range = [-0.99, 0.99]
progressive_range  = [-0.99, 0.99]


fig = px.scatter_3d(
    plot_df, x='Libertarian', y='Collectivist', z='Progressive',
    color='Party',
    size='EntryCount',
    size_max=64,
    symbol='Category',
    symbol_map={'Party':'circle', 'Speaker':'square'},
    opacity=0.56,
    color_discrete_map=party_color_discrete_map,
    range_x=libertarian_range,
    range_y=collectivist_range,
    range_z=progressive_range,
    # title=f'Speaker and Party Activations Projected on Assigned Axes (All Layers Combined)',
    hover_data={
        'Speaker': True
    }
)

# Customize the layout
fig.update_layout(
    font_family="Helvetica",
    scene = dict(
        xaxis_title='Libertarian',
        yaxis_title='Collectivist',
        zaxis_title='Progressive',
    ),
    scene_aspectmode='cube',
    legend=dict(
        orientation="h"
    )
)

image_output_folder=f"{WORK_DIR}/real_speaker_img"
file_path = f'{image_output_folder}/scatter_real_data_proj.html'
fig.write_html(file_path)

#### layer-wise

In [None]:
# Speakers sorted by number of entries; the six with the most entries are left out.
df_tmp = df_speaker_mean_embeddings.sort_values(by='EntryCount', ascending=False).iloc[6:]

num_layers = df_tmp.iloc[0]['Embedding'].shape[0]

# Records for all layers: speaker rows followed by party centroids, per layer
plot_data = []

for layer in tqdm(range(num_layers), desc="Processing layers"):
    layer_plot_data = []
    for index, row in df_tmp.iterrows():
        embedding = row['Embedding'][layer]  # Get the embedding for this layer
        record = {
            "Speaker": row["Speaker"],
            "Partei": row["Partei"],
        }
        for axis_name in axes.keys():
            # Keep only this layer's significant features, then project on the axis direction.
            filtered_embedding = embedding * significant_features[axis_name][layer]
            record[axis_name] = np.dot(filtered_embedding, mean_embeddings[axis_name][layer])
        record.update({
            "EntryCount": row["EntryCount"],
            "Color": row["Partei"],
            "Symbol": 'Speaker',
            "Layer": layer
        })
        layer_plot_data.append(record)
    layer_df = pd.DataFrame(layer_plot_data)
    # Min-max normalize scores for this layer
    for axis in ['Libertarian', 'Collectivist', 'Progressive']:
        min_val = layer_df[axis].min()
        max_val = layer_df[axis].max()
        value_range = max_val - min_val
        if value_range == 0:
            # All scores identical (e.g. no significant feature in this layer):
            # place the points mid-range instead of dividing by zero.
            layer_df[axis] = 0.5
        else:
            layer_df[axis] = (layer_df[axis] - min_val) / value_range
    # Calculate centroids after normalization
    centroid_data = layer_df.groupby('Partei').agg({
        'Libertarian': 'mean',
        'Collectivist': 'mean',
        'Progressive': 'mean',
        'EntryCount': 'sum'
    }).reset_index()
    centroid_data['Speaker'] = centroid_data['Partei']  # Add a placeholder for Speaker
    centroid_data['Symbol'] = 'Partei'
    centroid_data['Layer'] = layer
    # Append speakers and centroids to plot_data
    plot_data.extend(layer_df.to_dict('records'))
    plot_data.extend(centroid_data.to_dict('records'))

plot_df = pd.DataFrame(plot_data)


# Calculate the min and max values for each axis across all layers
libertarian_range = [plot_df['Libertarian'].min(), plot_df['Libertarian'].max()]
collectivist_range = [plot_df['Collectivist'].min(), plot_df['Collectivist'].max()]
progressive_range = [plot_df['Progressive'].min(), plot_df['Progressive'].max()]


fig = px.scatter_3d(
    plot_df, x='Libertarian', y='Collectivist', z='Progressive',
    color='Partei',
    size='EntryCount',
    size_max=64,
    symbol='Symbol',
    symbol_map={'Partei':'circle', 'Speaker':'square'},
    opacity=0.56,
    color_discrete_map=party_color_discrete_map,
    range_x=libertarian_range,
    range_y=collectivist_range,
    range_z=progressive_range,
    title='Real Speaker Activations Projected on Assigned Axes (Animated through Layers)',
    hover_data=['Speaker'],
    animation_frame='Layer'
)

# Customize the layout
fig.update_layout(
    scene = dict(
        xaxis_title='Libertarian',
        yaxis_title='Collectivist',
        zaxis_title='Progressive',
    ),
    scene_aspectmode='cube'
)


image_output_folder=f"{WORK_DIR}/real_speaker_img"
file_path = f'{image_output_folder}/scatter_real_data_proj_animated.html'
fig.write_html(file_path)

## LDA

In [None]:
# Speakers sorted by number of entries; the six with the most entries are left out.
df_tmp = df_speaker_mean_embeddings.sort_values(by='EntryCount', ascending=False).iloc[6:]

num_layers = df_tmp.iloc[0]['Embedding'].shape[0]

# Loop-invariant inputs, computed once instead of once per layer.
# Encode party labels
y = df_tmp['Partei'].values
le = LabelEncoder()
y_encoded = le.fit_transform(y)
# Optional metadata is only carried along when the speaker frame has it.
# Accessing a missing column such as "SpeakerStatus" row by row raises a KeyError.
optional_columns = [column for column in ("SpeakerStatus", "Religion") if column in df_tmp.columns]
meta_columns = ["Partei", "Speaker"] + optional_columns + ["EntryCount"]
meta_records = [
    {column: row[column] for column in meta_columns}
    for _, row in df_tmp.iterrows()
]

# Records for all layers: speaker rows followed by party centroids, per layer
plot_data = []

for layer in tqdm(range(num_layers)):
    # Prepare data for LDA: one flattened layer embedding per speaker
    X = np.array([row['Embedding'][layer].flatten() for _, row in df_tmp.iterrows()])
    # Perform LDA (n_components=3 needs at least four distinct parties)
    lda = LinearDiscriminantAnalysis(n_components=3)
    X_lda = lda.fit_transform(X, y_encoded)
    # Create plot data
    layer_plot_data = [
        {
            **meta,
            "LDA1": X_lda[i, 0],
            "LDA2": X_lda[i, 1],
            "LDA3": X_lda[i, 2],
            "Symbol": 'Speaker',
            "Layer": layer
        }
        for i, meta in enumerate(meta_records)
    ]
    # Calculate centroids for each party
    centroid_data = pd.DataFrame(layer_plot_data).groupby('Partei').agg({
        'LDA1': 'mean',
        'LDA2': 'mean',
        'LDA3': 'mean',
        'EntryCount': 'sum'
    }).reset_index()
    centroid_data['Speaker'] = centroid_data['Partei']  # Add a placeholder for Speaker
    centroid_data['Symbol'] = 'Partei'
    centroid_data['Layer'] = layer
    # Append speakers and centroids to plot_data
    plot_data.extend(layer_plot_data)
    plot_data.extend(centroid_data.to_dict('records'))

plot_df = pd.DataFrame(plot_data)

# Calculate the min and max values for each axis across all layers
x_min, x_max = plot_df['LDA1'].min(), plot_df['LDA1'].max()
y_min, y_max = plot_df['LDA2'].min(), plot_df['LDA2'].max()
z_min, z_max = plot_df['LDA3'].min(), plot_df['LDA3'].max()
# Add a small margin to ensure all points are visible
margin = 0.1  # 10% margin
x_range = x_max - x_min
y_range = y_max - y_min
z_range = z_max - z_min

# Create 3D scatter plot with one animation frame per layer
fig = px.scatter_3d(
    plot_df, x='LDA1', y='LDA2', z='LDA3',
    color='Partei',
    size='EntryCount',
    size_max=64,
    symbol='Symbol',
    symbol_map={'Partei':'circle', 'Speaker':'square'},
    opacity=0.56,
    color_discrete_map=party_color_discrete_map,
    title='Speaker Embeddings: LDA 3D Projection (Animated through Layers)',
    hover_data=['Speaker'] + optional_columns + ['EntryCount'],
    animation_frame='Layer'
)
# Customize the layout; fixed ranges keep the axes stable across frames
fig.update_layout(
    scene = dict(
        xaxis_title='LDA1',
        yaxis_title='LDA2',
        zaxis_title='LDA3',
        xaxis=dict(range=[x_min - margin * x_range, x_max + margin * x_range]),
        yaxis=dict(range=[y_min - margin * y_range, y_max + margin * y_range]),
        zaxis=dict(range=[z_min - margin * z_range, z_max + margin * z_range])
    ),
    scene_aspectmode='cube'
)


# Save the plot
image_output_folder = f"{WORK_DIR}/real_speaker_img"
file_path = f'{image_output_folder}/scatter_lda_3d_animated.html'
fig.write_html(file_path)