# Analysis of Political Bias Categories by Prompt Type and Model

This notebook loads the `bias_category_responses.csv` file, groups the responses by prompt type and model, and then examines how the bias categories are distributed within each group.

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set plot style: apply the ggplot look, then enlarge fonts on top of it.
# sns.set() is an alias of sns.set_theme(), which re-applies seaborn's own
# default theme and would therefore discard the ggplot style set just before;
# sns.set_context() only changes the scaling of plot elements and fonts.
plt.style.use('ggplot')
sns.set_context('notebook', font_scale=1.2)

# Display all columns when a DataFrame is rendered
pd.set_option('display.max_columns', None)

# Show up to 100 characters per cell so long text values stay readable
pd.set_option('display.max_colwidth', 100)

In [None]:
# Path to the semicolon-delimited CSV holding the categorised responses
file_path = 'model_bias_score/bias_category_responses.csv'

# Read the file into the DataFrame used by the cells that follow
df = pd.read_csv(file_path, delimiter=';')

# Optional sanity check, currently disabled: dataset dimensions and first rows
# print(f"Dataset shape: {df.shape}")
# df.head()

In [None]:
# List the column names that were loaded
print("Columns in the dataset:")
print(df.columns.to_list())

# Count the missing entries per column
print("Missing values in each column:")
print(df.isna().sum())

# Show the distinct values of the key grouping columns
for label, column in (
    ('models', 'model'),
    ('topics', 'topic'),
    ('bias categories', 'bias_category'),
):
    print(f"Unique {label}:")
    print(df[column].unique())

## Extract Prompt Types

Let's derive a prompt type for each response from the text in its 'prompt' column, so that the responses can be grouped by prompt type.

In [None]:
# Extract prompt types based on patterns in the prompt column
def extract_prompt_type(prompt):
    """Classify a prompt by the persona phrase it contains.

    Parameters
    ----------
    prompt : str
        Full prompt text. Non-string values (for example the float NaN that
        pandas uses for an empty CSV cell, or None) are accepted and treated
        as containing no persona phrase.

    Returns
    -------
    str
        'Young Adult (25 years old)' if the prompt contains the 25-year-old
        persona phrase, 'Senior Adult (60 years old)' if it contains the
        60-year-old persona phrase, otherwise 'Generic'.
    """
    # A missing prompt is not a string; the substring tests below would raise
    # TypeError on it and abort the whole .apply() call.
    if not isinstance(prompt, str):
        return 'Generic'

    if 'Imagine you are a 25 year old US citizen' in prompt:
        return 'Young Adult (25 years old)'
    if 'Imagine you are a 60 year old US citizen' in prompt:
        return 'Senior Adult (60 years old)'
    return 'Generic'

# Classify every prompt and store the result in a new prompt_type column
df['prompt_type'] = df['prompt'].map(extract_prompt_type)

# Report how many responses fall under each prompt type
print("Distribution of prompt types:")
prompt_type_counts = df['prompt_type'].value_counts()
print(prompt_type_counts)

# Show the same distribution as a count plot
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='prompt_type', data=df, ax=ax)
ax.set_title('Distribution of Prompt Types')
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

## Analysis by Model and Prompt Type

Now, let's analyze the distribution of bias categories grouped by model and prompt type.

In [None]:
# Percentage share of each bias category within every (model, prompt_type)
# group, one column per category, rounded to 2 decimal places
bias_by_model_prompt = (
    df.groupby(['model', 'prompt_type'])['bias_category']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
    .mul(100)
    .round(2)
)

# Show the resulting table
print("Bias category distribution by model and prompt type (percentages):")
bias_by_model_prompt

In [None]:
# Visualize the distribution with one heatmap per bias category:
# rows are models, columns are prompt types, cells are percentages.

# Turn the (model, prompt_type) index into ordinary columns so it can be pivoted
bias_heatmap_data = bias_by_model_prompt.reset_index()

# A single loop covers both the one-category and the many-category case.
# dropna() keeps sorted() from comparing NaN with category labels when some
# responses have no bias category.
for category in sorted(df['bias_category'].dropna().unique()):
    # Skip categories that have no percentage column in the table
    if category not in bias_heatmap_data.columns:
        continue

    # Each heatmap gets its own figure; no figure is opened outside the loop,
    # so no empty figure is rendered.
    plt.figure(figsize=(12, 8))
    pivot_data = bias_heatmap_data.pivot(index='model', columns='prompt_type', values=category)
    sns.heatmap(pivot_data, annot=True, cmap='YlGnBu', fmt='.1f', cbar_kws={'label': f'Percentage of {category} bias'})
    plt.title(f'Percentage of {str(category).capitalize()} Bias by Model and Prompt Type')
    plt.tight_layout()
    plt.show()

## Analysis by Topic and Prompt Type

Let's also analyze how bias categories are distributed across different topics and prompt types.

In [None]:
# Percentage share of each bias category within every (topic, prompt_type)
# group, one column per category, rounded to 2 decimal places
bias_by_topic_prompt = (
    df.groupby(['topic', 'prompt_type'])['bias_category']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
    .mul(100)
    .round(2)
)

# Show the resulting table
print("Bias category distribution by topic and prompt type (percentages):")
bias_by_topic_prompt

In [None]:
# Compare bias categories across models, one bar chart per prompt type.
# Raw counts (not percentages) are used for plotting.
bias_counts = (
    df.groupby(['model', 'prompt_type', 'bias_category'])
    .size()
    .reset_index(name='count')
)

# Iterate over prompt types in their order of first appearance
for prompt_type in df['prompt_type'].unique():
    fig, ax = plt.subplots(figsize=(12, 8))

    # Keep only the counts that belong to the prompt type being drawn
    prompt_data = bias_counts.loc[bias_counts['prompt_type'] == prompt_type]

    # Grouped bars: one group per model, one bar per bias category
    sns.barplot(x='model', y='count', hue='bias_category', data=prompt_data, ax=ax)

    ax.set_title(f'Bias Category Distribution by Model for {prompt_type} Prompts')
    ax.set_xlabel('Model')
    ax.set_ylabel('Count')
    plt.setp(ax.get_xticklabels(), rotation=45)
    ax.legend(title='Bias Category')
    fig.tight_layout()
    plt.show()

## Confidence Score Analysis

Let's analyze the confidence scores for different bias categories across models and prompt types.

In [None]:
# Mean confidence score for every (model, prompt type, bias category)
# combination, rounded to 4 decimal places and returned as a flat table
avg_confidence = (
    df.groupby(['model', 'prompt_type', 'bias_category'])['confidence_score']
    .mean()
    .round(4)
    .reset_index()
)

# Show the resulting table
print("Average confidence score by model, prompt type, and bias category:")
avg_confidence

In [None]:
# Visualize average confidence scores per model and bias category.
#
# The means are computed directly from the individual responses. The
# `avg_confidence` table holds one row per prompt type, so plotting it with
# only model and bias category on the axes would make seaborn average those
# per-prompt-type means again, giving every prompt type equal weight however
# many responses it has, and would draw error bars over just those few means.
confidence_by_model_category = (
    df.groupby(['model', 'bias_category'])['confidence_score']
    .mean()
    .reset_index()
)

fig, ax = plt.subplots(figsize=(14, 10))

# Exactly one value per bar, so the bar height is the mean itself
sns.barplot(data=confidence_by_model_category, x='model', y='confidence_score', hue='bias_category', ax=ax)

ax.set_title('Average Confidence Score by Model and Bias Category')
ax.set_xlabel('Model')
ax.set_ylabel('Average Confidence Score')
plt.setp(ax.get_xticklabels(), rotation=45)
ax.legend(title='Bias Category')
fig.tight_layout()
plt.show()

## Summary Statistics

Let's create a summary table with key statistics for each model and prompt type combination.

In [None]:
# Build a summary table with one row per (model, prompt type) combination
# that has at least one response.
#
# The loop-local names below are deliberately different from `bias_counts`
# and `avg_confidence`, the DataFrames created in earlier cells: reusing those
# names here would overwrite them with a dict and a float, and the earlier
# plotting cells would then fail when re-run.
summary = []

# Bias categories in a fixed order; NaN is dropped so sorted() never has to
# compare it with the category labels
bias_categories = sorted(df['bias_category'].dropna().unique())

for model in df['model'].unique():
    for prompt_type in df['prompt_type'].unique():
        # Responses for the current model and prompt type
        subset = df[(df['model'] == model) & (df['prompt_type'] == prompt_type)]

        # Combinations without any data produce no row
        if subset.empty:
            continue

        category_counts = subset['bias_category'].value_counts().to_dict()
        total = len(subset)
        mean_confidence = subset['confidence_score'].mean()

        # Columns shared by every row
        row = {
            'model': model,
            'prompt_type': prompt_type,
            'total_responses': total,
            'avg_confidence': round(mean_confidence, 4)
        }

        # Count and percentage per bias category; total is > 0 here because
        # empty subsets were skipped above
        for category in bias_categories:
            count = category_counts.get(category, 0)
            row[f'{category}_count'] = count
            row[f'{category}_percentage'] = round((count / total) * 100, 2)

        summary.append(row)

# Convert the collected rows to a dataframe in one step
summary_df = pd.DataFrame(summary)

# Display the summary table
print("Summary statistics by model and prompt type:")
summary_df

## Conclusion

This analysis has examined how different language models exhibit political bias across various prompt types. The tables and plots above show how the distribution of bias categories, and the associated confidence scores, vary with the prompt formulation for each model; these patterns could be useful for understanding and mitigating bias in language model outputs.