In [None]:
import pandas as pd
import numpy as np

# Read the racial-bias corpus from the CSV next to the notebook and show
# the first rows as the cell output.
bcorp = pd.read_csv(filepath_or_buffer='bias_corp_racial.csv')
bcorp.head()

## bias_N: racial bias score given by a rater (0, 1, 2, 3, 4, 5)
## confidence_N: that rater's confidence in their bias score (1-10)

In [None]:
# Work on an independent copy so the columns added below do not modify `bcorp`.
df = bcorp.copy(deep=True)

# Function to compute the normalized confidence-weighted bias score
def compute_weighted_bias(row):
    """Return the confidence-weighted mean of the three bias scores in ``row``.

    Each ``bias_i`` is weighted by its ``confidence_i`` after the usable
    confidences have been rescaled to sum to 1.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (e.g. the row Series passed by
        ``df.apply(..., axis=1)`` or a plain dict). Must provide the keys
        ``bias_1``..``bias_3`` and ``confidence_1``..``confidence_3``.

    Returns
    -------
    float
        The weighted bias score. ``nan`` is returned when no (bias,
        confidence) pair is usable or when the usable confidences sum to
        zero, because the weights are undefined in that case.
    """
    # Force float arrays so missing values (None / NaN) are represented as NaN
    # and the division below is always floating-point.
    bias_scores = np.array(
        [row['bias_1'], row['bias_2'], row['bias_3']], dtype=float
    )
    confidence_scores = np.array(
        [row['confidence_1'], row['confidence_2'], row['confidence_3']], dtype=float
    )

    # Ignore any pair with a missing bias or confidence instead of letting a
    # single NaN turn the whole row's score into NaN.
    usable = ~(np.isnan(bias_scores) | np.isnan(confidence_scores))
    bias_scores = bias_scores[usable]
    confidence_scores = confidence_scores[usable]

    # Guard the normalisation: a zero total (including the "nothing usable"
    # case, where the sum of an empty array is 0) would otherwise be a 0/0
    # division that emits a RuntimeWarning.
    total_confidence = confidence_scores.sum()
    if total_confidence == 0:
        return np.nan

    # Normalize confidence scores so they sum to 1, then take the weighted sum.
    normalized_conf = confidence_scores / total_confidence
    return np.dot(bias_scores, normalized_conf)

# Score every row by passing it to compute_weighted_bias.
df['weighted_bias'] = df.apply(compute_weighted_bias, axis='columns')

# Binary label: 1 when the weighted bias reaches the cut-off of 3 (treated as
# racist), otherwise 0. A NaN score compares False and therefore also gets 0.
df['true_label'] = df['weighted_bias'].ge(3).astype(int)

df.head()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the weighted bias scores, drawn through the explicit
# figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(df['weighted_bias'], bins=10, edgecolor='black')
ax.set_title('Distribution of Weighted Bias Scores')
ax.set_xlabel('Weighted Bias Score')
ax.set_ylabel('Frequency')
# Horizontal guide lines only, so bar heights are easier to read.
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Show the first five rows of the working frame.
df.head(n=5)