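- **gender (3)**: male, female, gen_other (transgender/other_gender/heterosexual/homosexual_gay_or_lesbian/bisexual/other_sexual_orientation 포함)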
- **religion (2)**: christian, rel_other (jewish/muslim/hindu/buddhist/atheist/other_religion 포함)
- **race (4)**: black, white, asian, race_other (latino/other_race_or_ethnicity 포함)

→ Total 9 one-hot columns (S shape: n×9)

**filtering**: 각 컬럼을 `> 0` 기준으로 이진화한 뒤, 세 그룹(gender/religion/race) 모두에서 정확히 1개 컬럼만 1인 행만 남김

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from pathlib import Path
import os

In [3]:
# Read the full Civil Comments CSV into a DataFrame.
raw_path = Path('raw/civil_comments/all_data.csv')
df0 = pd.read_csv(raw_path)

# Sanity check: overall size plus the first 30 column names.
leading_columns = list(df0.columns)[:30]
print(f"Raw shape: {df0.shape}")
print(f"Columns: {leading_columns}...")

Raw shape: (1999516, 46)
Columns: ['id', 'comment_text', 'split', 'created_date', 'publication_id', 'parent_id', 'article_id', 'rating', 'funny', 'wow', 'sad', 'likes', 'disagree', 'toxicity', 'severe_toxicity', 'obscene', 'sexual_explicit', 'identity_attack', 'insult', 'threat', 'male', 'female', 'transgender', 'other_gender', 'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual', 'other_sexual_orientation', 'christian', 'jewish']...


Index(['id', 'comment_text', 'split', 'created_date', 'publication_id',
       'parent_id', 'article_id', 'rating', 'funny', 'wow', 'sad', 'likes',
       'disagree', 'toxicity', 'severe_toxicity', 'obscene', 'sexual_explicit',
       'identity_attack', 'insult', 'threat', 'male', 'female', 'transgender',
       'other_gender', 'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual',
       'other_sexual_orientation', 'christian', 'jewish', 'muslim', 'hindu',
       'buddhist', 'atheist', 'other_religion', 'black', 'white', 'asian',
       'latino', 'other_race_or_ethnicity', 'physical_disability',
       'intellectual_or_learning_disability', 'psychiatric_or_mental_illness',
       'other_disability', 'identity_annotator_count',
       'toxicity_annotator_count'],
      dtype='object')

In [3]:
def safe_cols(cands, df):
    """Return the entries of ``cands`` that are column labels of ``df``.

    The order (and any duplicates) of ``cands`` is preserved; names that are
    not in ``df.columns`` are dropped.
    """
    return list(filter(lambda name: name in df.columns, cands))

# ============================================================
# Sensitive attribute construction
# ============================================================
# Every group gets a derived "*_other" column that is 1 when the sum of its
# source columns (NaN treated as 0) is positive. Any base or source column
# missing from df0 is first created as an all-zero column.

# Gender: male, female, gen_other (3 columns)
gender_other_sources = [
    'transgender', 'other_gender',
    'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual',
    'other_sexual_orientation', 'LGBTQ',
]
gender_cols = ['male', 'female', 'gen_other']

# Religion: christian, rel_other (only 2 columns) - following DRAF
# rel_other = at least one of jewish, muslim, hindu, buddhist, atheist,
# other_religion, other_religions
religion_other_sources = ['hindu', 'jewish', 'muslim', 'buddhist', 'atheist', 'other_religion', 'other_religions']
religion_cols = ['christian', 'rel_other']  # only 2 columns

# Race: black, white, asian, race_other (4 columns)
race_other_sources = ['latino', 'other_race_or_ethnicity', 'asian_latino_etc']
race_cols = ['black', 'white', 'asian', 'race_other']

# Groups are processed in the order gender -> religion -> race.
for other_name, base_cols, other_sources in (
    ('gen_other', ['male', 'female'], gender_other_sources),
    ('rel_other', ['christian'], religion_other_sources),
    ('race_other', ['black', 'white', 'asian'], race_other_sources),
):
    for col in base_cols + other_sources:
        if col not in df0.columns:
            df0[col] = 0
    source_total = df0[safe_cols(other_sources, df0)].fillna(0).sum(axis=1)
    df0[other_name] = (source_total > 0).astype(int)

# ============================================================
# Filtering: keep only rows with exactly one active column per group
# ============================================================
# Binarize first: NaN counts as 0, any positive value counts as 1.
df0_gender = df0[gender_cols].fillna(0).gt(0).astype(int)
df0_religion = df0[religion_cols].fillna(0).gt(0).astype(int)
df0_race = df0[race_cols].fillna(0).gt(0).astype(int)

# A row is valid for a group when exactly one of that group's columns is set.
mask_gender = df0_gender.sum(axis=1).eq(1)
mask_religion = df0_religion.sum(axis=1).eq(1)
mask_race = df0_race.sum(axis=1).eq(1)

# A row must be valid for all three groups to be kept.
mask_valid = mask_gender & mask_religion & mask_race

print(f"Gender valid (exactly 1-hot): {mask_gender.sum():,}")
print(f"Religion valid (exactly 1-hot): {mask_religion.sum():,}")
print(f"Race valid (exactly 1-hot): {mask_race.sum():,}")
print(f"All valid (intersection): {mask_valid.sum():,}")

Gender valid (exactly 1-hot): 101,678
Religion valid (exactly 1-hot): 81,284
Race valid (exactly 1-hot): 54,037
All valid (intersection): 3,408


In [4]:
# Keep only the rows that passed all three one-hot checks and renumber from 0.
df = df0[mask_valid].reset_index(drop=True)
print(f"Filtered shape: {df.shape}")

# Sensitive-attribute matrix S: the gender, religion and race columns side by
# side, binarized (NaN -> 0, positive -> 1).
S_cols = [*gender_cols, *religion_cols, *race_cols]
S_df = df[S_cols].fillna(0).gt(0).astype(int)
print(f"S shape: {S_df.shape}")
print(f"S columns: {list(S_df.columns)}")

# Binary label: 1 when toxicity is strictly above the threshold.
# NOTE(review): the strict '>' leaves rows with toxicity exactly 0.5 (and NaN)
# in class 0 - confirm this matches the intended labelling convention.
tox_threshold = 0.5
is_toxic = df['toxicity'].gt(tox_threshold)
y = is_toxic.astype(int).values
print(f"Label distribution: {np.bincount(y)}")

Filtered shape: (3408, 52)
S shape: (3408, 9)
S columns: ['male', 'female', 'gen_other', 'christian', 'rel_other', 'black', 'white', 'asian', 'race_other']
Label distribution: [3033  375]


In [5]:
# ============================================================
# Feature extraction: use RoBERTa embeddings (DRAF approach)
# ============================================================
# Builds the feature matrix X with one row per row of `df`.
# Preferred source: a precomputed embedding matrix on disk. If that file is
# absent, fall back to the standardized float64/int64 columns of `df` that
# are not identifiers, toxicity-related scores, or identity attributes.
# NOTE(review): the scaler is fit on every retained row; if a train/test
# split happens downstream, confirm that fitting on all rows is intended.
roberta_path = '/data/share/FairLLM/civil/distillroberta_base_all_features.npy'

if os.path.exists(roberta_path):
    # Memory-map the file rather than reading it all into RAM: when the array
    # is aligned with the raw CSV, only the rows selected by mask_valid are
    # actually needed.
    rb_all = np.load(roberta_path, mmap_mode='r')
    print(f"RoBERTa embeddings loaded: {rb_all.shape}")

    # Alignment with the data is inferred from the row count alone.
    n_raw = len(df0)
    n_valid = mask_valid.sum()

    if rb_all.shape[0] == n_raw:
        # Same length as the raw data -> apply the validity mask
        rb = rb_all[mask_valid.values]
        print(f"Filtered RoBERTa: {rb.shape} (from raw-aligned)")
    elif rb_all.shape[0] == n_valid:
        # Same length as the filtered data -> use as is
        # NOTE(review): this assumes the file was filtered with the same mask
        # and row order; only the row count is checked here.
        rb = rb_all
        print(f"RoBERTa already filtered: {rb.shape}")
    else:
        raise ValueError(f"RoBERTa shape mismatch: {rb_all.shape[0]} vs raw={n_raw}, valid={n_valid}")

    # Standardize (astype copies the selected rows into an in-memory array)
    scaler = StandardScaler()
    X = scaler.fit_transform(rb.astype(np.float32))
    print(f"X shape (RoBERTa, scaled): {X.shape}")

else:
    print(f"[WARNING] RoBERTa file not found: {roberta_path}")
    print("Falling back to numeric features only...")

    # Fallback: numeric columns of the filtered frame
    all_identity_cols = (
        ['male', 'female'] + gender_other_sources + ['gen_other'] +
        ['christian'] + religion_other_sources + ['rel_other'] +
        ['black', 'white', 'asian'] + race_other_sources + ['race_other']
    )
    # Identifiers, the label source, the other toxicity sub-scores, and every
    # identity column are kept out of the feature set.
    exclude_cols = (
        ['id', 'comment_text', 'toxicity', 'split', 'created_date', 'publication_id', 
         'parent_id', 'article_id', 'identity_attack', 'insult', 'obscene', 'severe_toxicity',
         'sexual_explicit', 'threat'] + all_identity_cols
    )
    feature_cols = [c for c in df.columns if c not in exclude_cols and df[c].dtype in ['float64', 'int64']]

    if not feature_cols:
        # No usable numeric column: emit a single constant feature
        X = np.ones((len(df), 1))
    else:
        X_raw = df[feature_cols].fillna(0)
        scaler = StandardScaler()
        X = scaler.fit_transform(X_raw)

print(f"\nFinal shapes:")
print(f"  X: {X.shape}")
print(f"  y: {y.shape}")
print(f"  S: {S_df.shape}")

Falling back to numeric features only...

Final shapes:
  X: (3408, 11)
  y: (3408,)
  S: (3408, 9)


In [None]:
# Persist civil_comments(3): 3 groups -> 9 one-hot columns.
# X, y and S are written as separate .npy files in one output directory.
save_dir = Path('.') / 'civil_comments(3)'
save_dir.mkdir(parents=True, exist_ok=True)

S = S_df.values

for file_name, array in (('X.npy', X), ('y.npy', y), ('S.npy', S)):
    np.save(save_dir / file_name, array)

# The feature source is reported from X's width: 768 columns is taken to mean
# the embedding path was used, anything else the numeric fallback.
feature_kind = '(RoBERTa embeddings)' if X.shape[1] == 768 else '(numeric features)'

print(f"Saved civil_comments(3):")
print(f"  X: {X.shape} {feature_kind}")
print(f"  y: {y.shape}")
print(f"  S: {S.shape} (columns: {list(S_df.columns)})")
print(f"  - gender: {gender_cols}")
print(f"  - religion: {religion_cols}")
print(f"  - race: {race_cols}")

Saved civil_comments(3):
  X: (3408, 11) (numeric features)
  y: (3408,)
  S: (3408, 9) (columns: ['male', 'female', 'gen_other', 'christian', 'rel_other', 'black', 'white', 'asian', 'race_other'])
  - gender: ['male', 'female', 'gen_other']
  - religion: ['christian', 'rel_other']
  - race: ['black', 'white', 'asian', 'race_other']

Done!
