# 🧠 Reddit Rule Violation 분류 프로젝트

이 노트북은 Reddit 댓글이 주어진 규칙을 위반했는지를 예측하는 이진 분류 과제를 위한 실험 전체를 정리한 것입니다.

## 📌 주요 목표
- 댓글(`body`)과 규칙(`rule`) 간 의미적 유사도 기반 feature 생성
- positive/negative 예시와의 유사도(`pos_sim`, `neg_sim`)를 feature로 추가
- 간단한 모델(LogisticRegression)과 BERT fine-tuning 실험

---


In [None]:
import pandas as pd
from tqdm import tqdm
from sentence_transformers import SentenceTransformer, util

# Register `progress_apply` on pandas objects so row-wise work shows a progress bar.
tqdm.pandas()

# Training data, read from the current working directory.
df = pd.read_csv(filepath_or_buffer="train.csv")

# Sentence-embedding model used for the similarity features.
model = SentenceTransformer("all-MiniLM-L6-v2")

# 유사도 계산 함수
def compute_similarities(row):
    """Compute embedding cosine similarities for one comment row.

    The comment ``body`` is compared against the ``rule`` text and against the
    two positive and two negative example comments stored on the same row.
    Embeddings come from the module-level ``model``.

    Args:
        row: Record (e.g. a DataFrame row) exposing the text fields ``body``,
            ``rule``, ``positive_example_1``, ``positive_example_2``,
            ``negative_example_1`` and ``negative_example_2``.

    Returns:
        pd.Series with three floats:
        ``body_rule_similarity`` (body vs. rule), ``pos_sim`` (mean similarity
        of body to the two positive examples) and ``neg_sim`` (mean similarity
        of body to the two negative examples).
    """
    text_fields = (
        'body',
        'rule',
        'positive_example_1',
        'positive_example_2',
        'negative_example_1',
        'negative_example_2',
    )
    # One batched encode call instead of six separate forward passes per row;
    # the similarities are the same up to floating-point noise.
    embeddings = model.encode(
        [row[field] for field in text_fields],
        convert_to_tensor=True,
    )

    # Row 0 is the body; compare it against the remaining five embeddings.
    # cos_sim returns a (1, 5) matrix, so take its single row.
    sims = util.cos_sim(embeddings[:1], embeddings[1:])[0]

    return pd.Series({
        'body_rule_similarity': sims[0].item(),
        'pos_sim': sims[1:3].mean().item(),
        'neg_sim': sims[3:5].mean().item(),
    })

# Compute the three similarity features row by row (with a progress bar)
# and attach them to the frame.
feature_columns = ['body_rule_similarity', 'pos_sim', 'neg_sim']
similarity_features = df.progress_apply(compute_similarities, axis=1)
df[feature_columns] = similarity_features

# Text fed to BERT: the rule followed by the comment body.
rule_prefix = "[RULE] " + df['rule']
df['bert_input'] = rule_prefix + " [SEP] " + df['body']


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Histogram (with KDE overlay) of the body-vs-rule similarity.
hist_ax = sns.histplot(df['body_rule_similarity'], bins=30, kde=True)
hist_ax.set_title("Body-Rule Similarity Distribution")
hist_ax.set_xlabel("Cosine Similarity")
hist_ax.set_ylabel("Count")
plt.show()

# Box plot of the same similarity, grouped by the violation label.
box_ax = sns.boxplot(x='rule_violation', y='body_rule_similarity', data=df)
box_ax.set_title("Similarity by Rule Violation")
plt.show()


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Imported here so this cell runs on its own, before the BERT cell.
from sklearn.model_selection import train_test_split

X = df[['body_rule_similarity', 'pos_sim', 'neg_sim']]
y = df['rule_violation']

# Score on rows the classifier never saw: metrics computed on the fitting
# data are optimistic. The split is stratified so both parts keep the
# original label ratio.
X_fit, X_holdout, y_fit, y_holdout = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y,
)

# Named `clf` rather than `model`: `compute_similarities` reads the global
# `model` (the SentenceTransformer), so rebinding it here would break any
# re-run of the feature cell.
clf = LogisticRegression()
clf.fit(X_fit, y_fit)

y_pred = clf.predict(X_holdout)
y_prob = clf.predict_proba(X_holdout)[:, 1]

print("ROC AUC:", roc_auc_score(y_holdout, y_prob))
print(classification_report(y_holdout, y_pred))


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.model_selection import train_test_split

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# 80/20 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    df['bert_input'],
    df['rule_violation'],
    test_size=0.2,
    random_state=42,
)

# Both splits are tokenized with the same settings: truncate to at most
# 256 tokens and pad.
tokenize_kwargs = dict(truncation=True, padding=True, max_length=256)
train_encodings = tokenizer(X_train.tolist(), **tokenize_kwargs)
val_encodings = tokenizer(X_val.tolist(), **tokenize_kwargs)

class RedditDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with labels.

    Args:
        encodings: Mapping-like object whose ``.items()`` yields
            ``(field_name, per_example_sequence)`` pairs.
        labels: One label per example. May be any object exposing
            ``.tolist()`` or any plain iterable such as a list or tuple.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        # Plain lists/tuples have no .tolist(); accept them as well instead of
        # failing with AttributeError.
        self.labels = labels.tolist() if hasattr(labels, "tolist") else list(labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors plus a ``labels`` tensor."""
        item = {
            field: torch.tensor(values[idx])
            for field, values in self.encodings.items()
        }
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

# Wrap the tokenized splits and their labels for the Trainer.
train_dataset = RedditDataset(train_encodings, y_train)
val_dataset = RedditDataset(val_encodings, y_val)

# Pretrained BERT with a two-label classification head.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Evaluate and checkpoint once per epoch, then reload the checkpoint with the
# best `eval_loss` when training ends.
training_config = dict(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)
training_args = TrainingArguments(**training_config)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Training is left disabled here; uncomment the next line to fine-tune.
# trainer.train()


In [None]:
# BERT 평가
from sklearn.metrics import roc_auc_score, classification_report

# NOTE: `trainer.train()` is commented out in the training cell; unless it has
# been run, the numbers below describe an untrained classification head.
preds_output = trainer.predict(val_dataset)
logits = preds_output.predictions
y_pred = logits.argmax(-1)
y_true = y_val.values

# `predictions` holds raw logits. The class-1 logit alone is not a valid
# ranking score because the decision depends on logit_1 - logit_0, so convert
# to softmax probabilities before computing ROC AUC.
probs = torch.softmax(torch.as_tensor(logits), dim=-1)[:, 1].numpy()

print("ROC AUC:", roc_auc_score(y_true, probs))
print(classification_report(y_true, y_pred))
