<a href="https://colab.research.google.com/github/nasibehmohammadi/Thesis/blob/main/bert_d7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# 1. Load the dataset
# The path points at the Colab VM filesystem; pd.read_csv raises
# FileNotFoundError if the CSV is not there when this line runs.
file_path = "/content/d7.csv"  # make sure the file has been uploaded to Colab
df = pd.read_csv(file_path)

# 2. Convert numeric features to text
def convert_row_to_text(row):
    """Render the values of ``row`` as a single space-separated string.

    Every value becomes ``Feature_<position>: <value>``, where ``position``
    is the zero-based order in which iterating ``row`` yields the value.
    An empty ``row`` produces an empty string.
    """
    return " ".join(
        f"Feature_{position}: {value}" for position, value in enumerate(row)
    )

# Build one text string per row from every column except the label column "F".
df["text"] = df.drop(columns=["F"]).apply(convert_row_to_text, axis=1)
X, y = df["text"].values, df["F"].values  # model inputs, target labels

# 3. Split the data into training and test sets
# 20% of the rows are held out; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# 5. Define a custom dataset class for PyTorch
class BertDataset(Dataset):
    """Dataset that tokenizes a single text on every item lookup.

    Args:
        texts: Indexable, sized collection of inputs; each item is passed
            through ``str()`` before tokenization.
        labels: Indexable collection of labels, read at the same index as
            ``texts``; each label is converted to a ``torch.long`` tensor.
        tokenizer: Callable invoked as ``tokenizer(text, padding=...,
            truncation=..., max_length=..., return_tensors=...)``. Its result
            must be indexable by ``"input_ids"`` and ``"attention_mask"``.
        max_length: Value forwarded to the tokenizer's ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return a dict with ``input_ids``, ``attention_mask`` and ``label``."""
        raw_text = str(self.texts[idx])
        target = self.labels[idx]

        tokenizer_options = dict(
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        encoded = self.tokenizer(raw_text, **tokenizer_options)

        # squeeze(0) drops the leading axis when it has size 1 (presumably the
        # batch axis added by return_tensors="pt" -- confirm against tokenizer).
        sample = {
            field: encoded[field].squeeze(0)
            for field in ("input_ids", "attention_mask")
        }
        sample["label"] = torch.tensor(target, dtype=torch.long)
        return sample

# 6. Build the datasets and DataLoaders
# Training split: batches of 16, reshuffled by the loader.
train_dataset = BertDataset(texts=X_train, labels=y_train, tokenizer=tokenizer)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

# Test split: batches of 16 in a fixed order.
test_dataset = BertDataset(texts=X_test, labels=y_test, tokenizer=tokenizer)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

# 7. Build the BERT model with a custom classification layer
class BertClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification layer.

    Args:
        bert_model_name: Name passed to ``BertModel.from_pretrained``.
        num_classes: Number of output features of the final linear layer.
    """

    def __init__(self, bert_model_name="bert-base-uncased", num_classes=2):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(0.3)
        # The linear head is sized from the encoder's configured hidden size.
        hidden_size = self.bert.config.hidden_size
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return the linear layer's output for the encoder's ``pooler_output``.

        No softmax is applied here; the result is one raw score per class.
        """
        encoder_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.fc(self.dropout(encoder_out.pooler_output))

# 8. Set up the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when one is available
model = BertClassifier().to(device)  # default arguments: "bert-base-uncased", 2 classes
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=2e-5)  # optimizes every parameter, BERT encoder included

# 9. Train the model
num_epochs = 5

for epoch in range(num_epochs):
    model.train()  # training mode, so dropout is active
    total_loss = 0  # running sum of the per-batch losses in this epoch

    for batch in train_loader:
        # Move the batch tensors to the same device as the model.
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        # One optimization step: clear old gradients, forward pass, loss,
        # backward pass, parameter update. The order matters.
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()  # .item() extracts a plain Python number

    # Mean loss per batch (not per sample) for this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 10. Evaluate the model on the test set
model.eval()  # evaluation mode, so dropout is disabled
y_preds, y_true = [], []

with torch.no_grad():  # gradients are not needed for scoring
    for batch in test_loader:
        # Move the three batch tensors to the model's device, in order.
        input_ids, attention_mask, labels = (
            batch[field].to(device)
            for field in ("input_ids", "attention_mask", "label")
        )

        outputs = model(input_ids, attention_mask)
        preds = outputs.argmax(dim=1)  # index of the highest score in each row

        # Collect predictions and ground truth on the CPU for scikit-learn.
        y_preds.extend(preds.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

# 11. Compute accuracy and F1-score
accuracy, f1 = accuracy_score(y_true, y_preds), f1_score(y_true, y_preds)

print(f"Accuracy: {accuracy:.4f}")
print(f"F1-Score: {f1:.4f}")


FileNotFoundError: [Errno 2] No such file or directory: '/content/d7.csv'