In [29]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score
from torch.utils.data import Dataset, DataLoader

In [30]:
# Veri yüklemek için sınıf oluşturuyoruz

In [31]:
class MyDataset(Dataset):
    """Map-style dataset backed by a CSV file.

    Every column except the last one is treated as a feature; the last
    column is treated as the label.
    """

    def __init__(self, file_path):
        """Read ``file_path`` with pandas and split it into features and labels."""
        frame = pd.read_csv(file_path)
        self.data = frame
        # Materialise NumPy arrays once so per-sample indexing does not go
        # through pandas on every __getitem__ call.
        self.x = np.array(frame.iloc[:, :-1])
        self.y = np.array(frame.iloc[:, -1])

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, label)`` at ``index`` as float32 / int64 tensors."""
        features = torch.tensor(self.x[index], dtype=torch.float32)
        label = torch.tensor(self.y[index], dtype=torch.long)
        return features, label


In [32]:
# Veri setlerini yükleme

In [33]:
# Load the three CSV splits (train / validation / test) as MyDataset objects.
train_dataset, val_dataset, test_dataset = (
    MyDataset('cure_the_princess_train.csv'),
    MyDataset('cure_the_princess_validation.csv'),
    MyDataset('cure_the_princess_test.csv'),
)


In [34]:
# DataLoader'lar oluşturma

In [35]:
# Mini-batch size shared by all three loaders.
batch_size = 16

# Only the training split is shuffled; validation and test keep file order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader, test_dataloader = (
    DataLoader(dataset=split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)

In [36]:
# MLP Modeli oluşturma

In [47]:
class MLP(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The default sizes reproduce the original fixed architecture
    (13 -> 100 -> 50 -> 2), so ``MLP()`` builds the same network and the
    ``state_dict`` keys (``fc1``, ``fc2``, ``fc3``) are unchanged.

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=13, hidden1=100, hidden2=50, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return raw (unnormalised) logits for the batch ``x``."""
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        # No activation on the last layer: the output is left as logits.
        return self.fc3(x)


In [48]:
# MLP Modeli, Cross Entropy Loss ve SGD Optimizasyonu oluşturma

In [49]:
# Seed the global PyTorch RNG before building the model so that weight
# initialisation (and later draws from the global generator) are repeatable
# when the notebook is re-run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

model = MLP()
# CrossEntropyLoss consumes the raw logits returned by the model.
loss_fn = nn.CrossEntropyLoss()
# Plain SGD over all model parameters with a fixed learning rate.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [50]:
# Eğitim Fonksiyonunu tanımlama

In [51]:
def train(model, train_dataloader, optimizer, loss_fn):
    """Run one training epoch over ``train_dataloader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_dataloader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar loss.

    Returns:
        float: Mean of the per-batch losses for this epoch, or ``nan`` when
        the dataloader yielded no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for data, target in train_dataloader:
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    if num_batches == 0:
        return float('nan')
    return running_loss / num_batches


In [52]:
# Doğrulama Fonksiyonunu tanımlama

In [53]:
def evaluate(model, val_dataloader):
    """Compute classification accuracy of ``model`` over ``val_dataloader``.

    Args:
        model: Module producing per-class scores of shape ``(batch, classes)``;
            it is switched to evaluation mode.
        val_dataloader: Iterable yielding ``(data, target)`` batches, where
            ``target`` holds class indices.

    Returns:
        float: Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the
        dataloader yields no samples (instead of raising ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in val_dataloader:
            # The predicted class is the index of the largest score per row.
            predicted = model(data).argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    if total == 0:
        return 0.0
    return 100 * correct / total


In [54]:
# Early stopping ile en iyi modeli seçme

In [55]:
# Train with early stopping on validation accuracy.
max_epochs = 10
patience = 3  # consecutive non-improving epochs tolerated before stopping
checkpoint_path = 'best_model.pt'
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint; otherwise a run that never beats 0% would later load a missing
# or stale file left over from a previous run.
best_val_acc = float('-inf')
counter = 0
for epoch in range(max_epochs):
    train(model, train_dataloader, optimizer, loss_fn)
    val_acc = evaluate(model, val_dataloader)
    print('Epoch:', epoch+1, '| Validation Accuracy:', val_acc)
    if val_acc > best_val_acc:
        # New best: remember it, persist the weights and reset the patience counter.
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint_path)
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print('Early stopping.')
            break

# Restore the weights of the best epoch seen during this run.
model.load_state_dict(torch.load(checkpoint_path))

# Report accuracy of the restored model on the held-out test set.
test_acc = evaluate(model, test_dataloader)
print('Test Accuracy:', test_acc)


Epoch: 1 | Validation Accuracy: 64.01273885350318
Epoch: 2 | Validation Accuracy: 89.49044585987261
Epoch: 3 | Validation Accuracy: 80.89171974522293
Epoch: 4 | Validation Accuracy: 92.99363057324841
Epoch: 5 | Validation Accuracy: 92.99363057324841
Epoch: 6 | Validation Accuracy: 81.21019108280255
Epoch: 7 | Validation Accuracy: 77.70700636942675
Early stopping.
Test Accuracy: 90.41450777202073


In [56]:
# Compute per-class precision, recall and F1 on the test set.
model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for data, target in test_dataloader:
        # The predicted class is the index of the largest logit per row.
        predicted = model(data).argmax(dim=1)
        y_true.extend(target.tolist())
        y_pred.extend(predicted.tolist())
# NOTE(review): assumes label 0 means "not cured" and 1 means "cured" —
# confirm against the dataset's label column.
target_names = ['not_cured', 'cured']
# Pin the label set so the report still has one row per name (instead of
# raising) when a class is absent from both y_true and y_pred.
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(target_names))),
    target_names=target_names,
))

              precision    recall  f1-score   support

   not_cured       0.94      0.86      0.90       384
       cured       0.88      0.94      0.91       388

    accuracy                           0.90       772
   macro avg       0.91      0.90      0.90       772
weighted avg       0.91      0.90      0.90       772

