In [1]:
import os
import random
import time
import datetime
from collections import defaultdict, deque
from sklearn.metrics import cohen_kappa_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import USPS,FashionMNIST

In [2]:
# Notebook-wide hyperparameters.
BATCH_SIZE = 128  # mini-batch size of the training DataLoader
SEED = 45  # passed to set_seed() for reproducibility
NUM_CLASSES = 10  # NOTE(review): not referenced in the visible cells
EPOCHS =5  # NOTE(review): not referenced in the visible cells; the training loop uses MAX_EPOCHS
MAX_EPOCHS = 100  # upper bound on the number of training epochs
PATIENCE = 10  # epochs without validation-kappa improvement before early stopping

In [3]:
def set_seed(seed=45) -> None:
    """Seed NumPy, Python and PyTorch RNGs and request deterministic PyTorch kernels.

    Args:
        seed: integer seed applied to every generator.

    Side effects:
        Sets the CUBLAS_WORKSPACE_CONFIG and PYTHONHASHSEED environment
        variables, disables cuDNN benchmarking, enables
        torch.use_deterministic_algorithms, and prints a confirmation line.
    """
    # Same seeding order as before: NumPy, stdlib random, torch (CPU), torch (CUDA).
    for seeder in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    # Determinism switches.
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    # NOTE(review): changing PYTHONHASHSEED at runtime presumably only affects
    # child processes, not the already-running interpreter - confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)

    print(f"Random seed set as {seed}")

# Seed every RNG once, before the dataset split and model construction in later cells.
set_seed(SEED)

Random seed set as 45


In [4]:
def reduce_across_processes(val):
    """Return ``val`` converted to a new tensor.

    No cross-process reduction happens here; the value is only wrapped with
    ``torch.tensor`` (default dtype inference applies).
    """
    wrapped = torch.tensor(val)
    return wrapped

class SmoothedValue:
    """Running meter for a scalar series.

    Keeps the last ``window_size`` values in a bounded deque for windowed
    statistics (median, avg, max, value) plus a running ``total`` / ``count``
    for the average over the whole series (``global_avg``).
    """

    def __init__(self, window_size=20, fmt=None):
        # Default rendering: windowed median followed by the global average.
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value`` once in the window and weight it by ``n`` in the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Round-trip count and total through ``reduce_across_processes``.

        Warning: does not synchronize the deque!
        """
        count, total = reduce_across_processes([self.count, self.total]).tolist()
        self.count = int(count)
        self.total = total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Float32 mean of the values currently in the window."""
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every update seen so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        # All statistics are computed up front, whichever ones ``fmt`` uses.
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)

class MetricLogger:
    """Collection of named ``SmoothedValue`` meters with periodic console logging.

    Meters are created on first use (``defaultdict``) and can also be read as
    attributes, e.g. ``logger.loss``.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Push one scalar per keyword into the meter of the same name."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # __getattr__ only runs after normal lookup has failed. Read ``meters``
        # straight from the instance dict: going through ``self.meters`` would
        # re-enter this method on an instance whose __init__ has not run
        # (copy.copy, unpickling, __new__) and end in RecursionError.
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")

    def __str__(self):
        return self.delimiter.join(f"{name}: {str(meter)}" for name, meter in self.meters.items())

    def synchronize_between_processes(self):
        """Call ``synchronize_between_processes`` on every registered meter."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield items from ``iterable``, printing a progress line every ``print_freq`` items.

        Args:
            iterable: sized iterable (``len()`` is required).
            print_freq: a line is printed whenever the item index is a multiple of this.
            header: optional prefix for each line.

        Each line shows the item index, an ETA, all meters, per-iteration time,
        data-wait time and, when CUDA is available, the peak allocated memory in MB.
        A total-time line is printed once the iterable is exhausted.
        """
        i = 0
        if not header:
            header = ""
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt="{avg:.4f}")
        data_time = SmoothedValue(fmt="{avg:.4f}")

        num_items = len(iterable)
        use_cuda = torch.cuda.is_available()
        # Pad the running index to the width of the total count.
        space_fmt = ":" + str(len(str(num_items))) + "d"
        fields = [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"]
        if use_cuda:
            fields.append("max mem: {memory:.0f}")
        log_msg = self.delimiter.join(fields)

        MB = 1024.0 * 1024.0
        for obj in iterable:
            # Time spent waiting for the next item.
            data_time.update(time.time() - end)
            yield obj
            # Full iteration time, including the consumer's loop body.
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (num_items - i)
                fmt_kwargs = {
                    "eta": str(datetime.timedelta(seconds=int(eta_seconds))),
                    "meters": str(self),
                    "time": str(iter_time),
                    "data": str(data_time),
                }
                if use_cuda:
                    fmt_kwargs["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_items, **fmt_kwargs))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print(f"{header} Total time: {total_time_str}")

def accuracy(output, target, topk=(1,)):
    """Top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: 2-D score tensor; the top-k search runs along dim 1.
        target: 1-D tensor of class indices, or a 2-D tensor that is reduced
            to the index of the largest entry in each row.
        topk: iterable of k values.

    Returns:
        List of float32 scalar tensors, one per k, in the order given.
    """
    with torch.inference_mode():
        largest_k = max(topk)
        num_samples = target.size(0)
        if target.ndim == 2:
            target = target.max(dim=1).indices

        # Rows of ``hits`` are ranks: hits[r, s] is True when the rank-r
        # prediction for sample s equals its target.
        ranked = output.topk(largest_k, dim=1, largest=True, sorted=True).indices
        hits = ranked.t().eq(target.unsqueeze(0))

        percent_per_hit = 100.0 / num_samples
        return [hits[:k].flatten().sum(dtype=torch.float32) * percent_per_hit for k in topk]
def plot_losses(train_losses, val_losses, save_path):
    """Plot per-epoch training and validation loss and save the figure.

    Epochs are numbered from 1 and sized from ``train_losses``. The curves are
    drawn on the current pyplot figure, written to ``save_path``, and the
    figure is cleared afterwards.
    """
    epochs = range(1, len(train_losses) + 1)
    curves = (
        (train_losses, 'b', 'Training loss'),
        (val_losses, 'r', 'Validation loss'),
    )
    for series, line_style, legend_label in curves:
        plt.plot(epochs, series, line_style, label=legend_label)

    plt.title('Training and Validation Losses')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.savefig(save_path)
    plt.clf()

In [5]:
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: network to train; switched to train mode.
        criterion: loss called as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.
        data_loader: iterable of ``(image, target)`` batches.
        device: device the batches are moved to.
        epoch: epoch index, used only in the log header.

    Returns:
        The MetricLogger holding the lr, img/s, loss, acc1 and acc5 meters.
    """
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value}"))
    metric_logger.add_meter("img/s", SmoothedValue(window_size=10, fmt="{value}"))

    # Progress is printed every 10 batches.
    for image, target in metric_logger.log_every(data_loader, 10, f"Epoch: [{epoch}]"):
        step_began = time.time()
        image = image.to(device)
        target = target.to(device)

        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        top1, top5 = accuracy(output, target, topk=(1, 5))
        n_samples = image.shape[0]
        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters["acc1"].update(top1.item(), n=n_samples)
        metric_logger.meters["acc5"].update(top5.item(), n=n_samples)
        # Throughput of this step only; data-loading time is excluded.
        metric_logger.meters["img/s"].update(n_samples / (time.time() - step_began))

    return metric_logger

def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=""):
    """Evaluate ``model`` on ``data_loader`` without gradient tracking.

    Args:
        model: network to evaluate; switched to eval mode.
        criterion: loss called as ``criterion(output, target)``.
        data_loader: iterable of ``(image, target)`` batches.
        device: device the batches are moved to.
        print_freq: progress-line frequency passed to ``log_every``.
        log_suffix: text appended to the ``"Test: "`` log header.

    Returns:
        The MetricLogger with loss, acc1 and acc5 meters. Cohen's kappa over
        all samples is stored under ``meters["Kappa"]`` as a plain float, not
        as a SmoothedValue.
    """
    model.eval()
    metric_logger = MetricLogger(delimiter="  ")
    header = f"Test: {log_suffix}"

    all_predictions, all_targets = [], []
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, print_freq, header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)

            top1, top5 = accuracy(output, target, topk=(1, 5))
            n_samples = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(top1.item(), n=n_samples)
            metric_logger.meters["acc5"].update(top5.item(), n=n_samples)

            # Keep per-sample predictions and labels for the kappa score.
            predictions = output.max(1).indices
            all_predictions.extend(predictions.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    kappa_score = cohen_kappa_score(all_targets, all_predictions)
    # Raw float on purpose: the caller reads meters["Kappa"] and compares it directly.
    metric_logger.meters["Kappa"] = kappa_score

    # Summary line, including the mean validation loss.
    top1_avg = metric_logger.acc1.global_avg
    top5_avg = metric_logger.acc5.global_avg
    loss_avg = metric_logger.loss.global_avg
    print(f"{header} Acc@1 {top1_avg:.3f} Acc@5 {top5_avg:.3f}  Kappa {kappa_score:.3f} Loss {loss_avg:.3f}")
    return metric_logger

In [6]:
# Convert images to tensors, then normalise with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# USPS dataset: the official training split is divided 80/20 into train/validation.
split_ratio = 0.8
train_set = USPS(root='./data', train=True, transform=transform, download=True)
train_size = int(split_ratio * len(train_set))
val_size = len(train_set) - train_size
train_set, val_set = random_split(train_set, lengths=[train_size, val_size])

test_set = USPS(root='./data', train=False, transform=transform, download=True)

# Training batches are shuffled; validation and test run one sample at a time, in order.
training_loader = DataLoader(train_set, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_set, shuffle=False, batch_size=1)
test_loader = DataLoader(test_set, shuffle=False, batch_size=1)

100%|██████████| 6.58M/6.58M [00:00<00:00, 62.9MB/s]
100%|██████████| 1.83M/1.83M [00:00<00:00, 25.3MB/s]


In [7]:
class MyActivationFunction(nn.Module):
    """ELU-style activation: ``x`` for ``x >= 0``, ``alpha * (exp(beta * x) - 1)`` otherwise.

    Args:
        alpha: scale of the negative branch (default 1.5, the value that was hard-coded).
        beta: factor applied to ``x`` inside the exponential (default 0.6, likewise).
    """

    def __init__(self, alpha: float = 1.5, beta: float = 0.6):
        super().__init__()
        self.alpha = alpha
        self.beta = beta

        # Not used by forward(). Kept because it is a registered parameter:
        # removing it would change the state_dict keys (existing checkpoints
        # would no longer load strictly) and the random numbers drawn while
        # building a model.
        self.new_weights = nn.Parameter(torch.randn(3, 1))

    def forward(self, x):
        # torch.where evaluates both branches for every element. Without the
        # clamp, exp(beta * x) overflows to inf for large positive x and the
        # backward pass of the unselected branch yields 0 * inf = NaN.
        # Clamping to <= 0 leaves the x < 0 values (the only ones selected
        # from this branch) unchanged.
        negative_branch = self.alpha * (-1 + torch.exp(self.beta * torch.clamp(x, max=0)))
        return torch.where(0 <= x, x, negative_branch)


In [8]:
# @title Default title text


In [9]:
class EmptyActivationFunction(nn.Module):
    """Identity activation: ``forward`` hands back its input untouched."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # The very same object is returned; no copy is made.
        return x

In [10]:
from typing import Callable, Optional
from torchvision.models.resnet import conv1x1, conv3x3, ResNet
from torch import Tensor

class BasicBlock(nn.Module):
    """Two-convolution residual block that uses ``MyActivationFunction`` as its activations.

    Layout: conv3x3 -> norm -> activation -> conv3x3 -> norm, add the
    (optionally downsampled) input, then a second activation.
    """

    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Build the block.

        Raises:
            ValueError: if ``groups != 1`` or ``base_width != 64``.
            NotImplementedError: if ``dilation > 1``.
        """
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # Submodules are created in a fixed order; it determines both the
        # state_dict key order and the order random numbers are drawn in.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = MyActivationFunction()  # attribute name kept for compatibility
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        self.relu2 = MyActivationFunction()

    def forward(self, x: Tensor) -> Tensor:
        """Apply the residual branch, add the shortcut, then the final activation."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The shortcut is the raw input unless a projection module was supplied.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu2(out)


In [11]:
class NResNet(nn.Module):
    """ResNet with a [2, 2, 2, 2] ``BasicBlock`` layout, adapted after construction.

    Args:
        num_classes: forwarded to the wrapped ``ResNet``.
        in_channels: input channels of the replacement stem convolution.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()

        backbone = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
        # Replace the stem activation with the custom one.
        backbone.relu = MyActivationFunction()
        # Replace the stem convolution so it accepts ``in_channels`` channels.
        backbone.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.model = backbone

    def forward(self, x):
        """Delegate to the wrapped ResNet."""
        return self.model(x)



In [12]:
# Build the network for single-channel input and print its module tree.
model=NResNet(in_channels=1)
print(model)

NResNet(
  (model): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): MyActivationFunction()
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): MyActivationFunction()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): MyActivationFunction()
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, e

In [13]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()


train_losses = []
val_losses = []
# Start below any reachable kappa. With the previous start value of 0.0 and a
# strict ">" test, a run whose validation kappa never exceeded 0 wrote no
# best_model.pth, and the cells that load that file afterwards failed.
best_kappa = float("-inf")
epochs_without_improvement = 0

start_time = time.time()

for epoch in range(MAX_EPOCHS):
    train_metric_logger = train_one_epoch(model, criterion, optimizer, training_loader, device, epoch)
    val_metric_logger = evaluate(model, criterion, val_loader, device=device)

    checkpoint = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}

    train_loss = train_metric_logger.meters["loss"].global_avg
    train_losses.append(train_loss)

    val_loss = val_metric_logger.meters["loss"].global_avg
    val_losses.append(val_loss)

    # evaluate() stores Cohen's kappa as a plain float under this key.
    kappa = val_metric_logger.meters["Kappa"]

    # The loss curve is redrawn and overwritten after every epoch.
    plot_losses(train_losses, val_losses, "train_val_loss_graph.png")

    # Latest state, overwritten every epoch.
    torch.save(checkpoint, "checkpoint.pth")

    # Model selection and early stopping are both driven by validation kappa.
    if kappa > best_kappa:
        torch.save(checkpoint, "best_model.pth")
        epochs_without_improvement = 0
        best_kappa = kappa
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= PATIENCE:
        print("Early stopping triggered")
        break

total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print(f"Training time {total_time_str}")

Epoch: [0]  [ 0/46]  eta: 0:00:59  lr: 0.0003  img/s: 101.04816319155584  loss: 2.5703 (2.5703)  acc1: 9.3750 (9.3750)  acc5: 44.5312 (44.5312)  time: 1.2954  data: 0.0287  max mem: 280
Epoch: [0]  [10/46]  eta: 0:00:06  lr: 0.0003  img/s: 4732.475159550087  loss: 0.2895 (0.5902)  acc1: 90.6250 (80.9659)  acc5: 100.0000 (94.1761)  time: 0.1696  data: 0.0219  max mem: 282
Epoch: [0]  [20/46]  eta: 0:00:02  lr: 0.0003  img/s: 5218.165058074549  loss: 0.2685 (0.4099)  acc1: 91.4062 (87.0536)  acc5: 99.2188 (96.6890)  time: 0.0513  data: 0.0208  max mem: 282
Epoch: [0]  [30/46]  eta: 0:00:01  lr: 0.0003  img/s: 5337.856310525164  loss: 0.1621 (0.3271)  acc1: 94.5312 (89.7681)  acc5: 99.2188 (97.6562)  time: 0.0452  data: 0.0206  max mem: 282
Epoch: [0]  [40/46]  eta: 0:00:00  lr: 0.0003  img/s: 5338.015530698483  loss: 0.1252 (0.2746)  acc1: 96.0938 (91.4062)  acc5: 100.0000 (98.2088)  time: 0.0443  data: 0.0203  max mem: 282
Epoch: [0] Total time: 0:00:03
Test:   [   0/1459]  eta: 0:01:25

<Figure size 640x480 with 0 Axes>

In [14]:
# Display labels for the ten digit classes, "0" through "9".
CLASSES = [str(digit) for digit in range(10)]


def test(model, data_loader, device, print_freq=100, log_suffix="", criterion=None):
    """Evaluate ``model`` on ``data_loader`` and save a confusion-matrix figure.

    Args:
        model: network to evaluate; switched to eval mode.
        data_loader: iterable of ``(image, target)`` batches.
        device: device the batches are moved to.
        print_freq: progress-line frequency passed to ``log_every``.
        log_suffix: text appended to the ``"Test: "`` log header.
        criterion: loss called as ``criterion(output, target)``. When omitted,
            the notebook-level ``criterion`` is used if one is defined
            (the previous behaviour), otherwise ``nn.CrossEntropyLoss()``.

    Returns:
        Global top-1 accuracy in percent.

    Side effects:
        Prints accuracy, loss and Cohen's kappa, and writes
        ``confusion_matrix.png`` to the working directory.
    """
    if criterion is None:
        # Backward compatibility: this function used to read the global directly.
        criterion = globals().get("criterion")
        if criterion is None:
            criterion = nn.CrossEntropyLoss()

    model.eval()
    metric_logger = MetricLogger(delimiter="  ")
    header = f"Test: {log_suffix}"

    all_predictions = []
    all_targets = []
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, print_freq, header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))

            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            _, predictions = output.max(1)
            all_predictions.extend(predictions.cpu().tolist())
            all_targets.extend(target.cpu().tolist())

    metric_logger.synchronize_between_processes()

    # Summary line, including the mean test loss.
    print(f"{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f} Loss {metric_logger.loss.global_avg:.3f}")

    # Fix the label set so the matrix always has one row/column per entry of
    # CLASSES, even when a class is absent from both targets and predictions;
    # otherwise its shape can disagree with display_labels.
    class_ids = list(range(len(CLASSES)))
    conf_matrix = confusion_matrix(all_targets, all_predictions, labels=class_ids)
    kappa_score = cohen_kappa_score(all_targets, all_predictions)
    print("Kappa score: ", kappa_score)

    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=CLASSES)
    disp.plot()
    plt.savefig("confusion_matrix.png")
    plt.clf()
    return metric_logger.acc1.global_avg



In [15]:
# @title Default title text
# Validation results (this section was newly added)

# Reload the checkpoint saved at the best validation kappa during training.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load("best_model.pth", map_location="cpu")
model.load_state_dict(checkpoint["model"])

print("Start validating")
start_time = time.time()
# test() returns the top-1 accuracy; the return value is not used here.
test(model, val_loader, device=device)

total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print(f"Validating time {total_time_str}")

Start validating
Test:   [   0/1459]  eta: 0:00:09  loss: 0.0000 (0.0000)  acc1: 100.0000 (100.0000)  acc5: 100.0000 (100.0000)  time: 0.0063  data: 0.0009  max mem: 282
Test:   [ 100/1459]  eta: 0:00:07  loss: 0.0000 (0.0162)  acc1: 100.0000 (99.0099)  acc5: 100.0000 (100.0000)  time: 0.0053  data: 0.0003  max mem: 282
Test:   [ 200/1459]  eta: 0:00:06  loss: 0.0006 (0.0266)  acc1: 100.0000 (99.0050)  acc5: 100.0000 (100.0000)  time: 0.0053  data: 0.0003  max mem: 282
Test:   [ 300/1459]  eta: 0:00:06  loss: 0.0000 (0.0953)  acc1: 100.0000 (98.3389)  acc5: 100.0000 (99.6678)  time: 0.0051  data: 0.0003  max mem: 282
Test:   [ 400/1459]  eta: 0:00:05  loss: 0.0001 (0.0771)  acc1: 100.0000 (98.5037)  acc5: 100.0000 (99.7506)  time: 0.0050  data: 0.0003  max mem: 282
Test:   [ 500/1459]  eta: 0:00:04  loss: 0.0001 (0.0665)  acc1: 100.0000 (98.6028)  acc5: 100.0000 (99.8004)  time: 0.0050  data: 0.0003  max mem: 282
Test:   [ 600/1459]  eta: 0:00:04  loss: 0.0000 (0.0778)  acc1: 100.0000 

<Figure size 640x480 with 0 Axes>

In [16]:
# Test results

# Reload the checkpoint saved at the best validation kappa during training.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load("best_model.pth", map_location="cpu")
model.load_state_dict(checkpoint["model"])

print("Start testing")
start_time = time.time()
# test() returns the top-1 accuracy; the return value is not used here.
test(model, test_loader, device=device)

total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print(f"Testing time {total_time_str}")

Start testing
Test:   [   0/2007]  eta: 0:00:14  loss: 0.0000 (0.0000)  acc1: 100.0000 (100.0000)  acc5: 100.0000 (100.0000)  time: 0.0071  data: 0.0010  max mem: 282
Test:   [ 100/2007]  eta: 0:00:09  loss: 0.0000 (0.0622)  acc1: 100.0000 (98.0198)  acc5: 100.0000 (100.0000)  time: 0.0052  data: 0.0003  max mem: 282
Test:   [ 200/2007]  eta: 0:00:09  loss: 0.0001 (0.1688)  acc1: 100.0000 (96.0199)  acc5: 100.0000 (100.0000)  time: 0.0052  data: 0.0003  max mem: 282
Test:   [ 300/2007]  eta: 0:00:08  loss: 0.0000 (0.1767)  acc1: 100.0000 (96.6777)  acc5: 100.0000 (100.0000)  time: 0.0052  data: 0.0003  max mem: 282
Test:   [ 400/2007]  eta: 0:00:08  loss: 0.0000 (0.1454)  acc1: 100.0000 (97.2569)  acc5: 100.0000 (100.0000)  time: 0.0051  data: 0.0003  max mem: 282
Test:   [ 500/2007]  eta: 0:00:07  loss: 0.0006 (0.1504)  acc1: 100.0000 (97.4052)  acc5: 100.0000 (100.0000)  time: 0.0053  data: 0.0003  max mem: 282
Test:   [ 600/2007]  eta: 0:00:07  loss: 0.0000 (0.1616)  acc1: 100.0000 

<Figure size 640x480 with 0 Axes>